te')); return $arr; }

/**
 * List the thread ids that belong to one user.
 *
 * @param mixed  $uid      User id; an empty value yields an empty array without any lookup.
 * @param int    $page     Page number.
 * @param int    $pagesize Number of records per page.
 * @param bool   $desc     Sort direction: TRUE for descending, FALSE for ascending.
 * @param string $key      Column whose value is used as the key of the returned array.
 * @param array  $col      Columns to query.
 * @return mixed array() when $uid is empty, otherwise the result of thread_tid__find().
 */
function thread_tid_find_by_uid($uid, $page = 1, $pagesize = 1000, $desc = TRUE, $key = 'tid', $col = array()) {
    if (empty($uid)) {
        return array();
    }

    // Loose comparison kept on purpose: any truthy $desc selects descending order (-1).
    $direction = (TRUE == $desc) ? -1 : 1;
    $where = array('uid' => $uid);
    $sort = array('tid' => $direction);

    return thread_tid__find($where, $sort, $page, $pagesize, $key, $col);
}

/**
 * List the thread ids under a forum. $fid may also be an array of forum ids,
 * e.g. $fid = array(1, 2, 3).
 *
 * Results are keyed by 'tid' and only the 'tid' and 'verify_date' columns are requested.
 *
 * @param mixed $fid      Forum id or array of forum ids; an empty value yields an empty array.
 * @param int   $page     Page number.
 * @param int   $pagesize Number of records per page.
 * @param bool  $desc     Sort direction: TRUE for descending, FALSE for ascending.
 * @return mixed array() when $fid is empty, otherwise the result of thread_tid__find().
 */
function thread_tid_find_by_fid($fid, $page = 1, $pagesize = 1000, $desc = TRUE) {
    if (empty($fid)) {
        return array();
    }

    // Loose comparison kept on purpose: any truthy $desc selects descending order (-1).
    $direction = (TRUE == $desc) ? -1 : 1;
    $where = array('fid' => $fid);
    $sort = array('tid' => $direction);
    $columns = array('tid', 'verify_date');

    return thread_tid__find($where, $sort, $page, $pagesize, 'tid', $columns);
}

/**
 * Delete the thread-id record(s) matching $tid.
 *
 * @param mixed $tid Thread id; an empty value is rejected.
 * @return mixed FALSE when $tid is empty, otherwise the result of thread_tid__delete().
 */
function thread_tid_delete($tid) {
    if (empty($tid)) {
        return FALSE;
    }

    return thread_tid__delete(array('tid' => $tid));
}

/**
 * Count all thread-id records.
 *
 * @return mixed The result of thread_tid__count() called without a condition.
 */
function thread_tid_count() {
    return thread_tid__count();
}

/**
 * Count the threads of one user.
 * Original author's note: on large data sets, be careful with counts on a non-primary-key column.
 *
 * @param mixed $uid User id to count threads for.
 * @return mixed The result of thread_tid__count() filtered by uid.
 */
function thread_uid_count($uid) {
    return thread_tid__count(array('uid' => $uid));
}

/**
 * Count the threads of one forum.
 * Original author's note: on large data sets, be careful with counts on a non-primary-key column.
 *
 * @param mixed $fid Forum id to count threads for.
 * @return mixed The result of thread_tid__count() filtered by fid.
 */
function thread_fid_count($fid) {
    return thread_tid__count(array('fid' => $fid));
}

?>javascript - Regex only capturing last instance of capture group in match - Stack Overflow
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

javascript - Regex only capturing last instance of capture group in match - Stack Overflow

programmeradmin4浏览0评论

I have the following regular expression in two different languages that produces the same odd results (JavaScript and Flash). What I want to know is not how to fix it, but why the behavior is occurring.

The Regular Expression:

\[(\\{2}|\\\]|[^\]])*\]

The goal here is to match a bracketed string, and ensure that I don't stop at an escaped bracket.

If I have the text input [abcdefg] it is correctly matched, but the only thing returned as part of the capture group is g, whereas I expect abcdefg. If I change the expression to \[((?:\\{2}|\\\]|[^\]])*)\], then I get the result that I want.

So why is this happening? Will this be consistent across other languages?

Note: Simplifying the expression to \[([^\]])*\] produces the same issue.

I have the following regular expression in two different languages that produces the same odd results (JavaScript and Flash). What I want to know is not how to fix it, but why the behavior is occurring.

The Regular Expression:

\[(\\{2}|\\\]|[^\]])*\]

The goal here is to match a bracketed string, and ensure that I don't stop at an escaped bracket.

If I have the text input [abcdefg] it is correctly matched, but the only thing returned as part of the capture group is g, whereas I expect abcdefg. If I change the expression to \[((?:\\{2}|\\\]|[^\]])*)\], then I get the result that I want.

So why is this happening? Will this be consistent across other languages?

Note: Simplifying the expression to \[([^\]])*\] produces the same issue.

Share Improve this question edited Aug 17, 2013 at 14:05 Daniel Gimenez asked Jun 30, 2013 at 18:45 Daniel GimenezDaniel Gimenez 20.6k4 gold badges57 silver badges76 bronze badges
Add a comment  | 

2 Answers 2

Reset to default 10

Regardless of the problem, ActionScript and JavaScript should always yield the same results, as they both implement ECMAScript (or a superset thereof, but for regular expressions they should not disagree).

But yes, this will be happening in any language (or rather any regex flavor). The reason is that you are repeating the capturing group. Let's take a simpler example: match (.)* against abc. So what we are repeating is (.). The first time it is tried, the engine enters the group, matches a with ., leaves the group and captures a. Only now does the quantifier kick in and it repeats the whole thing. So we enter the group again, and match and capture b. This capture overwrites the previous one, hence \1 does now contain b. Same again for the third repetition: the capture will be overwritten with c.

I don't know of a regex flavor that behaves differently, and the only one that lets you access all previous captures (instead of just overwriting them) is .NET.

The solution is the one p.s.w.g. proposed. Make the grouping you need for the repetition non-capturing (this will improve performance, because you don't need all that capturing and overwriting anyway) and wrap the whole thing in a new group. Your expression has one little flaw though: you need to include the backslash in the negated character class. Otherwise, backtracking could give you a match in [abc\]. So here is an expression that will work as you expect:

\[((?:\\{2}|\\\]|[^\]\\])*)\]

Working demo. (unfortunately, it doesn't show the captures, but it shows that it gives correct matches in all cases)

Note that your expression does not allow for other escape sequences. In particular a single \, followed by anything but a ] will cause your pattern to fail. If this is not what you desire, you can just use:

\[((?:\\.|[^\]\\])*)\]

Working demo.

Performance can further be improved with the "unrolling-the-loop" technique:

\[([^\]\\]*(?:\\.[^\]\\]*)*)\]

Working demo.

Try including the * quantifier inside the capture group, like this:

\[((?:\\{2}|\\\]|[^\]])*)\]
发布评论

评论列表(0)

  1. 暂无评论