I have been trying to port invRegex.py to a node.js implementation for a while, but I'm still struggling with it. I already have the regular expression parse tree thanks to the ret.js tokenizer and it works pretty well, but the actual generation and concatenation of all the distinct elements in a way that is memory-efficient is revealing very challenging to me. To keep it simple, lets say I have the following regex:
[01]{1,2}@[a-f]
Feeding that to invRegex.py
produces the following output (tabbified to take less space):
0@a 0@b 0@c 0@d 0@e 0@f
00@a 00@b 00@c 00@d 00@e 00@f
01@a 01@b 01@c 01@d 01@e 01@f
1@a 1@b 1@c 1@d 1@e 1@f
10@a 10@b 10@c 10@d 10@e 10@f
11@a 11@b 11@c 11@d 11@e 11@f
Considering I'm able to get each individual token and produce an array of all the valid individual outputs:
[01]{1,2} = function () {
return ['0', '00', '01', '1', '10', '11'];
};
@ = function () {
return ['@'];
};
[a-f] = function () {
return ['a', 'b', 'c', 'd', 'e', 'f'];
};
I can pute the cartesian product of all the arrays and get the same expected output:
var _ = require('underscore');
function cartesianProductOf() {
return _.reduce(arguments, function(a, b) {
return _.flatten(_.map(a, function(x) {
return _.map(b, function(y) {
return x.concat([y]);
});
}), true);
}, [ [] ]);
};
var tokens = [
['0', '00', '01', '1', '10', '11'],
['@'],
['a', 'b', 'c', 'd', 'e', 'f'],
];
var result = cartesianProductOf(tokens[0], tokens[1], tokens[2]);
_.each(result, function (value, key) {
console.log(value.join(''));
});
The problem with this is that it holds all the 36 values in memory, if I had a slightly more plicated regular expression, such as [a-z]{0,10}
it would hold 146813779479511
values in memory, which is totally unfeasible. I would like to process this huge list in an asynchronous fashion, passing each generated bination to a callback and allowing me to interrupt the process at any sensible point I see fit, much like invRegex.py or this Haskell package - unfortunately I can't understand Haskell and I don't know how to mimic the generator behavior in Python to Javascript either.
I tried running a couple of simple generator experiments in node 0.11.9 (with --harmony
) like this one:
function* alpha() {
yield 'a'; yield 'b'; yield 'c';
}
function* numeric() {
yield '0'; yield '1';
}
function* alphanumeric() {
yield* alpha() + numeric(); // what's the diff between yield and yield*?
}
for (var i of alphanumeric()) {
console.log(i);
}
Needless to say the above doesn't work. =/
Banging my head against the wall here, so any help tackling this problem would be highly appreciated.
UPDATE: Here is a sample ret.js parse tree for b[a-z]{3}
:
{
"type": ret.types.ROOT,
"stack": [
{
"type": ret.types.CHAR,
"value": 98 // b
},
{
"type": ret.types.REPETITION,
"max": 3,
"min": 3,
"value": {
"type": ret.types.SET,
"not": false,
"set": [
{
"type": ret.types.RANGE,
"from": 97, // a
"to": 122 // z
}
]
}
}
]
]
}
The SET
/ RANGE
type should yield 26 distinct values, and the parent REPETITION
type should take that previous value to the power of 3, yielding 17576 distinct binations. If I was to generate a flattened out tokens
array like I did before for cartesianProductOf
, the intermediate flattened values would take as much space as the actual cartesian product itself.
I hope this example explains better the problem I am facing.
I have been trying to port invRegex.py to a node.js implementation for a while, but I'm still struggling with it. I already have the regular expression parse tree thanks to the ret.js tokenizer and it works pretty well, but the actual generation and concatenation of all the distinct elements in a way that is memory-efficient is revealing very challenging to me. To keep it simple, lets say I have the following regex:
[01]{1,2}@[a-f]
Feeding that to invRegex.py
produces the following output (tabbified to take less space):
0@a 0@b 0@c 0@d 0@e 0@f
00@a 00@b 00@c 00@d 00@e 00@f
01@a 01@b 01@c 01@d 01@e 01@f
1@a 1@b 1@c 1@d 1@e 1@f
10@a 10@b 10@c 10@d 10@e 10@f
11@a 11@b 11@c 11@d 11@e 11@f
Considering I'm able to get each individual token and produce an array of all the valid individual outputs:
[01]{1,2} = function () {
return ['0', '00', '01', '1', '10', '11'];
};
@ = function () {
return ['@'];
};
[a-f] = function () {
return ['a', 'b', 'c', 'd', 'e', 'f'];
};
I can pute the cartesian product of all the arrays and get the same expected output:
var _ = require('underscore');
function cartesianProductOf() {
return _.reduce(arguments, function(a, b) {
return _.flatten(_.map(a, function(x) {
return _.map(b, function(y) {
return x.concat([y]);
});
}), true);
}, [ [] ]);
};
var tokens = [
['0', '00', '01', '1', '10', '11'],
['@'],
['a', 'b', 'c', 'd', 'e', 'f'],
];
var result = cartesianProductOf(tokens[0], tokens[1], tokens[2]);
_.each(result, function (value, key) {
console.log(value.join(''));
});
The problem with this is that it holds all the 36 values in memory, if I had a slightly more plicated regular expression, such as [a-z]{0,10}
it would hold 146813779479511
values in memory, which is totally unfeasible. I would like to process this huge list in an asynchronous fashion, passing each generated bination to a callback and allowing me to interrupt the process at any sensible point I see fit, much like invRegex.py or this Haskell package - unfortunately I can't understand Haskell and I don't know how to mimic the generator behavior in Python to Javascript either.
I tried running a couple of simple generator experiments in node 0.11.9 (with --harmony
) like this one:
function* alpha() {
yield 'a'; yield 'b'; yield 'c';
}
function* numeric() {
yield '0'; yield '1';
}
function* alphanumeric() {
yield* alpha() + numeric(); // what's the diff between yield and yield*?
}
for (var i of alphanumeric()) {
console.log(i);
}
Needless to say the above doesn't work. =/
Banging my head against the wall here, so any help tackling this problem would be highly appreciated.
UPDATE: Here is a sample ret.js parse tree for b[a-z]{3}
:
{
"type": ret.types.ROOT,
"stack": [
{
"type": ret.types.CHAR,
"value": 98 // b
},
{
"type": ret.types.REPETITION,
"max": 3,
"min": 3,
"value": {
"type": ret.types.SET,
"not": false,
"set": [
{
"type": ret.types.RANGE,
"from": 97, // a
"to": 122 // z
}
]
}
}
]
]
}
The SET
/ RANGE
type should yield 26 distinct values, and the parent REPETITION
type should take that previous value to the power of 3, yielding 17576 distinct binations. If I was to generate a flattened out tokens
array like I did before for cartesianProductOf
, the intermediate flattened values would take as much space as the actual cartesian product itself.
I hope this example explains better the problem I am facing.
Share Improve this question edited May 23, 2017 at 11:52 CommunityBot 11 silver badge asked Dec 28, 2013 at 13:47 Alix AxelAlix Axel 155k99 gold badges402 silver badges505 bronze badges 11- If it helps to understand the ret.js parse tree structure, I've coded a recursive function that calculates the number of valid return values. – Alix Axel Commented Dec 28, 2013 at 14:27
-
yield*
is like Python'syield from
. Also "I don't know how to mimic the generator behavior in Python to Javascript either." what behavior specifically? – Benjamin Gruenbaum Commented Dec 28, 2013 at 20:00 -
@BenjaminGruenbaum: It's still not very clear to me what
yield from
is exactly but from what I understood it's a way for a generator to pipe it's state methods to the inner iterators / generators; is this right? Tried it in Node with the example above and it throws an error, I suspect it's because the concatenation operator doesn't make sense there but I'm not sure. With plainyield
theconsole.log
outputs a single[object Generator][object Generator]
string and not the actual values. – Alix Axel Commented Dec 29, 2013 at 0:10 -
@BenjaminGruenbaum: As for the behavior in Python, well, basically concatenating a single generator value with all the remaining generator values (without terminating prematurely any generator in the process). The Python code starts at
GroupEmitter.groupGen()
as a generator itself, but it also seems that this generator is creating / returning other generators inside it. I don't know how to do that - I can't even get the two generators above (alpha
&numeric
) that have the same number of generateable elements to return all 9 possible binations. – Alix Axel Commented Dec 29, 2013 at 0:15 -
1
Or, you could use parentheses to clarify the
yield*
bind:yield (yield* alpha()) + (yield* numeric());
– bishop Commented Dec 31, 2013 at 16:45
6 Answers
Reset to default 6 +1000I advise you to write iterator classes. They are easy to implement (basically they are state machines), they have a low memory footprint, they can be bined to build up increasingly plex expressions (please scroll down to see the final result), and the resulting iterator can be wrapped in an enumerator.
Each iterator class has the following methods:
- first: initializes the state machine (first match)
- next: proceeds to the next state (next match)
- ok: initially true, but bees false once 'next' proceeds beyond the last match
- get: returns the current match (as a string)
- clone: clones the object; essential for repetition, because each instance needs its own state
Start off with the most trivial case: a sequence of one or more characters that should be matched literally (e.g. /foo/). Needless to say this has only one match, so 'ok' will bee false upon the first call of 'next'.
function Literal(literal) { this.literal = literal; }
Literal.prototype.first = function() { this.i = 0; };
Literal.prototype.next = function() { this.i++; };
Literal.prototype.ok = function() { return this.i == 0; };
Literal.prototype.get = function() { return this.literal; };
Literal.prototype.clone = function() { return new Literal(this.literal); };
Character classes ([abc]) are trivial too. The constructor accepts a string of characters; if you prefer arrays, that's easy to fix.
function CharacterClass(chars) { this.chars = chars; }
CharacterClass.prototype.first = function() { this.i = 0; };
CharacterClass.prototype.next = function() { this.i++; };
CharacterClass.prototype.ok = function() { return this.i < this.chars.length; };
CharacterClass.prototype.get = function() { return this.chars.charAt(this.i); };
CharacterClass.prototype.clone = function() { return new CharacterClass(this.chars); };
Now we need iterators that bine other iterators to form more plex regular expressions. A sequence is just two or more patterns in a row (like foo[abc]).
function Sequence(iterators) {
if (arguments.length > 0) {
this.iterators = iterators.length ? iterators : [new Literal('')];
}
}
Sequence.prototype.first = function() {
for (var i in this.iterators) this.iterators[i].first();
};
Sequence.prototype.next = function() {
if (this.ok()) {
var i = this.iterators.length;
while (this.iterators[--i].next(), i > 0 && !this.iterators[i].ok()) {
this.iterators[i].first();
}
}
};
Sequence.prototype.ok = function() {
return this.iterators[0].ok();
};
Sequence.prototype.get = function() {
var retval = '';
for (var i in this.iterators) {
retval += this.iterators[i].get();
}
return retval;
};
Sequence.prototype.clone = function() {
return new Sequence(this.iterators.map(function(it) { return it.clone(); }));
};
Another way to bine iterators is the choice (a.k.a. alternatives), e.g. foo|bar.
function Choice(iterators) { this.iterators = iterators; }
Choice.prototype.first = function() {
this.count = 0;
for (var i in this.iterators) this.iterators[i].first();
};
Choice.prototype.next = function() {
if (this.ok()) {
this.iterators[this.count].next();
while (this.ok() && !this.iterators[this.count].ok()) this.count++;
}
};
Choice.prototype.ok = function() {
return this.count < this.iterators.length;
};
Choice.prototype.get = function() {
return this.iterators[this.count].get();
};
Choice.prototype.clone = function() {
return new Choice(this.iterators.map(function(it) { return it.clone(); }));
};
Other regex features can be implemented by bining the existing classes. Class inheritance is a great way to do this. For example, an optional pattern (x?) is just a choice between the empty string and x.
function Optional(iterator) {
if (arguments.length > 0) {
Choice.call(this, [new Literal(''), iterator]);
}
}
Optional.prototype = new Choice();
Repetition (x{n,m}) is a bination of Sequence and Optional. Because I have to inherit one or the other, my implementation consists of two mutually dependent classes.
function RepeatFromZero(maxTimes, iterator) {
if (arguments.length > 0) {
Optional.call(this, new Repeat(1, maxTimes, iterator));
}
}
RepeatFromZero.prototype = new Optional();
function Repeat(minTimes, maxTimes, iterator) {
if (arguments.length > 0) {
var sequence = [];
for (var i = 0; i < minTimes; i++) {
sequence.push(iterator.clone()); // need to clone the iterator
}
if (minTimes < maxTimes) {
sequence.push(new RepeatFromZero(maxTimes - minTimes, iterator));
}
Sequence.call(this, sequence);
}
}
Repeat.prototype = new Sequence();
As I said earlier, an iterator can be wrapped into an enumerator. This is simply a loop that you can break whenever you want.
function Enumerator(iterator) {
this.iterator = iterator;
this.each = function(callback) {
for (this.iterator.first(); this.iterator.ok(); this.iterator.next()) {
callback(this.iterator.get());
}
};
}
Time to put it all together. Let's take some silly regular expression:
([ab]{2}){1,2}|[cd](f|ef{0,2}e)
Composing the iterator object is really straightforward:
function GetIterationsAsHtml() {
var iterator = new Choice([
new Repeat(1, 2,
new Repeat(2, 2, new CharacterClass('ab'))),
new Sequence([
new CharacterClass('cd'),
new Choice([
new Literal('f'),
new Sequence([
new Literal('e'),
new RepeatFromZero(2, new Literal('f')),
new Literal('e')
])
])
])
]);
var iterations = '<ol>\n';
var enumerator = new Enumerator(iterator);
enumerator.each(function(iteration) { iterations += '<li>' + iteration + '</li>\n'; });
return iterations + '</ol>';
}
This yields 28 matches, but I will spare you the output.
My apologies if my code is not pliant to software patterns, is not browser-patible (works OK on Chrome and Firefox) or suffers from poor OOP. I just hope it makes the concept clear.
EDIT: for pleteness, and following OP's initiative, I implemented one more iterator class: the reference.
A reference (\1 \2 etc) picks up the current match of an earlier capturing group (i.e. anything in parentheses). Its implementation is very similar to Literal, in that it has exactly one match.
function Reference(iterator) { this.iterator = iterator; }
Reference.prototype.first = function() { this.i = 0; };
Reference.prototype.next = function() { this.i++; };
Reference.prototype.ok = function() { return this.i == 0; };
Reference.prototype.get = function() { return this.iterator.get(); };
Reference.prototype.clone = function() { return new Reference(this.iterator); };
The constructor is given an iterator that represents the referenced subpattern. Taking (foo|bar)([xy])\2\1
as an example (yields fooxxfoo, fooyyfoo, barxxbar, baryybar):
var groups = new Array();
var iterator = new Sequence([
groups[1] = new Choice([new Literal('foo'), new Literal('bar')]),
groups[2] = new CharacterClass('xy'),
new Reference(groups[2]),
new Reference(groups[1])
]);
Capturing groups are specified as you build up the tree of iterator classes. I am still doing that manually here, but eventually you want this to be automated. That is just a matter of mapping your parse tree to a similar tree of iterator classes.
EDIT 2: here's a relatively simple recursive function that will convert a parse tree produced by ret.js into an iterator.
function ParseTreeMapper() {
this.capturingGroups = [];
}
ParseTreeMapper.prototype.mapToIterator = function(parseTree) {
switch (parseTree.type) {
case ret.types.ROOT:
case ret.types.GROUP:
var me = this;
var mapToSequence = function(parseTrees) {
return new Sequence(parseTrees.map(function(t) {
return me.mapToIterator(t);
}));
};
var group = parseTree.options ?
new Choice(parseTree.options.map(mapToSequence)) :
mapToSequence(parseTree.stack);
if (parseTree.remember) {
this.capturingGroups.push(group);
}
return group;
case ret.types.SET:
return new CharacterClass(this.mapToCharacterClass(parseTree.set));
case ret.types.REPETITION:
return new Repeat(parseInt(parseTree.min), parseInt(parseTree.max), this.mapToIterator(parseTree.value));
case ret.types.REFERENCE:
var ref = parseInt(parseTree.value) - 1;
return ref in this.capturingGroups ?
new Reference(this.capturingGroups[ref]) :
new Literal('<ReferenceOutOfRange>');
case ret.types.CHAR:
return new Literal(String.fromCharCode(parseTree.value));
default:
return new Literal('<UnsupportedType>');
}
};
ParseTreeMapper.prototype.mapToCharacterClass = function(parseTrees) {
var chars = '';
for (var i in parseTrees) {
var tree = parseTrees[i];
switch (tree.type) {
case ret.types.CHAR:
chars += String.fromCharCode(tree.value);
break;
case ret.types.RANGE:
for (var code = tree.from; code <= tree.to; code++) {
chars += String.fromCharCode(code);
}
break;
}
}
return chars;
};
Usage:
var regex = 'b[a-n]{3}';
var parseTree = ret(regex); // requires ret.js
var iterator = new ParseTreeMapper().mapToIterator(parseTree);
I put all ponents together in this demo: http://jsfiddle/Pmnwk/3/
Note: many regex syntax constructs are not supported (anchors, look-ahead, look-behind, recursion), but I guess it is already pretty much up to par with invRegex.py.
Here's a version that makes a function for each part of the input and poses all of them to produce a function that'll generate each regex result and feed it into that argument:
//Takes in a list of things, returns a function that takes a function and applies it to
// each Cartesian product. then poses all of the functions to create an
// inverse regex generator.
function CartesianProductOf() {
var args = arguments;
return function(callback) {
Array.prototype.map.call(args, function(vals) {
return function(c, val) {
vals.forEach(function(v) {
c(val + v);
});
};
}).reduce(function(prev, cur) {
return function(c, val) {
prev(function(v){cur(c, v)}, val);
};
})(callback, "");
};
}
Modified to work off a parse tree (copied a litte code from here):
//Takes in a list of things, returns a function that takes a function and applies it to
// each Cartesian product.
function CartesianProductOf(tree) {
var args = (tree.type == ret.types.ROOT)? tree.stack :
((tree.type == ret.types.SET)? tree.set : []);
return function(callback) {
var funs = args.map(function(vals) {
switch(vals.type) {
case ret.types.CHAR:
return function(c, val) {
c(val + vals.value);
};
case ret.types.RANGE:
return function(c, val) {
for(var i=vals.from; i<=vals.to; i++) {
c(val+String.fromCharCode(i));
}
};
case ret.types.SET:
return function(c, val) {
CartesianProductOf(vals)(function(i) {c(val+i)});
};
/* return function(c, val) {
vals.set.forEach(function(v) {
c(val + v);
});
}; */
case ret.types.REPETITION:
var tmp = CartesianProductOf(vals.value);
if(vals.max == vals.min) {
return fillArray(function(c, val) {
tmp(function(i){c(val+i);}); //Probably works?
}, vals.max);
} else {
return fillArray(function(c, val) {
tmp(function(i){c(val+i);});
}, vals.min).concat(fillArray(function(c, val) {
c(val);
tmp(function(i){c(val+i);});
}, vals.max-vals.min));
}
default:
return function(c, val) {
c(val);
};
}
}).reduce(function(prev, cur) { //Flatten array.
return prev.concat(cur);
}, []);
if(tree.type == rets.type.ROOT) //If it's a full tree bine all the functions.
funs.reduce(function(prev, cur) { //Compose!
return function(c, val) {
prev(function(v){cur(c, v)}, val);
};
})(callback, "");
else //If it's a set call each function.
funs.forEach(function(f) {f(callback, "")});
};
}
function fillArray(value, len) {
var arr = [];
for (var i = 0; i < len; i++) {
arr.push(value);
}
return arr;
}
If you're alright with a less functionalish, more C-esque solution:
function helper(callme, cur, stuff, pos) {
if(pos == stuff.length) {
callme(cur);
} else
for(var i=0; i<stuff[pos].length; i++) {
helper(callme, cur+stuff[pos][i], stuff, pos+1);
}
}
function CartesianProductOf(callback) {
helper(callback, "", Array.prototype.slice.call(arguments, 1), 0);
}
How about this:
var tokens = [
['0', '00', '01', '1', '10', '11'],
['@'],
['a', 'b', 'c', 'd', 'e', 'f'],
];
function cartesianProductOf(arr, callback) {
var cur = [];
var f = function(i) {
if (i >= arr.length) {
callback(cur.join(''));
return
}
for (var j=0; j<arr[i].length; j++) {
cur[i] = arr[i][j];
f(i+1);
}
};
f(0);
}
cartesianProductOf(tokens, function(str) { console.log(str); });
It sounds like you're asking for Lazy Cartesian Product: you do want the Cartesian product, but you don't want to pute them all beforehand (and consume all that memory). Said another way, you want to iterate through the Cartesian product.
If that's right, have you checked out this Javascript implementation of the X(n) formula? With that, you can either iterate over them in natural order <<0,0,0>, <0,0,1>, <0,1,0>, ...> or choose an arbitrary position to calculate.
It seems like you can just do:
// move through them in natural order, without preputation
lazyProduct(tokens, function (token) { console.log(token); });
// or...
// pick ones out at random
var LP = new LazyProduct(tokens);
console.log(LP.item(7)); // the 7th from the list, without prepute
console.log(LP.item(242)); // the 242nd from the list, again without prepute
Surely I must be missing something...? Generators are simply overkill given the X(n) formula.
Update
Into a JSFiddle I have placed an instrumented version of the lazyProduct
code, a sample tokens array of arrays, and a call to lazyProduct
with those tokens
.
When you run the code without modification, you'll see it generates the 0@a
, etc. output expected from the sample tokens
array. I think the link explains the logic pretty well, but in summary... if you unment the instrumentation in lazyProduct
, you'll notice that there are two key variables lens
and p
. lens
is a pre-pute of the length of each array (in the array of arrays) passed in. p
is a stack that holds the current path up to where you are (eg, if you're "1st array 3rd index, 2nd array 2nd index, and 3rd array 1st index" p
represents that), and this is what's passed into your callback function.
My callback function just does a join on the arguments (per your OP example), but again these are just the corresponding values in p mapped to the original array of arrays.
If you profile this further, you'll see the footprint needed to build the Cartesian product is limited to just what you need to call your callback function. Try it on one of your worst case tokens to see.
Update 2
I coded about 75% of an approach based on Cartesian product. My API took a ret.js parse tree, converted that to RPN, then generated sets of sets to pass into a X(n) calculator. Using @ruud example of ([ab]{2}){1,2}|[cd](f|ef{0,2}e)
, this would be generated:
new Option([
new Set([
new Set(new Set(['a','b']), new Set['a','b'])),
new Set(new Set(['a','b']), new Set['a','b']))
]),
new Set(
['c', 'd'],
new Option([
new Set(['f']),
new Set(['e']]),
new Set(['e','f']),
new Set(new Set(['e','f']), new Set(['e', 'f']))
])
])
The tricky parts were nested options (buggy) and inverse character classes & back-references (unsupported).
This approach was getting brittle, and really the Iterator solution is superior. Converting from your parse tree into that should be pretty straightforward. Thanks for the interesting problem!
There already are plenty of good answers here, but I specifically wanted the generator part to work, which did not for you. It seems that you were trying to do this :
//the alphanumeric part
for (x of alpha()) for (y of numeric()) console.log(x + y);
//or as generator itself like you wanted
function* alphanumeric() {
for (x of alpha()) for (y of numeric()) yield(x + y);
}
//iterating over it
for (var i of alphanumeric()) {
console.log(i);
}
Output:
a0
a1
b0
b1
c0
c1
You can use this for cartesian product required in regex matching.
Just want to share what I came up with, using generators and based off invRegex.py
:
var ret = require('ret');
var tokens = ret('([ab]) ([cd]) \\1 \\2 z');
var references = [];
capture(tokens);
// console.log(references);
for (string of generate(tokens)) {
console.log(string);
}
function capture(token) {
if (Array.isArray(token)) {
for (var i = 0; i < token.length; ++i) {
capture(token[i]);
}
}
else {
if ((token.type === ret.types.ROOT) || (token.type === ret.types.GROUP)) {
if ((token.type === ret.types.GROUP) && (token.remember === true)) {
var group = [];
if (token.hasOwnProperty('stack') === true) {
references.push(function* () {
yield* generate(token.stack);
});
}
else if (token.hasOwnProperty('options') === true) {
for (var generated of generate(token)) {
group.push(generated);
}
references.push(group);
}
}
if (token.hasOwnProperty('stack') === true) {
capture(token.stack);
}
else if (token.hasOwnProperty('options') === true) {
for (var i = 0; i < token.options.length; ++i) {
capture(token.options[i]);
}
}
return true;
}
else if (token.type === ret.types.REPETITION) {
capture(token.value);
}
}
}
function* generate(token) {
if (Array.isArray(token)) {
if (token.length > 1) {
for (var prefix of generate(token[0])) {
for (var suffix of generate(token.slice(1))) {
yield prefix + suffix;
}
}
}
else {
yield* generate(token[0]);
}
}
else {
if ((token.type === ret.types.ROOT) || (token.type === ret.types.GROUP)) {
if (token.hasOwnProperty('stack') === true) {
token.options = [token.stack];
}
for (var i = 0; i < token.options.length; ++i) {
yield* generate(token.options[i]);
}
}
else if (token.type === ret.types.POSITION) {
yield '';
}
else if (token.type === ret.types.SET) {
for (var i = 0; i < token.set.length; ++i) {
var node = token.set[i];
if (token.not === true) {
if ((node.type === ret.types.CHAR) && (node.value === 10)) {
}
}
yield* generate(node);
}
}
else if (token.type === ret.types.RANGE) {
for (var i = token.from; i <= token.to; ++i) {
yield String.fromCharCode(i);
}
}
else if (token.type === ret.types.REPETITION) {
if (token.min === 0) {
yield '';
}
for (var i = token.min; i <= token.max; ++i) {
var stack = [];
for (var j = 0; j < i; ++j) {
stack.push(token.value);
}
if (stack.length > 0) {
yield* generate(stack);
}
}
}
else if (token.type === ret.types.REFERENCE) {
console.log(references);
if (references.hasOwnProperty(token.value - 1)) {
yield* references[token.value - 1]();
// yield references[token.value - 1]().next().value;
}
else {
yield '';
}
}
else if (token.type === ret.types.CHAR) {
yield String.fromCharCode(token.value);
}
}
}
I still haven't figured out how to implement capturing groups / references and the values yielded in the REPETITION
token type are not generated in lexicographic order yet, but other than that it works.