I have a very long paragraph in which I need to match phrases on word boundaries and replace each match with the wanted value.
The wanted values come in many different patterns, which is why I have a lot of `new RegExp` lines that go through the replacements one by one.
// Question's original approach: one case-insensitive, word-bounded RegExp per
// phrase, applied through a single long chain of .replace() calls.
var paragraph = "german gateway is located at ... Leonardo DA VINci and other some word superman";
// NOTE(review): the value returned by this chain is never assigned, so the
// console.log below prints `paragraph` unchanged - confirm whether
// `paragraph = paragraph.replace(...)` was intended.
paragraph
.replace( new RegExp("\\b"+ "german gateway" +"\\b", "ig"), "German gateway")
.replace( new RegExp("\\b"+ "Leonardo DA vinci" +"\\b", "ig"), "Leonardo da Vinci")
.replace( new RegExp("\\b"+ "some word" +"\\b", "ig"), "some other word")
.replace( new RegExp
// (snippet truncated here) continue for at least a few thousand rows.
console.log(paragraph);
//sample output
German gateway is located at ... Leonardo da Vinci and other some other word superman
However, chaining this many `new RegExp` replacements causes JavaScript to throw an error:
Uncaught RangeError: Maximum call stack size exceeded
Is there any way to avoid calling `new RegExp` so heavily while keeping exactly the regex rule that I want?
I have a very long paragraph in which I need to match phrases on word boundaries and replace each match with the wanted value.
The wanted values come in many different patterns, which is why I have a lot of `new RegExp` lines that go through the replacements one by one.
// Question's original approach: one case-insensitive, word-bounded RegExp per
// phrase, applied through a single long chain of .replace() calls.
var paragraph = "german gateway is located at ... Leonardo DA VINci and other some word superman";
// NOTE(review): the value returned by this chain is never assigned, so the
// console.log below prints `paragraph` unchanged - confirm whether
// `paragraph = paragraph.replace(...)` was intended.
paragraph
.replace( new RegExp("\\b"+ "german gateway" +"\\b", "ig"), "German gateway")
.replace( new RegExp("\\b"+ "Leonardo DA vinci" +"\\b", "ig"), "Leonardo da Vinci")
.replace( new RegExp("\\b"+ "some word" +"\\b", "ig"), "some other word")
.replace( new RegExp
// (snippet truncated here) continue for at least a few thousand rows.
console.log(paragraph);
//sample output
German gateway is located at ... Leonardo da Vinci and other some other word superman
However, chaining this many `new RegExp` replacements causes JavaScript to throw an error:
Uncaught RangeError: Maximum call stack size exceeded
Is there any way to avoid calling `new RegExp` so heavily while keeping exactly the regex rule that I want?
Share Improve this question asked Jul 15, 2021 at 12:28 i need helpi need help 2,38610 gold badges55 silver badges73 bronze badges 2- "//continue for at least Few Thousand Rows." - Do you get payed for lines of code? o.O – Andreas Commented Jul 15, 2021 at 12:39
- doing this for own usage. just couldn't figure out whether any way or array regex, or have to write it in different way to get the same result without causing call stack error. – i need help Commented Jul 15, 2021 at 12:44
2 Answers
Use an object and iterate over its properties. If you need a guaranteed order, use an array of objects instead, with one object for every replacement.
const paragraph = "german gateway is located at ... Leonardo DA VINci and other some word superman";

// Search phrase -> replacement text. Phrases are matched literally,
// case-insensitively and only on word boundaries.
const replacements = {
  "german gateway": "German gateway",
  "Leonardo DA vinci": "Leonardo da Vinci",
  "some word": "some other word",
  /* ... */
};

/**
 * Escapes every regex metacharacter in `text` so that it can be embedded in a
 * RegExp source and match itself literally.
 *
 * @param {string} text Literal text.
 * @returns {string} `text` with metacharacters backslash-escaped.
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Applies each phrase -> replacement pair of `map` to `text`, one after the
 * other, in the iteration order of `Object.entries(map)`.
 *
 * @param {string} text Input text.
 * @param {Object<string, string>} map Search phrase -> replacement text.
 * @returns {string} The text with all replacements applied.
 */
function applyReplacements(text, map) {
  return Object.entries(map).reduce((current, [phrase, replacement]) => {
    // Without escaping, a phrase such as "a.b" would also match "axb".
    const rx = new RegExp(`\\b${escapeRegExp(phrase)}\\b`, "ig");
    // A function replacer inserts the replacement verbatim; a plain string
    // would have `$&`, `$1`, ... interpreted as substitution patterns.
    return current.replace(rx, () => replacement);
  }, text);
}

const result = applyReplacements(paragraph, replacements);
console.log(result);
You should also escape the variable part of the string you pass to the `RegExp()` constructor, so that regex metacharacters in a phrase are matched literally.
You can use an "associative" array (a plain object) whose keys are the phrases to search for and whose values are the replacements, build a regex trie out of the keys, and run a single `.replace` with one regex that finds all the phrases, passing a callback as the replacement argument to `.replace` so that each match is replaced inline.
First, run `npm install regex-trie`, and then use:
// Text to normalise; it is reassigned once the replacements have been applied.
let paragraph = "german gateway is located at ... Leonardo DA VINci and other some word superman";
// Search phrase -> replacement text.
const phrases = {
  "german gateway": "German gateway",
  "Leonardo DA vinci": "Leonardo da Vinci",
  "some word": "some other word"
};
let RegexTrie = require('regex-trie');
// Put every search phrase into one trie so a single pattern covers them all.
let trie = new RegexTrie();
trie.add(Object.keys(phrases));
// Wrap the trie's pattern in word boundaries; `i` ignores case, `g` finds all matches.
const trieSource = trie.toRegExp().source;
const regex = new RegExp("\\b" + trieSource + "\\b", 'ig');
/**
 * Looks up `prop` among the own enumerable keys of `obj`, ignoring letter case.
 *
 * The own-key check does not go through `obj.hasOwnProperty`, so it also works
 * for prototype-less dictionaries and for a key named "hasOwnProperty".
 *
 * @param {string} prop Key to look for (any letter case).
 * @param {Object} obj Dictionary to search.
 * @returns {*} Value of the first key equal to `prop` when both are
 *     lower-cased, or `undefined` when there is none.
 */
let getValue = function (prop, obj) {
  const wanted = prop.toLowerCase();
  const key = Object.keys(obj).find((candidate) => candidate.toLowerCase() === wanted);
  return key === undefined ? undefined : obj[key];
};
// Single pass over the text: each phrase hit is looked up case-insensitively
// in `phrases` and swapped for its replacement.
paragraph = paragraph.replace(regex, function (match) {
  return getValue(match, phrases);
});
console.log(paragraph);
// => German gateway is located at ... Leonardo da Vinci and other some other word superman
Here is a JavaScript snippet bundled with the help of `browserify`:
// Browserify prelude (minified). r(e, n, t): `e` maps a module id to
// [factory, dependency map], `n` caches instantiated modules and `t` lists the
// entry ids that are run immediately. o(i) returns n[i].exports, instantiating
// module i on first use; an id missing from `e` is passed to an outer `require`
// when one exists, otherwise an Error with code "MODULE_NOT_FOUND" is thrown.
// The factory of module 1 (the entry script) opens at the end of this line.
(function(){function r(e,n,t){function o(i,f){if(!n[i]){if(!e[i]){var c="function"==typeof require&&require;if(!f&&c)return c(i,!0);if(u)return u(i,!0);var a=new Error("Cannot find module '"+i+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[i]={exports:{}};e[i][0].call(p.exports,function(r){var n=e[i][1][r];return o(n||r)},p,p.exports,r,e,n,t)}return n[i].exports}for(var u="function"==typeof require&&require,i=0;i<t.length;i++)o(t[i]);return o}return r})()({1:[function(require,module,exports){
// Text to normalise; it is reassigned once the replacements have been applied.
let paragraph = "german gateway is located at ... Leonardo DA VINci and other some word superman";
// Search phrase -> replacement text.
const phrases = {
  "german gateway": "German gateway",
  "Leonardo DA vinci": "Leonardo da Vinci",
  "some word": "some other word"
};
let RegexTrie = require('regex-trie');
// Put every search phrase into one trie so a single pattern covers them all.
let trie = new RegexTrie();
trie.add(Object.keys(phrases));
// Wrap the trie's pattern in word boundaries; `i` ignores case, `g` finds all matches.
const trieSource = trie.toRegExp().source;
const regex = new RegExp("\\b" + trieSource + "\\b", 'ig');
/**
 * Looks up `prop` among the own enumerable keys of `obj`, ignoring letter case.
 *
 * The own-key check does not go through `obj.hasOwnProperty`, so it also works
 * for prototype-less dictionaries and for a key named "hasOwnProperty".
 *
 * @param {string} prop Key to look for (any letter case).
 * @param {Object} obj Dictionary to search.
 * @returns {*} Value of the first key equal to `prop` when both are
 *     lower-cased, or `undefined` when there is none.
 */
let getValue = function (prop, obj) {
  const wanted = prop.toLowerCase();
  const key = Object.keys(obj).find((candidate) => candidate.toLowerCase() === wanted);
  return key === undefined ? undefined : obj[key];
};
// Single pass over the text: each phrase hit is looked up case-insensitively
// in `phrases` and swapped for its replacement.
paragraph = paragraph.replace(regex, function (match) {
  return getValue(match, phrases);
});
console.log(paragraph);
},{"regex-trie":3}],2:[function(require,module,exports){
(function (global){(function (){
/*! http://mths.be/jsesc v0.5.0 by @mathias */
;(function(root) {
// Environment detection. Each `free*` variable ends up holding the host object
// when it exists as an object, and a falsy value otherwise.
// Detect free variables `exports`
var freeExports = typeof exports == 'object' && exports;
// Detect free variable `module`
// (only accepted when `module.exports` is the same object as `exports`)
var freeModule = typeof module == 'object' && module &&
module.exports == freeExports && module;
// Detect free variable `global`, from Node.js or Browserified code,
// and use it as `root`
var freeGlobal = typeof global == 'object' && global;
// When `global` is not an object, `freeGlobal` is `false`: both property reads
// below yield `undefined`, the test fails and `root` keeps its current value.
if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) {
root = freeGlobal;
}
/*--------------------------------------------------------------------------*/
var object = {};
var hasOwnProperty = object.hasOwnProperty;

/**
 * Invokes `callback(key, value)` for every enumerable own property of
 * `object`; inherited properties are skipped.
 */
var forOwn = function(object, callback) {
  for (var name in object) {
    if (!hasOwnProperty.call(object, name)) {
      continue;
    }
    callback(name, object[name]);
  }
};
/**
 * Copies every own enumerable property of `source` onto `destination` and
 * returns `destination`. A falsy `source` leaves `destination` untouched.
 */
var extend = function(destination, source) {
  if (source) {
    forOwn(source, function(name, val) {
      destination[name] = val;
    });
  }
  return destination;
};
/**
 * Calls `callback(element)` for each index from 0 up to the length `array`
 * had when the loop started.
 */
var forEach = function(array, callback) {
  // The length is read once, so elements appended by `callback` are not visited.
  for (var position = 0, total = array.length; position < total; position += 1) {
    callback(array[position]);
  }
};
var toString = object.toString;
/** Returns true when the `toString` tag of `value` is '[object Array]'. */
var isArray = function(value) {
  var tag = toString.call(value);
  return tag == '[object Array]';
};
/**
 * Returns true when the `toString` tag of `value` is '[object Object]'.
 * Deliberately simple: good enough for the callers in this module.
 */
var isObject = function(value) {
  var tag = toString.call(value);
  return tag == '[object Object]';
};
/** Returns true for string primitives and for `String` wrapper objects. */
var isString = function(value) {
  if (typeof value == 'string') {
    return true;
  }
  return toString.call(value) == '[object String]';
};
/**
 * Returns true when `value` is callable according to `typeof`, or when its
 * `toString` tag is '[object Function]'.
 */
var isFunction = function(value) {
  // `typeof` alone is not trusted: the original author notes that old Chrome
  // reported regexes, and old IE reported host functions, as 'object'.
  if (typeof value == 'function') {
    return true;
  }
  return toString.call(value) == '[object Function]';
};
/*--------------------------------------------------------------------------*/
// http://mathiasbynens.be/notes/javascript-escapes#single
// Maps a character to the two-character backslash escape that represents it.
var singleEscapes = {
'"': '\\"',
'\'': '\\\'',
'\\': '\\\\',
'\b': '\\b',
'\f': '\\f',
'\n': '\\n',
'\r': '\\r',
'\t': '\\t'
// `\v` is omitted intentionally, because in IE < 9, '\v' == 'v'.
// '\v': '\\x0B'
};
// Matches exactly the characters that have an entry in `singleEscapes`
// (inside a character class, `\b` stands for backspace).
var regexSingleEscape = /["'\\\b\f\n\r\t]/;
// Matches one ASCII digit.
var regexDigit = /[0-9]/;
// Matches space, `!`, `#`-`&`, `(`-`[` and `]`-`~`: the printable ASCII
// characters other than `"`, `'` and `\`.
var regexWhitelist = /[ !#-&\(-\[\]-~]/;
/**
 * Serialises `argument` as escaped source text.
 *
 * Strings are processed one code unit at a time: whitelisted printable ASCII
 * is kept, everything else becomes a backslash escape. Arrays and plain
 * objects are serialised recursively. Any other value is returned through
 * `String()`, or through `JSON.stringify()` when `options.json` is set.
 *
 * @param {*} argument Value to serialise.
 * @param {Object} [options] Overrides for the `defaults` declared below.
 * @returns {string} The escaped representation.
 *
 * NOTE(review): the option controlling compact output is spelled `pact` here;
 * upstream jsesc appears to document it as `compact` - confirm whether this
 * copy of the source was truncated.
 */
var jsesc = function(argument, options) {
// Handle options
var defaults = {
'escapeEverything': false,
'quotes': 'single',
'wrap': false,
'es6': false,
'json': false,
'pact': true,
'indent': '\t',
'__indent__': ''
};
var json = options && options.json;
// JSON mode switches the defaults to double quotes and wrapping; explicit
// `quotes` / `wrap` entries in `options` still override them in `extend`.
if (json) {
defaults.quotes = 'double';
defaults.wrap = true;
}
// From here on `options` is the freshly built `defaults` object, so the
// assignments below do not touch the object the caller passed in.
options = extend(defaults, options);
if (options.quotes != 'single' && options.quotes != 'double') {
options.quotes = 'single';
}
var quote = options.quotes == 'double' ? '"' : '\'';
var pact = options.pact;
var indent = options.indent;
var oldIndent;
var newLine = pact ? '' : '\n';
var result;
var isEmpty = true;
if (json && argument && isFunction(argument.toJSON)) {
argument = argument.toJSON();
}
if (!isString(argument)) {
if (isArray(argument)) {
result = [];
// Nested values are always wrapped in quotes.
options.wrap = true;
// Each nesting level appends one more `indent` unit to `__indent__`.
oldIndent = options.__indent__;
indent += oldIndent;
options.__indent__ = indent;
forEach(argument, function(value) {
isEmpty = false;
result.push(
(pact ? '' : indent) +
jsesc(value, options)
);
});
if (isEmpty) {
return '[]';
}
return '[' + newLine + result.join(',' + newLine) + newLine +
(pact ? '' : oldIndent) + ']';
} else if (!isObject(argument)) {
if (json) {
// For some values (e.g. `undefined`, `function` objects),
// `JSON.stringify(value)` returns `undefined` (which isn’t valid
// JSON) instead of `'null'`.
return JSON.stringify(argument) || 'null';
}
return String(argument);
} else { // it’s an object
result = [];
options.wrap = true;
oldIndent = options.__indent__;
indent += oldIndent;
options.__indent__ = indent;
forOwn(argument, function(key, value) {
isEmpty = false;
result.push(
(pact ? '' : indent) +
jsesc(key, options) + ':' +
(pact ? '' : ' ') +
jsesc(value, options)
);
});
if (isEmpty) {
return '{}';
}
return '{' + newLine + result.join(',' + newLine) + newLine +
(pact ? '' : oldIndent) + '}';
}
}
var string = argument;
// Loop over each code unit in the string and escape it
var index = -1;
var length = string.length;
var first;
var second;
var codePoint;
result = '';
while (++index < length) {
var character = string.charAt(index);
// With `es6` set, a valid surrogate pair is emitted as one `\u{...}` escape
// and the low surrogate is skipped.
if (options.es6) {
first = string.charCodeAt(index);
if ( // check if it’s the start of a surrogate pair
first >= 0xD800 && first <= 0xDBFF && // high surrogate
length > index + 1 // there is a next code unit
) {
second = string.charCodeAt(index + 1);
if (second >= 0xDC00 && second <= 0xDFFF) { // low surrogate
// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
codePoint = (first - 0xD800) * 0x400 + second - 0xDC00 + 0x10000;
result += '\\u{' + codePoint.toString(16).toUpperCase() + '}';
index++;
continue;
}
}
}
if (!options.escapeEverything) {
if (regexWhitelist.test(character)) {
// It’s a printable ASCII character that is not `"`, `'` or `\`,
// so don’t escape it.
result += character;
continue;
}
// A quote character is escaped only when it equals the active `quote`.
if (character == '"') {
result += quote == character ? '\\"' : character;
continue;
}
if (character == '\'') {
result += quote == character ? '\\\'' : character;
continue;
}
}
// `\0` is used only outside JSON mode and when no digit follows; presumably
// because `\0` followed by a digit would read as a different escape.
if (
character == '\0' &&
!json &&
!regexDigit.test(string.charAt(index + 1))
) {
result += '\\0';
continue;
}
if (regexSingleEscape.test(character)) {
// no need for a `hasOwnProperty` check here
result += singleEscapes[character];
continue;
}
// Fallback: `\xHH` when the code fits in two hex digits and JSON mode is
// off, otherwise `\uHHHH`.
var charCode = character.charCodeAt(0);
var hexadecimal = charCode.toString(16).toUpperCase();
var longhand = hexadecimal.length > 2 || json;
var escaped = '\\' + (longhand ? 'u' : 'x') +
('0000' + hexadecimal).slice(longhand ? -4 : -2);
result += escaped;
continue;
}
if (options.wrap) {
result = quote + result + quote;
}
return result;
};
jsesc.version = '0.5.0';
/*--------------------------------------------------------------------------*/
// Export `jsesc` through whichever mechanism the environment offers: an AMD
// `define`, CommonJS `module.exports` / `exports`, or a property on `root`.
// Some AMD build optimizers, like r.js, check for specific condition patterns
// like the following:
if (
typeof define == 'function' &&
typeof define.amd == 'object' &&
define.amd
) {
define(function() {
return jsesc;
});
} else if (freeExports && !freeExports.nodeType) {
if (freeModule) { // in Node.js or RingoJS v0.8.0+
freeModule.exports = jsesc;
} else { // in Narwhal or RingoJS v0.7.0-
freeExports.jsesc = jsesc;
}
} else { // in Rhino or a web browser
root.jsesc = jsesc;
}
}(this));
}).call(this)}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {})
},{}],3:[function(require,module,exports){
var jsesc = require('jsesc');
/**
* @module regex-trie
*/
var RegexTrie = (function () {
    "use strict";

    var has = function (object, key) {
        return Object.prototype.hasOwnProperty.call(object, key);
    };

    /**
     * The `RegexTrie` class builds a regular expression from a set of phrases
     * added to it. It produces a non-optimised `RegExp` and only represents
     * literal characters; regex metacharacters and non-printable characters
     * are escaped.
     *
     * Internally every phrase is stored one UTF-16 code unit per trie level;
     * a node carrying `end: true` marks the end of a complete phrase.
     *
     * May be called with or without `new`.
     *
     * @class RegexTrie
     * @constructor
     */
    var RegexTrie = function () {
        if ( ! (this instanceof RegexTrie) ) {
            return new RegexTrie();
        }

        this._num_phrases_in_trie = 0;
        this._trie = {};

        return this;
    };

    /**
     * Adds a phrase, or every element of an array of phrases, to the trie.
     * Only non-empty strings and non-NaN numbers are stored; anything else
     * (objects, booleans, empty strings, ...) is silently ignored, as are
     * phrases that are already present.
     *
     * @method add()
     * @param phrase_to_add {array|string|number}
     * @chainable
     */
    RegexTrie.prototype.add = function (phrase_to_add) {
        if ( phrase_to_add instanceof Array ) {
            phrase_to_add.forEach(this.add, this);
            return this;
        }

        var phrase = this._coerce_to_string(phrase_to_add);

        if ( ! this._is_phrase_valid(phrase) || this.contains(phrase) ) {
            return this;
        }

        var node = this._trie;

        phrase.split('').forEach( function (chr) {
            if ( ! has(node, chr) ) {
                node[chr] = {};
            }
            node = node[chr];
        });

        // Set the end marker (so we know this was a complete word)
        node.end = true;
        this._num_phrases_in_trie++;

        return this;
    };

    /**
     * @method toRegExp()
     * @return {RegExp|undefined} A flag-less `RegExp` matching any stored
     *     phrase, or `undefined` while the trie is empty.
     */
    RegexTrie.prototype.toRegExp = function () {
        if ( this._num_phrases_in_trie === 0 ) return;

        return new RegExp(this.toString());
    };

    /**
     * @method toString()
     * @return {string|undefined} The regex source matching any stored phrase,
     *     or `undefined` while the trie is empty.
     */
    RegexTrie.prototype.toString = function () {
        if ( this._num_phrases_in_trie === 0 ) return;

        var self = this;

        // Depth-first walk: each node yields the pattern for all phrase
        // suffixes reachable from it.
        var walk = function (node) {
            var keys = Object.keys(node),
                alt_group = [],
                char_class = [],
                end = false; // marks the end of a phrase

            keys.forEach( function (key) {
                if ( key === 'end' ) {
                    end = true;
                    return;
                }

                var fragment = self._quotemeta(key) + walk(node[key]);

                // A node with more than one key (the `end` marker counts)
                // feeds the alternative group, otherwise the single-branch
                // list.
                ( keys.length > 1 ? alt_group : char_class ).push(fragment);
            });

            return self._to_regex(alt_group, char_class, end);
        };

        return walk(this._trie);
    };

    /**
     * Combines the fragments collected for one trie node into a pattern.
     *
     * @param alt_group {array} Fragments of a node that has several keys.
     * @param char_class {array} Fragment of a node that has a single key.
     * @param end {boolean} Whether a phrase ends at this node, which makes
     *     everything after it optional.
     * @return {string}
     */
    RegexTrie.prototype._to_regex = function (alt_group, char_class, end) {
        var result = "";

        if ( alt_group.length === 1 ) {
            // Individual elements are merged with the current result.
            result = alt_group[0];
        }
        else if ( alt_group.length > 1 ) {
            var all_single_chars = alt_group.every( function (fragment) {
                return fragment.length === 1;
            });

            // Single unescaped characters collapse into a character class;
            // anything longer needs a non-capturing alternative group.
            result = all_single_chars
                ? '[' + alt_group.join('') + ']'
                : '(?:' + alt_group.join('|') + ')';
        }
        else if ( char_class.length > 0 ) {
            result = char_class[0];
        }

        if ( end && result ) {
            result = ( result.length === 1 )
                ? result + '?'
                : '(?:' + result + ')?';
        }

        return result;
    };

    /**
     * @method contains()
     * @param phrase_to_fetch {string}
     * @return {boolean} `true` only when exactly this phrase was added.
     */
    RegexTrie.prototype.contains = function (phrase_to_fetch) {
        if ( ! this._is_phrase_valid(phrase_to_fetch) ) {
            return false;
        }

        var node = this._trie;

        for ( var i = 0; i < phrase_to_fetch.length; i++ ) {
            var chr = phrase_to_fetch.charAt(i);

            if ( ! has(node, chr) ) {
                return false;
            }
            node = node[chr];
        }

        return node.end === true;
    };

    /** Converts non-NaN numbers to strings; other values pass through. */
    RegexTrie.prototype._coerce_to_string = function (phrase) {
        if ( typeof phrase === 'number' && ! isNaN(phrase) ) {
            phrase = phrase.toString();
        }

        return phrase;
    };

    /** A phrase is valid when it is a non-empty string primitive. */
    RegexTrie.prototype._is_phrase_valid = function (phrase) {
        return ( typeof phrase === 'string' && phrase.length > 0 );
    };

    /**
     * Escapes `phrase` so that it matches itself literally inside a regex.
     * Invalid phrases are returned unchanged.
     *
     * @param phrase {string}
     * @return {string}
     */
    RegexTrie.prototype._quotemeta = function (phrase) {
        if ( ! this._is_phrase_valid(phrase) ) {
            return phrase;
        }

        return phrase
            // Pass 1: regex metacharacters only. Control characters are left
            // for pass 2; prefixing them here as well would escape them twice
            // (a tab would end up matching a backslash followed by "t").
            .replace(/[\\$()*+\-.?[\]^{|}]/g, '\\$&')
            // Pass 2: everything outside printable ASCII.
            .replace(/[^\x20-\x7E]/g, function (chr) {
                var code = chr.charCodeAt(0);

                // Control characters get a hex escape. String-style
                // mnemonics are unsafe here: `\b` is backspace in a string
                // but a word boundary in a regex.
                if ( code < 0x20 || code === 0x7F ) {
                    return '\\x' + ('0' + code.toString(16).toUpperCase()).slice(-2);
                }

                return jsesc(chr);
            });
    };

    return RegexTrie;
})();
module.exports = RegexTrie;
},{"jsesc":2}]},{},[1]);