最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

Javascript RegEx Remove Multiple words from string - Stack Overflow

programmeradmin1浏览0评论

Using Javascript. (note there is a similar post, but the OP requested Java, this is for Javascript)

I'm trying to remove a list of words from an entire string without looping (preferably using Regular Expressions).

This is what I have so far, and it removes some of the words but not all of them. Can someone help identify what I'm doing wrong with my RegEx function?

   //Remove all instances of the words in the array
  var removeUselessWords = function(txt) {
    // Filler words that should be stripped from the input text.
    var stopWords = [
      "a", "at", "be", "can", "cant", "could", "couldnt", "do", "does",
      "how", "i", "in", "is", "many", "much", "of", "on", "or",
      "should", "shouldnt", "so", "such", "the", "them", "they", "to",
      "us", "we", "what", "who", "why", "with", "wont", "would",
      "wouldnt", "you"
    ];

    // The separator is " | ", so the spaces become part of each alternative:
    // the first word must be followed by a space, the last must be preceded
    // by one, and every other word must have a space on both sides.
    var pattern = new RegExp(stopWords.join(" | "), 'gi');

    // Every match (the word plus the spaces it consumed) becomes one space.
    var cleaned = txt.replace(pattern, ' ');
    return cleaned;
  };

  // Sample sentence used to exercise removeUselessWords.
  var str = "The person is going on a walk in the park. The person told us to do what we need to do in the park";
  
  // Log the filtered sentence to the console.
  console.log(removeUselessWords(str));

// Desired output: "person going walk park. person told need park."

Using Javascript. (note there is a similar post, but the OP requested Java, this is for Javascript)

I'm trying to remove a list of words from an entire string without looping (preferably using Regular Expressions).

This is what I have so far, and it removes some of the words but not all of them. Can someone help identify what I'm doing wrong with my RegEx function?

   //Remove all instances of the words in the array
  var removeUselessWords = function(txt) {
    // Filler words that should be stripped from the input text.
    var stopWords = [
      "a", "at", "be", "can", "cant", "could", "couldnt", "do", "does",
      "how", "i", "in", "is", "many", "much", "of", "on", "or",
      "should", "shouldnt", "so", "such", "the", "them", "they", "to",
      "us", "we", "what", "who", "why", "with", "wont", "would",
      "wouldnt", "you"
    ];

    // The separator is " | ", so the spaces become part of each alternative:
    // the first word must be followed by a space, the last must be preceded
    // by one, and every other word must have a space on both sides.
    var pattern = new RegExp(stopWords.join(" | "), 'gi');

    // Every match (the word plus the spaces it consumed) becomes one space.
    var cleaned = txt.replace(pattern, ' ');
    return cleaned;
  };

  // Sample sentence used to exercise removeUselessWords.
  var str = "The person is going on a walk in the park. The person told us to do what we need to do in the park";
  
  // Log the filtered sentence to the console.
  console.log(removeUselessWords(str));

// Desired output: "person going walk park. person told need park."

Share Improve this question asked Apr 4, 2018 at 15:37 bagofmilkbagofmilk 1,5508 gold badges36 silver badges73 bronze badges 3
  • 1 Get rid of the whitespace around | for starters. – Jared Smith Commented Apr 4, 2018 at 15:41
  • If I do that, then the function removes all characters instead of words. (ie: "walk" would be "wlk") – bagofmilk Commented Apr 4, 2018 at 15:43
  • @Jared Smith, I retract my statement above, as RomanPerekhrest made use of your comment. – bagofmilk Commented Apr 4, 2018 at 15:47
Add a comment  | 

2 Answers 2

Reset to default 13

Three points:

  • join the array items with | and no surrounding spaces
  • enclose the regex alternation in a parenthesized group (...|...)
  • add the word boundary \b on both sides of the group so that only whole words match

/**
 * Remove every whole-word occurrence of the listed filler words from a string.
 *
 * Matching is case-insensitive. Each removed word is replaced by a space,
 * runs of whitespace are then collapsed to a single space, and leading and
 * trailing whitespace is stripped so that a removed first or last word does
 * not leave a stray space behind.
 *
 * @param {string} txt - Text to filter.
 * @returns {string} The text without the filler words.
 */
var removeUselessWords = function(txt) {
    var uselessWordsArray =
        [
          "a", "at", "be", "can", "cant", "could", "couldnt",
          "do", "does", "how", "i", "in", "is", "many", "much", "of",
          "on", "or", "should", "shouldnt", "so", "such", "the",
          "them", "they", "to", "us",  "we", "what", "who", "why",
          "with", "wont", "would", "wouldnt", "you"
        ];

    // \b on both sides of the group restricts matches to whole words, so
    // "a" is not stripped out of "walk". The group is non-capturing because
    // the matched text is never used.
    var wordPattern = new RegExp('\\b(?:' + uselessWordsArray.join('|') + ')\\b', 'gi');

    return txt.replace(wordPattern, ' ')
              .replace(/\s{2,}/g, ' ')
              .trim();
};

// Sample sentence used to exercise removeUselessWords.
var str = "The person is going on a walk in the park. The person told us to do what we need to do in the park";
  
// Log the filtered sentence to the console.
console.log(removeUselessWords(str));

Maybe this is what you want:

   //Remove all instances of the words in the array
  var removeUselessWords = function(txt) {
    // Filler words to strip from the input; matched case-insensitively.
    var uselessWordsArray =
        [
          "a", "at", "be", "can", "cant", "could", "couldnt",
          "do", "does", "how", "i", "in", "is", "many", "much", "of",
          "on", "or", "should", "shouldnt", "so", "such", "the",
          "them", "they", "to", "us",  "we", "what", "who", "why",
          "with", "wont", "would", "wouldnt", "you"
        ];

    // Wrap the whole alternation in \b(?:...)\b. Joining the words with
    // "\\b|\\b" alone leaves the first word without a leading boundary and
    // the last word without a trailing one, so "banana" would lose its final
    // "a" and "young" its leading "you".
    var expStr = '\\b(?:' + uselessWordsArray.join('|') + ')\\b';

    // Drop the matched words, strip the ends, then collapse the runs of
    // spaces left behind where words were removed.
    return txt.replace(new RegExp(expStr, 'gi'), '').trim().replace(/ +/g, ' ');
  };

  // Sample sentence used to exercise removeUselessWords.
  var str = "The person is going on a walk in the park. The person told us to do what we need to do in the park";
  
  // Log the filtered sentence to the console.
  console.log(removeUselessWords(str));

// Desired output: "person going walk park. person told need park."

发布评论

评论列表(0)

  1. 暂无评论