te')); return $arr; } /* 遍历用户所有主题 * @param $uid 用户ID * @param int $page 页数 * @param int $pagesize 每页记录条数 * @param bool $desc 排序方式 TRUE降序 FALSE升序 * @param string $key 返回的数组用那一列的值作为 key * @param array $col 查询哪些列 */ function thread_tid_find_by_uid($uid, $page = 1, $pagesize = 1000, $desc = TRUE, $key = 'tid', $col = array()) { if (empty($uid)) return array(); $orderby = TRUE == $desc ? -1 : 1; $arr = thread_tid__find($cond = array('uid' => $uid), array('tid' => $orderby), $page, $pagesize, $key, $col); return $arr; } // 遍历栏目下tid 支持数组 $fid = array(1,2,3) function thread_tid_find_by_fid($fid, $page = 1, $pagesize = 1000, $desc = TRUE) { if (empty($fid)) return array(); $orderby = TRUE == $desc ? -1 : 1; $arr = thread_tid__find($cond = array('fid' => $fid), array('tid' => $orderby), $page, $pagesize, 'tid', array('tid', 'verify_date')); return $arr; } function thread_tid_delete($tid) { if (empty($tid)) return FALSE; $r = thread_tid__delete(array('tid' => $tid)); return $r; } function thread_tid_count() { $n = thread_tid__count(); return $n; } // 统计用户主题数 大数量下严谨使用非主键统计 function thread_uid_count($uid) { $n = thread_tid__count(array('uid' => $uid)); return $n; } // 统计栏目主题数 大数量下严谨使用非主键统计 function thread_fid_count($fid) { $n = thread_tid__count(array('fid' => $fid)); return $n; } ?>r - Count how often words from a vector occur in a string - Stack Overflow
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

r - Count how often words from a vector occur in a string - Stack Overflow

programmeradmin3浏览0评论

I have a string of text and a vector of words:

String: "Auch ein blindes Huhn findet einmal ein Korn."
Vector: "auch", "ein"

I want to check how often each word in the vector is contained in the string and calculate the sum of the frequencies. For the example, the correct result would be 3.

I have come so far as to be able to check which words occur in the string and calculate the sum:

library(stringr)
deu <- c("\\bauch\\b", "\\bein\\b")
str_detect(tolower("Auch ein blindes Huhn findet einmal ein Korn."), deu)

[1] TRUE TRUE

sum(str_detect(tolower("Auch ein blindes Huhn findet einmal ein Korn."), deu))

[1] 2

Unfortunately, str_detect does not return the number of occurrences (1, 2) but only whether each word occurs in the string (TRUE, TRUE), so the sum of its output is the number of distinct words found (2), not the total number of occurrences (3).

Is there a function in R similar to preg_match_all in PHP?

preg_match_all("/\bauch\b|\bein\b/i", "Auch ein blindes Huhn findet einmal ein Korn.", $matches);
print_r($matches);

Array
(
    [0] => Array
        (
            [0] => Auch
            [1] => ein
            [2] => ein
        )

)

echo preg_match_all("/\bauch\b|\bein\b/i", "Auch ein blindes Huhn findet einmal ein Korn.", $matches);

3

I would like to avoid loops.


I have looked at a lot of similar questions, but they either don't count the number of occurrences or do not use a vector of patterns to search. I may have overlooked a question that answers mine, but before you mark this as duplicate, please make sure that the "duplicate" actually asks the exact same thing. Thank you.

I have a string of text and a vector of words:

String: "Auch ein blindes Huhn findet einmal ein Korn."
Vector: "auch", "ein"

I want to check how often each word in the vector is contained in the string and calculate the sum of the frequencies. For the example, the correct result would be 3.

I have come so far as to be able to check which words occur in the string and calculate the sum:

library(stringr)
deu <- c("\\bauch\\b", "\\bein\\b")
str_detect(tolower("Auch ein blindes Huhn findet einmal ein Korn."), deu)

[1] TRUE TRUE

sum(str_detect(tolower("Auch ein blindes Huhn findet einmal ein Korn."), deu))

[1] 2

Unfortunately, str_detect does not return the number of occurrences (1, 2) but only whether each word occurs in the string (TRUE, TRUE), so the sum of its output is the number of distinct words found (2), not the total number of occurrences (3).

Is there a function in R similar to preg_match_all in PHP?

preg_match_all("/\bauch\b|\bein\b/i", "Auch ein blindes Huhn findet einmal ein Korn.", $matches);
print_r($matches);

Array
(
    [0] => Array
        (
            [0] => Auch
            [1] => ein
            [2] => ein
        )

)

echo preg_match_all("/\bauch\b|\bein\b/i", "Auch ein blindes Huhn findet einmal ein Korn.", $matches);

3

I would like to avoid loops.


I have looked at a lot of similar questions, but they either don't count the number of occurrences or do not use a vector of patterns to search. I may have overlooked a question that answers mine, but before you mark this as duplicate, please make sure that the "duplicate" actually asks the exact same thing. Thank you.

Share Improve this question asked 2 days ago BenBen 49112 bronze badges 4
  • 1 what you want is str_count(tolower("Auch ein blindes Huhn findet mal ein Korn"), paste0("\\b", c("ein","Huhn"), "\\b")). See this post, which is similar: https://stackoverflow.com/a/67195512/28479453 – Tim G Commented 2 days ago
  • @dog That's brilliant (and stupid of me!). Would you add that as an answer? I'd like to accept it. – Ben Commented 2 days ago
  • simple loop in python standard lib: for i in ["auch", "ein"]: print(i + ":", "Auch ein blindes Huhn findet einmal ein Korn.".lower().split().count(i)) – Friede Commented 2 days ago
  • 1 @Friede The question is about R. – Ben Commented 2 days ago
Add a comment  | 

4 Answers 4

Reset to default 5

You can use str_count like

stringr::str_count(tolower("Auch ein blindes Huhn findet mal ein Korn"), paste0("\\b", tolower(c("ein","Huhn")), "\\b"))
[1] 2 1

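You could build each pattern with sprintf, wrapping the word in \\b word boundaries, and then apply lengths to the result of gregexpr. Note that gregexpr returns -1 when there is no match, so a word that does not occur at all would still be reported with a count of 1 by this approach.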

> vp <- v |> sprintf(fmt='\\b%s\\b') |> setNames(v) |> print()
        auch          ein 
"\\bauch\\b"  "\\bein\\b" 
> lapply(vp, gregexpr, text=tolower(string)) |> unlist(recursive=FALSE) |> lengths()
auch  ein 
   1    2 

The |> print() is just for simultaneously assigning and printing and can be removed.


Data:

string <- "Auch ein blindes Huhn findet einmal ein Korn."
v <- c("auch", "ein")

Given a string and a vector of patterns like the ones below

s <- "Auch ein blindes Huhn findet einmal ein Korn."
p <- c("auch", "ein")

you can try strsplit + %in%:

  • Option 1 (to get the sum of occurrences)
> sum(gsub("\\W", "", strsplit(tolower(s), " ")[[1]]) %in% p)
[1] 3
  • Option 2 (use table if you would like to see the summary of counts)
> table(gsub("\\W", "", strsplit(tolower(s), " ")[[1]]))[p]

auch  ein
   1    2

Character String Processing

If base R is too complex in its syntax, I would go with {stringi}

stringi::stri_count_regex(tolower(String), sprintf('\\b%s\\b', Vector)) |> 
  setNames(Vector) # optional
auch  ein 
   1    2 

Data

String = 'Auch ein blindes Huhn findet einmal ein Korn.'
Vector = c('auch', 'ein')
发布评论

评论列表(0)

  1. 暂无评论