te')); return $arr; } /* 遍历用户所有主题 * @param $uid 用户ID * @param int $page 页数 * @param int $pagesize 每页记录条数 * @param bool $desc 排序方式 TRUE降序 FALSE升序 * @param string $key 返回的数组用那一列的值作为 key * @param array $col 查询哪些列 */ function thread_tid_find_by_uid($uid, $page = 1, $pagesize = 1000, $desc = TRUE, $key = 'tid', $col = array()) { if (empty($uid)) return array(); $orderby = TRUE == $desc ? -1 : 1; $arr = thread_tid__find($cond = array('uid' => $uid), array('tid' => $orderby), $page, $pagesize, $key, $col); return $arr; } // 遍历栏目下tid 支持数组 $fid = array(1,2,3) function thread_tid_find_by_fid($fid, $page = 1, $pagesize = 1000, $desc = TRUE) { if (empty($fid)) return array(); $orderby = TRUE == $desc ? -1 : 1; $arr = thread_tid__find($cond = array('fid' => $fid), array('tid' => $orderby), $page, $pagesize, 'tid', array('tid', 'verify_date')); return $arr; } function thread_tid_delete($tid) { if (empty($tid)) return FALSE; $r = thread_tid__delete(array('tid' => $tid)); return $r; } function thread_tid_count() { $n = thread_tid__count(); return $n; } // 统计用户主题数 大数量下严谨使用非主键统计 function thread_uid_count($uid) { $n = thread_tid__count(array('uid' => $uid)); return $n; } // 统计栏目主题数 大数量下严谨使用非主键统计 function thread_fid_count($fid) { $n = thread_tid__count(array('fid' => $fid)); return $n; } ?>r - Trying to slice a group but keeping the members of a certain subgroup - Stack Overflow
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

r - Trying to slice a group but keeping the members of a certain subgroup - Stack Overflow

programmeradmin3浏览0评论

So, I have the following code where, out of a list of candidates in candidates2024, I keep only the two with the highest number of votes in each group (after grouping candidates by city and state):

# Keep, for each city/state group, the two rows with the highest
# qt_votos_nom_validos. The incumbent column is not consulted here, so an
# incumbent outside the top two is dropped.
# NOTE(review): per the dplyr docs slice_max() keeps ties by default
# (with_ties = TRUE), so a group may return more than two rows - confirm
# whether that is intended.
candidates2024 <- candidates2024 %>%
  group_by(nm_municipio, sg_uf) %>%  # Group by both city name and state
  slice_max(order_by = qt_votos_nom_validos, n = 2) %>%  # Select top 2 candidates per group
  ungroup()

However, there is a third variable I would like to account for. The dummy variable incumbent is 1 if the candidate won their previous election and 0 otherwise. If there is a candidate in a group that is the incumbent, I would like them to automatically take up one of the two slots, and then the next slot goes to the candidate with the (next) highest number of votes. If there are no incumbents in the city the code should run normally.

Here is the dput:

structure(list(sg_uf = c("BA", "BA", "BA", "BA", "BA", "BA"), 
nm_municipio = c("JACARACI", "JACARACI", "JACOBINA", "JACOBINA", 
"JACOBINA", "JACOBINA"), cd_cargo = c(11L, 11L, 11L, 11L, 
11L, 11L), ds_cargo = c("Prefeito", "Prefeito", "Prefeito", 
"Prefeito", "Prefeito", "Prefeito"), nr_candidato = c(40L, 
55L, 35L, 40L, 65L, 50L), nm_candidato = c("DEUSDEDIT CARVALHO ROCHA", 
"HANNEY LADEIA SOARES FLORES", "VALDICE CASTRO VIEIRA DA SILVA", 
"MARIANA MATOS DE OLIVEIRA", "TIAGO MANOEL DIAS FERREIRA", 
"VALESSIO SOARES DE BRITO"), nm_urna_candidato = c("DETINHO", 
"HANNEY LADEIA", "VALDICE", "MARIANA OLIVEIRA", "TIAGO DIAS", 
"VALESSIO FILHO DE JACOBINA"), sg_partido = c("PSB", "PSD", 
"PMB", "PSB", "PC do B", "PSOL"), ds_composicao_coligacao = c("Federação BRASIL DA ESPERANÇA - FE BRASIL(PT/PC do B/PV) / AGIR / PSB", 
"PP / PSD", "REPUBLICANOS / PP / PDT / PL / PRD / DC / PMB / SOLIDARIEDADE", 
"AVANTE / Federação PSDB CIDADANIA(PSDB/CIDADANIA) / PSB", 
"PRTB / PSD / MOBILIZA / MDB / AGIR / PODE / Federação BRASIL DA ESPERANÇA - FE BRASIL(PT/PC do B/PV)", 
"Federação PSOL REDE(PSOL/REDE)"), nr_turno = c(1L, 1L, 
1L, 1L, 1L, 1L), ds_sit_totalizacao = c("Eleito", "Não Eleito", 
"Eleito", "Não Eleito", "Não Eleito", "Não Eleito"), dt_ult_totalizacao = c("2024-10-06 20:25:25", 
"2024-10-06 20:25:25", "2024-10-06 20:08:52", "2024-10-06 20:08:52", 
"2024-10-06 20:08:52", "2024-10-06 20:08:52"), sg_ue = c(36498L, 
36498L, 36510L, 36510L, 36510L, 36510L), sq_candidato = c(50002150514, 
50002002442, 50002347768, 50002074476, 50002074352, 50002074411
), nm_tipo_destinacao_votos = c("Válido", "Válido", "Válido", 
"Válido", "Válido", "Válido"), sq_eleicao_divulga = c(2045202024L, 
2045202024L, 2045202024L, 2045202024L, 2045202024L, 2045202024L
), aa_eleicao = c(2024L, 2024L, 2024L, 2024L, 2024L, 2024L
), nm_regiao = c("NORDESTE", "NORDESTE", "NORDESTE", "NORDESTE", 
"NORDESTE", "NORDESTE"), pc_votos_validos = c(0.5092, 0.4908, 
0.4686, 0.3296, 0.1921, 0.0097), qt_votos_nom_validos = c(4804L, 
4630L, 21952L, 15444L, 9000L, 454L), qt_votos_concorrentes = c(9434L, 
9434L, 46850L, 46850L, 46850L, 46850L), incumbent = c(0, 
0, 0, 0, 1, 0), ranLastElection = c(0, 0, 0, 1, 1, 0)), row.names = 1088:1093, class = "data.frame")

My code as it is would cut the candidate Tiago Dias, as he got the third-most votes in his city. However, I want to keep him because he is the incumbent, so the desired result is that Tiago Dias and Valdice (most votes) are kept in the data frame.

Thank you for the help!

So, I have the following code where, out of a list of candidates in candidates2024, I keep only the two with the highest number of votes in each group (after grouping candidates by city and state):

# Keep, for each city/state group, the two rows with the highest
# qt_votos_nom_validos. The incumbent column is not consulted here, so an
# incumbent outside the top two is dropped.
# NOTE(review): per the dplyr docs slice_max() keeps ties by default
# (with_ties = TRUE), so a group may return more than two rows - confirm
# whether that is intended.
candidates2024 <- candidates2024 %>%
  group_by(nm_municipio, sg_uf) %>%  # Group by both city name and state
  slice_max(order_by = qt_votos_nom_validos, n = 2) %>%  # Select top 2 candidates per group
  ungroup()

However, there is a third variable I would like to account for. The dummy variable incumbent is 1 if the candidate won their previous election and 0 otherwise. If there is a candidate in a group that is the incumbent, I would like them to automatically take up one of the two slots, and then the next slot goes to the candidate with the (next) highest number of votes. If there are no incumbents in the city the code should run normally.

Here is the dput:

structure(list(sg_uf = c("BA", "BA", "BA", "BA", "BA", "BA"), 
nm_municipio = c("JACARACI", "JACARACI", "JACOBINA", "JACOBINA", 
"JACOBINA", "JACOBINA"), cd_cargo = c(11L, 11L, 11L, 11L, 
11L, 11L), ds_cargo = c("Prefeito", "Prefeito", "Prefeito", 
"Prefeito", "Prefeito", "Prefeito"), nr_candidato = c(40L, 
55L, 35L, 40L, 65L, 50L), nm_candidato = c("DEUSDEDIT CARVALHO ROCHA", 
"HANNEY LADEIA SOARES FLORES", "VALDICE CASTRO VIEIRA DA SILVA", 
"MARIANA MATOS DE OLIVEIRA", "TIAGO MANOEL DIAS FERREIRA", 
"VALESSIO SOARES DE BRITO"), nm_urna_candidato = c("DETINHO", 
"HANNEY LADEIA", "VALDICE", "MARIANA OLIVEIRA", "TIAGO DIAS", 
"VALESSIO FILHO DE JACOBINA"), sg_partido = c("PSB", "PSD", 
"PMB", "PSB", "PC do B", "PSOL"), ds_composicao_coligacao = c("Federação BRASIL DA ESPERANÇA - FE BRASIL(PT/PC do B/PV) / AGIR / PSB", 
"PP / PSD", "REPUBLICANOS / PP / PDT / PL / PRD / DC / PMB / SOLIDARIEDADE", 
"AVANTE / Federação PSDB CIDADANIA(PSDB/CIDADANIA) / PSB", 
"PRTB / PSD / MOBILIZA / MDB / AGIR / PODE / Federação BRASIL DA ESPERANÇA - FE BRASIL(PT/PC do B/PV)", 
"Federação PSOL REDE(PSOL/REDE)"), nr_turno = c(1L, 1L, 
1L, 1L, 1L, 1L), ds_sit_totalizacao = c("Eleito", "Não Eleito", 
"Eleito", "Não Eleito", "Não Eleito", "Não Eleito"), dt_ult_totalizacao = c("2024-10-06 20:25:25", 
"2024-10-06 20:25:25", "2024-10-06 20:08:52", "2024-10-06 20:08:52", 
"2024-10-06 20:08:52", "2024-10-06 20:08:52"), sg_ue = c(36498L, 
36498L, 36510L, 36510L, 36510L, 36510L), sq_candidato = c(50002150514, 
50002002442, 50002347768, 50002074476, 50002074352, 50002074411
), nm_tipo_destinacao_votos = c("Válido", "Válido", "Válido", 
"Válido", "Válido", "Válido"), sq_eleicao_divulga = c(2045202024L, 
2045202024L, 2045202024L, 2045202024L, 2045202024L, 2045202024L
), aa_eleicao = c(2024L, 2024L, 2024L, 2024L, 2024L, 2024L
), nm_regiao = c("NORDESTE", "NORDESTE", "NORDESTE", "NORDESTE", 
"NORDESTE", "NORDESTE"), pc_votos_validos = c(0.5092, 0.4908, 
0.4686, 0.3296, 0.1921, 0.0097), qt_votos_nom_validos = c(4804L, 
4630L, 21952L, 15444L, 9000L, 454L), qt_votos_concorrentes = c(9434L, 
9434L, 46850L, 46850L, 46850L, 46850L), incumbent = c(0, 
0, 0, 0, 1, 0), ranLastElection = c(0, 0, 0, 1, 1, 0)), row.names = 1088:1093, class = "data.frame")

My code as it is would cut the candidate Tiago Dias, as he got the third-most votes in his city. However, I want to keep him because he is the incumbent, so the desired result is that Tiago Dias and Valdice (most votes) are kept in the data frame.

Thank you for the help!

Share Improve this question edited Feb 17 at 12:25 justsomeslav asked Feb 17 at 11:42 justsomeslavjustsomeslav 112 bronze badges 2
  • Could you please share some reproducible data using dput? – Quinten Commented Feb 17 at 11:49
  • 1 Done (I think, I'm new to stackoverflow so I don't quite know how it works) – justsomeslav Commented Feb 17 at 12:21
Add a comment  | 

1 Answer 1

Reset to default 0

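In {dplyr}, sort each group so that incumbents come first and the remaining candidates follow by votes in descending order, then keep the first two rows of each group: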

library(dplyr)

# Within each city/state group, order rows with incumbents first and then by
# votes in descending order, so the first two rows are the incumbent (if any)
# followed by the top vote-getter(s). With no incumbent in a group this is
# simply the top two by votes.
candidates2024 |>
  group_by(nm_municipio, sg_uf) |>
  arrange(desc(incumbent), desc(qt_votos_nom_validos), .by_group = TRUE) |>
  slice_head(n = 2) |>
  ungroup() # drop the grouping so later steps are not silently grouped
# A tibble: 4 × 5
  nm_candidato                   nm_municipio sg_uf incumbent qt_votos_nom_validos
  <chr>                          <chr>        <chr>     <dbl>                <int>
1 DEUSDEDIT CARVALHO ROCHA       JACARACI     BA            0                 4804
2 HANNEY LADEIA SOARES FLORES    JACARACI     BA            0                 4630
3 TIAGO MANOEL DIAS FERREIRA     JACOBINA     BA            1                 9000
4 VALDICE CASTRO VIEIRA DA SILVA JACOBINA     BA            0                21952

One way with base R (this needs R >= 4.4.0 for `sort_by()`).

# Split by city/state, sort each piece with incumbents first and then by
# votes (both descending), keep the first two rows, and stack the pieces.
# sort_by() requires R >= 4.4.0.
# drop = TRUE skips city/state combinations that do not occur in the data;
# without it split() builds one (empty) piece per combination.
split(candidates2024, ~ nm_municipio + sg_uf, drop = TRUE) |>
  lapply(\(l) {
    sort_by(l, ~ incumbent + qt_votos_nom_validos, decreasing = TRUE) |>
      head(n = 2)
  }) |>
  do.call(what = "rbind") |>
  `rownames<-`(NULL) # cosmetics
                    nm_candidato nm_municipio sg_uf incumbent qt_votos_nom_validos
1       DEUSDEDIT CARVALHO ROCHA     JACARACI    BA         0                 4804
2    HANNEY LADEIA SOARES FLORES     JACARACI    BA         0                 4630
3     TIAGO MANOEL DIAS FERREIRA     JACOBINA    BA         1                 9000
4 VALDICE CASTRO VIEIRA DA SILVA     JACOBINA    BA         0                21952

Note that, for readability, we have subset the data to include just the columns needed to complete the task:

# Keep only the columns needed for the task so the printed output is readable.
candidates2024 <- subset(
  candidates2024,
  select = c(nm_candidato, nm_municipio, sg_uf, incumbent, qt_votos_nom_validos)
)
发布评论

评论列表(0)

  1. 暂无评论