library(tidyverse)
# Example input `ex` (the literal looks like dput() output): a 3-row data
# frame carrying the tibble classes ("tbl_df", "tbl", "data.frame") with four
# columns, one row per group:
#   group        - character labels "Group A", "Group B", "Group C"
#   data         - list column; each element is a 25-row tibble with numeric
#                  columns a, b, c, d
#   h_candidates - list column; each element is a named numeric vector of
#                  length 6 with names "0%", "0.01%", ..., "0.05%"
#   assignment   - list column; each element is a 25-row tibble with five
#                  numeric columns named `0%`, `0.01%`, ..., `0.04%`
# Note: h_candidates has a "0.05%" entry for which the assignment tibbles
# have no corresponding column.
ex <- structure(list(group = c("Group A", "Group B", "Group C"), data = list(
# data[[1]] (row "Group A"); the tibbles for the other two rows follow.
structure(list(a = c(25.1, 15.1, 28.7, 29.7, 5.3, 3.4, 5.3,
10.1, 2.4, 18, 4.7, 22.1, 9.5, 3.1, 26.5, 5.1, 24, 22.5,
19.4, 22.9, 24.5, 18.2, 7.9, 5.3, 24.7), b = c(95.1, 51,
100, 94.1, 47.3, 0, 50.7, 45.8, 40.7, 49.4, 51.9, 76.4, 26.7,
19.8, 37.4, 59.4, 59.1, 60.2, 26.1, 2.8, 100, 40.7, 56.4,
42.5, 0), c = c(39.9, 42.7, 16.3, 11.1, 56.9, 17.8, 62, 28.1,
43, 44.8, 54.8, 8.7, 5.5, 40.2, 7.7, 60.7, 24.8, 7.5, 3.5,
16.9, 31.6, 45.8, 76.7, 58.6, 15.8), d = c(-2.39999999999999,
28.6, -4.59999999999999, -1.39999999999999, 10.3, 3.1, 23.4,
-43, -36.3, 32.4, 33.1, 9.8, 1.5, -17.6, 16.6, 20.9, 7.8,
-1.7, -23.3, 0, -15, 59.3, -40.2, 46.9, 4.7)), .Names = c("a",
"b", "c", "d"), row.names = c(NA, -25L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(a = c(5, 4.7, 30.3,
14.3, 31.6, 6, 4.9, 23.3, 26.9, 16.9, 27.2, 23.8, 19.9, 28.6,
9.9, 17.4, 14.3, 12.5, 30.4, 30.3, 30, 6, 18, 23.7, 5.1),
b = c(48.9, 41.3, 20.1, 63.7, 85.1, 30.3, 52.8, 49.7,
27.1, 51.6, 21.8, 52.4, 52.5, 59.6, 13.7, 53.1, 69, 66.9,
23.4, 35.4, 45.8, 23.7, 62.9, 90.3, 59.6), c = c(37.4,
18.5, 64.6, 13.5, 7.8, 6.8, 12.7, 8.5, 7.8, 5.4, 14.1,
20.5, 10.9, 10.5, 7.5, 14.7, 6.9, 0.699999999999999,
4.7, 1.9, 11.9, 0.9, 7.2, 9.2, 42.2), d = c(4.9, -3.7,
13.5, 21.9, -2.69999999999999, 6.6, 0.5, -12.3, 38.7,
-25.8, -18, 28.4, 38.3, -3.6, 39.4, 19, 23.4, -38.7,
17, 36.3, -31.7, -9.3, -10.5, 9.7, -10.6)), .Names = c("a",
"b", "c", "d"), row.names = c(NA, -25L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(a = c(29.9, 12.8, 23.9,
26.2, 27.5, 32.6, 33.2, 24.8, 29, 22.6, 4.7, 25.6, 4.7, 13.1,
25.9, 14.5, 23.5, 26.6, 12.8, 24.1, 9.1, 31.9, 24.8, 4.6,
17.9), b = c(63.7, 23.3, 71.2, 46.7, 30.6, 49.3, 14.6, 68.4,
27.9, 49.1, 60.5, 26.4, 56.9, 55.4, 37.9, 40.7, 32.7, 68.5,
42.7, 27.9, 67.5, 43.4, 76.6, 53.3, 26.8), c = c(1.6, 32,
18.6, 14, 0.5, 7.2, 27.3, 8.9, 11, 15.5, 16.7, 16.4, 63.1,
14.7, 6.8, 9, 3.1, 11.7, 11, 11.5, 10.6, 14.9, 7.1, 13.2,
5.1), d = c(-35.4, 21, 12, 1.8, 37.6, 9.2, 17.6, 0, -19.4,
32.6, -32, -3.6, 7.2, -25.7, 9.1, -8, 35.8, 24.8, -13.9,
-21.7, -28.7, 0.200000000000003, -16.9, -26.5, 26.2)), .Names = c("a",
"b", "c", "d"), row.names = c(NA, -25L), class = c("tbl_df",
"tbl", "data.frame"))), h_candidates = list(structure(c(0.17320508075689, 2.37782856461527, 2.94890646051978, 3.35205778704499, 3.66771041547043, 3.95224618679369), .Names = c("0%", "0.01%", "0.02%", "0.03%", "0.04%", "0.05%")), structure(c(0.316227766016836, 2.63452963884554, 3.2327619513522, 3.63593179253957, 3.97743636027027, 4.22137418384109), .Names = c("0%", "0.01%", "0.02%", "0.03%", "0.04%", "0.05%")), structure(c(0.316227766016837, 2.7258026340878, 3.24807635378234, 3.62353418639869, 3.92683078321437, 4.17731971484109), .Names = c("0%", "0.01%", "0.02%", "0.03%", "0.04%", "0.05%"))), assignment = list(
# assignment[[1]] (row "Group A"); in every assignment tibble the `0%`
# column is simply 1..25, while later columns contain repeated ids.
structure(list(`0%` = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25),
`0.01%` = c(1, 2, 3, 3, 4, 5, 4, 6, 7, 8, 9, 10, 11,
12, 13, 4, 14, 15, 16, 17, 18, 19, 20, 21, 17), `0.02%` = c(1,
2, 3, 3, 4, 5, 4, 6, 7, 8, 9, 10, 11, 12, 13, 4, 14,
15, 16, 17, 18, 19, 20, 21, 17), `0.03%` = c(1, 2, 3,
3, 4, 5, 4, 6, 7, 8, 9, 10, 11, 12, 13, 4, 10, 14, 15,
16, 17, 18, 19, 9, 16), `0.04%` = c(1, 2, 3, 4, 5, 6,
5, 7, 8, 9, 10, 11, 12, 13, 14, 5, 11, 15, 16, 17, 18,
19, 20, 10, 17)), .Names = c("0%", "0.01%", "0.02%",
"0.03%", "0.04%"), row.names = c(NA, -25L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(`0%` = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25), `0.01%` = c(1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16, 4, 17, 18, 19, 20, 21, 22,
23, 24), `0.02%` = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 13, 4, 16, 17, 9, 18, 19, 14, 20, 21), `0.03%` = c(1,
2, 3, 4, 5, 6, 2, 7, 8, 9, 10, 11, 12, 13, 14, 12, 4, 15,
6, 8, 16, 17, 13, 18, 19), `0.04%` = c(1, 2, 3, 4, 5, 6,
2, 7, 8, 9, 10, 11, 12, 13, 14, 12, 4, 15, 6, 8, 7, 16, 13,
17, 1)), .Names = c("0%", "0.01%", "0.02%", "0.03%", "0.04%"
), row.names = c(NA, -25L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(`0%` = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
), `0.01%` = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 12, 15, 16, 17, 15, 18, 19, 4, 20, 21, 22), `0.02%` = c(1,
2, 3, 4, 5, 6, 7, 8, 9, 5, 10, 11, 12, 13, 11, 14, 5, 15,
14, 16, 17, 18, 8, 19, 20), `0.03%` = c(1, 2, 3, 4, 5, 6,
7, 3, 8, 9, 10, 11, 12, 10, 11, 13, 5, 14, 13, 8, 10, 4,
3, 13, 6), `0.04%` = c(1, 2, 3, 4, 5, 5, 6, 3, 7, 8, 9, 10,
11, 9, 10, 12, 5, 13, 12, 7, 9, 4, 3, 12, 5)), .Names = c("0%",
"0.01%", "0.02%", "0.03%", "0.04%"), row.names = c(NA, -25L
), class = c("tbl_df", "tbl", "data.frame")))), .Names = c("group", "data", "h_candidates", "assignment"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -3L))
Имея данные, структурированные как показано выше, я хотел бы изменить во фреймах данных (data.frame) из столбца `assignment` все значения, которые встречаются в своём столбце менее `k` раз (скажем, `k = 5`).
Поэтому мне нужно решение, которое по очереди берёт каждый data.frame, затем каждый столбец в этом data.frame, проверяет, какие значения встречаются в столбце менее 5 раз, и, если такие есть, просто заменяет их на `0`.
В идеале решение должно использовать функции tidyverse. Думаю, здесь нужны вложенные вызовы `purrr::map`, а также `dplyr::mutate`, но я не знаю, как подсчитать число вхождений каждого значения в столбце и затем заменить нужные значения.
Речь идёт о `ex$assignment`? Кажется, это единственный столбец, фреймы данных в котором содержат повторяющиеся значения. — Omry Atia, 08.08.2018