有没有办法将多个数据框条目连接成单个条目？

我正在处理Excel电子表格中的一些活动前和活动后的数据，其中一个问题可能有多个答案（学生必须选出所有正确的答案才能得到该题的满分）。但在数据的原始格式中，每个回答都单独占一行，其他元数据（数据库信息、学生用户名、性别等）则被原样复制到每一行——我将在下面的例子中详细说明。

> data
Database  Username  Gender  InterviewType  Question                    Answer
1         steve     Male    Pre            Which of the following...   "Response 1"
1         steve     Male    Pre            Which of the following...   "Response 2"
1         steve     Male    Pre            Which of the following...   "Response 3"
1         steve     Male    Pre            Please indicate your race.  "White"
1         steve     Male    Pre            Explain how you would...    "Response"
2         jenna     Female  Pre            Which of the following...   "Response 1"
2         jenna     Female  Pre            Which of the following...   "Response 2"
2         jenna     Female  Pre            Please indicate your race.  "White"
2         jenna     Female  Pre            Explain how you would...    "Response"
1         jack      Male    Pre            Which of the following...   "Response 1"
1         jack      Male    Pre            Which of the following...   "Response 2"
1         jack      Male    Pre            Which of the following...   "Response 3"
1         jack      Male    Pre            Please indicate your race.  "Black"
1         jack      Male    Pre            Explain how you would...    "Response"
3         billy     Male    Pre            Which of the following...   "Response 1"
3         billy     Male    Pre            Which of the following...   "Response 2"
3         billy     Male    Pre            Please indicate your race.  "Other"
3         billy     Male    Pre            Explain how you would...    "Response"

如果把数据重新整理成每个问题在数据框中各占一列的格式，使用起来会方便得多。因此我们合作编写了下面的代码，将每个不同的问题转换为单独的一列：

 # Reshape long-format interview responses (one row per answer) into a wide
 # table with one row per student and one column per question.
 #
 # Input:  `data`, with the metadata columns Db, username, gender and
 #         interviewForm plus the columns Question and Answer.
 # Output: `out`, an all-character data frame whose row names are the
 #         " & "-joined metadata of each student. Its first four columns hold
 #         the metadata and the remaining columns hold one question each.
 #         Cells without an answer contain the string "NA".
 #
 # NOTE: when a student gave several answers to the same question, every
 # assignment in the fill loop below overwrites the previous one, so only the
 # last answer survives. That is the limitation the question asks about.

 x <- data

 # Drop rows whose Db cell holds the literal text "Db" (presumably header rows
 # repeated inside the sheet -- confirm against the source file).
 # A logical mask is used instead of x[-which(...), ]: when nothing matches,
 # which() returns integer(0) and x[-integer(0), ] would drop every row.
 is_header_row <- !is.na(x$Db) & x$Db == "Db"
 x <- x[!is_header_row, ]

 meta_cols <- c("Db", "username", "gender", "interviewForm")
 questions <- as.character(unique(x$Question))

 # One key per input row identifying the student. Columns are converted with
 # as.character() first so that factor and numeric columns give the same key
 # everywhere the key is used.
 concdMeta <- do.call(
   paste,
   c(unname(lapply(x[meta_cols], as.character)), sep = " & ")
 )
 students <- unique(concdMeta)

 # Character matrix: one row per student, metadata columns then questions.
 out <- matrix(
   NA_character_,
   nrow = length(students),
   ncol = length(meta_cols) + length(questions),
   dimnames = list(students, c(meta_cols, questions))
 )

 # Metadata comes from the first input row of each student; unique() and
 # !duplicated() both keep first occurrences, so the row order matches.
 first_row <- !duplicated(concdMeta)
 for (j in seq_along(meta_cols)) {
   out[, j] <- as.character(x[[meta_cols[j]]][first_row])
 }

 # Place every answer in its (student, question) cell. Rows are processed in
 # input order, so a later answer replaces an earlier one (see NOTE above).
 student_row <- match(concdMeta, students)
 question_col <- length(meta_cols) + match(as.character(x$Question), questions)
 answers <- as.character(x$Answer)
 for (i in seq_len(nrow(x))) {
   out[student_row[i], question_col[i]] <- answers[i]
 }

 out <- data.frame(out, stringsAsFactors = FALSE)
 out[is.na(out)] <- "NA"

上面的代码运行良好，只有一个问题：对于有多个回答的问题，它只保留每个学生的最后一个回答。例如，对上面的steve只会列出“Response 3”，对jenna只会列出“Response 2”，依此类推。结果如下：

> data.reformatted
Database  Username  Gender  InterviewType  Which of the following...  Please indicate...  Explain how...
1         steve     Male    Pre            "Response 3"               "White"             "Response"
2         jenna     Female  Pre            "Response 2"               "White"             "Response"
1         jack      Male    Pre            "Response 3"               "Black"             "Response"
3         billy     Male    Pre            "Response 2"               "Other"             "Response"

能否对上面的代码稍作修改，把同一问题的所有回答连接到同一个单元格中？也就是说，最终的数据集应如下所示：

Database  Username  Gender  InterviewType  Which of the following...              Please indicate...  Explain how...
1         steve     Male    Pre            "Response 1, Response 2, Response 3"   "White"             "Response"
2         jenna     Female  Pre            "Response 1, Response 2"               "White"             "Response"
1         jack      Male    Pre            "Response 1, Response 2, Response 3"   "Black"             "Response"
3         billy     Male    Pre            "Response 1, Response 2"               "Other"             "Response"

作为对另一个回答的补充，下面是一个更自动化的版本（不需要手动写出每个问题对应的列名）：

 # Generalised reshape: collapse all answers a student gave to each question
 # into one comma-separated cell, without hard-coding the question columns.
 #
 # Input:  `data` with columns Database, Username, Gender, InterviewType,
 #         Question and Answer (one row per answer).
 # Output: `dummy2`, one row per student and one column per question, named
 #         with the original question text.
 # Requires dplyr >= 1.0 (across) and tidyr >= 1.0 (pivot_wider).
 library(dplyr)
 library(tidyr)

 # Give every row a unique id so that each input row becomes its own row in
 # the wide table instead of clashing with other answers to the same question.
 data$rownum <- seq_len(nrow(data))

 questions <- as.character(unique(data$Question))
 # sprintf() returns character(0) for zero questions, unlike paste0().
 qNames <- sprintf("q%d", seq_along(questions))

 data <- data.frame(lapply(data, as.character), stringsAsFactors = FALSE)

 # Replace each question's text with its short key (q1, q2, ...), which is a
 # valid column name.
 data$Question <- qNames[match(data$Question, questions)]

 data.wide <- data %>%
   pivot_wider(names_from = Question, values_from = Answer)
 data.wide <- data.frame(
   lapply(data.wide, as.character),
   stringsAsFactors = FALSE
 )
 data.wide[is.na(data.wide)] <- ""
 head(data.wide)

 # Join the non-blank answers of one student to one question. Blank cells
 # come from rows that belong to a different question and are skipped.
 collapse_answers <- function(answers) {
   paste(answers[answers != ""], collapse = ", ")
 }

 # Group by the metadata columns and merge the rows of each student.
 dummy <- data.wide %>%
   dplyr::group_by(Database, Username, Gender, InterviewType)

 # across() applies the helper to every question column, so the call does not
 # have to be assembled as a string and evaluated.
 dummy2 <- dummy %>%
   dplyr::summarize(
     dplyr::across(dplyr::all_of(qNames), collapse_answers),
     .groups = "drop"
   )

 # Restore the original question text as column names.
 colnames(dummy2)[match(qNames, colnames(dummy2))] <- questions

我整理了一段代码，应该能实现你想要的效果。运行前需要安装dplyr和tidyr（代码中的spread函数来自tidyr）。

 # Collapse all answers a student gave to each question into one
 # comma-separated cell. Expects `data` with columns Database, Username,
 # Gender, InterviewType, Question and Answer (one row per answer).
 library(dplyr)
 library(tidyr)

 # Add row numbers as a column; needed for the spread function to work
 # (it complains about duplicates otherwise).
 data$rownum <- seq_len(nrow(data))

 # spread() is superseded by tidyr::pivot_wider(); it is kept here because the
 # recorded output below was produced with it.
 data.wide <- data %>% spread(Question, Answer)

 # Change column names. Assumes columns 6-8 are, in this order, the
 # "Explain how...", "Please indicate..." and "Which of the following..."
 # questions, as in the output below.
 colnames(data.wide)[6:8] <- c("Explain", "Indicate", "Which")
 data.wide[is.na(data.wide)] <- ""
 head(data.wide)
 #>   Database Username Gender InterviewType rownum    Explain Indicate        Which
 #> 1        1     jack   Male           Pre     10                     "Response 1"
 #> 2        1     jack   Male           Pre     11                     "Response 2"
 #> 3        1     jack   Male           Pre     12                     "Response 3"
 #> 4        1     jack   Male           Pre     13             "Black"
 #> 5        1     jack   Male           Pre     14 "Response"
 #> 6        1    steve   Male           Pre      1                     "Response 1"

 # Join the non-blank answers of one student to one question. Blank cells
 # come from rows that belong to a different question and are skipped.
 collapse_answers <- function(answers) {
   answers <- as.character(answers)
   paste(answers[answers != ""], collapse = ", ")
 }

 # Group by the metadata columns and merge the rows of each student.
 result <- data.wide %>%
   dplyr::group_by(Database, Username, Gender, InterviewType) %>%
   dplyr::summarize(
     Which = collapse_answers(Which),
     Indicate = collapse_answers(Indicate),
     Explain = collapse_answers(Explain)
   ) %>%
   dplyr::ungroup()
 head(as.data.frame(result))
 #>   Database Username Gender InterviewType                                    Which Indicate    Explain
 #> 1        1     jack   Male           Pre "Response 1", "Response 2", "Response 3"  "Black" "Response"
 #> 2        1    steve   Male           Pre "Response 1", "Response 2", "Response 3"  "White" "Response"
 #> 3        2    jenna Female           Pre               "Response 1", "Response 2"  "White" "Response"
 #> 4        3    billy   Male           Pre               "Response 1", "Response 2"  "Other" "Response"

希望这对你有帮助。

有没有办法将多个数据框条目连接成单个条目？

检查连续的Excel记录，如果它们相同，则为它们分配相同的ID

如何将父记录和子记录合并到一个记录集中？

读入Excel中的信息并创建一个列表/数组的方法

在excel中匹配两列，在拼写上略有差异

在Excel表格中，如果有人试图操纵我的标准数据，特定单元格的颜色将被改变

如何旋转excel数据

如何在Excel中格式化不同的DOB格式？

如何将日期类型更改为短日期格式

Excel：如何根据水平表中的计算值在垂直表中获取相应的值

重塑表格以创建按前缀聚合的时间序列