Excel或R:重塑数据

我有一些数据如下所示:

ID Data 1 abc 2 abcabc 3 abc 4 abcabcabcabc 5 abcabcabc 

我想在下面

 ID Data 1 abc 2 abc 2 abc 3 abc 4 abc 4 abc 4 abc 4 abc 5 abc 5 abc 5 abc 

abc在不同的列单元格中,所以Data实际上是许多列。 如果需要,我可以连接它们。

本质上bc是相关的,但对于一些ID我有多个结果,我想数据在一个长的格式而不是宽,但保持每行的ID。

如果这更容易,我可以在这里做。

Dput:

 structure(list(ID = c(9999812L, 999908L, 9993595L, 9992905L, 9989664L, 9984487L, 9980956L, 9980112L, 9980091L, 9979915L, 9979613L, 9979400L, 9978215L, 9976882L, 9975335L, 9974511L, 9973804L, 9973025L ), a = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "a", class = "factor"), b = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "b", class = "factor"), c = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "c", class = "factor"), a.1 = structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L), .Label = c("", "a"), class = "factor"), b.1 = structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L), .Label = c("", "b"), class = "factor"), c.1 = structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L), .Label = c("", "c"), class = "factor"), a.2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("", "a"), class = "factor"), b.2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("", "b"), class = "factor"), c.2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("", "c"), class = "factor"), a.3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("", "a"), class = "factor"), b.3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("", "b"), class = "factor"), c.3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("", "c"), class = "factor"), a.4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("", "a"), class = "factor"), b.4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("", "b"), class = "factor"), c.4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("", "c"), class = "factor")), .Names = c("ID", "a", "b", "c", "a.1", "b.1", "c.1", "a.2", "b.2", "c.2", "a.3", "b.3", "c.3", "a.4", "b.4", "c.4"), class = "data.frame", row.names = c(NA, -18L)) 

使用您的初始数据,您可以使用“stringr”和“reshape2”来融化数据。

 dt <- data.frame(x = 1:5, y = c( "abc" , "abcabc","abc","abcabcabcabc", "abcabcabc")) library("stringr") library("reshape2") maxlen <- max(lengths(str_extract_all(dt$y,"(\\w)\\s(\\w)\\s(\\w)(\\1\\s\\2\\s\\3)*"))) list_lists <- str_extract_all(dt$y,"(\\w)\\s(\\w)\\s(\\w)(\\1\\s\\2\\s\\3)*") li <- lapply(list_lists, `length<-`,maxlen) dtnew <- data.frame(x =cbind(dt$x),do.call("rbind",li)) dtnew1 <- melt(dtnew,id.vars="x") dtnew1 <- dtnew1[!is.na(dtnew1$value),] dtnew1[order(dtnew1$x),] > dtnew1[order(dtnew1$x),c(1,3)] x value 1 1 abc 2 2 abc 7 2 abc 3 3 abc 4 4 abc 9 4 abc 14 4 abc 19 4 abc 5 5 abc 10 5 abc 15 5 abc > 

编辑 :对于更新的数据,使一个名为“ concat ”,这是连接值的列“a”到“c.4”

您可以使用: concat <- data.frame(concat=do.call("paste0",dt[,2:length(dt)]))来连接字段

然后分配dt$concat <- concat

 library("stringr") library("reshape2") maxlen <- max(lengths(str_extract_all(dt$concat,"(\\w)(\\w)(\\w)"))) list_lists <- str_extract_all(dt$concat,"(\\w)(\\w)(\\w)") li <- lapply(list_lists, `length<-`,maxlen) dtnew <- data.frame(x =cbind(dt$ID),y=do.call("rbind",li)) dtnew1 <- melt(dtnew,id.vars="x") dtnew1 <- dtnew1[!is.na(dtnew1$value),] dtnew1[order(dtnew1$x),c(1,3)] > dtnew1[order(dtnew1$x),c(1,3)] x value 2 999908 abc 18 9973025 abc 36 9973025 abc 54 9973025 abc 72 9973025 abc 90 9973025 abc 17 9973804 abc 35 9973804 abc 16 9974511 abc 15 9975335 abc 33 9975335 abc 51 9975335 abc 69 9975335 abc 14 9976882 abc 13 9978215 abc 12 9979400 abc 30 9979400 abc 48 9979400 abc 11 9979613 abc 10 9979915 abc 9 9980091 abc 8 9980112 abc 7 9980956 abc 6 9984487 abc 24 9984487 abc 5 9989664 abc 4 9992905 abc 3 9993595 abc 1 9999812 abc 19 9999812 abc >