-
Notifications
You must be signed in to change notification settings - Fork 0
/
pairs.R
66 lines (46 loc) · 1.83 KB
/
pairs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# If you work with free recall tasks and items belonging to categories,
# it may be useful to know how often an item from a given category is recalled
# immediately before or after an item from another category. This tool counts
# the occurrence of all possible category pairs for all your participants.
#
# It expects the database to be in a very specific format; namely:
# - Comma separated values (.csv file)
# - Wide format (one line per participant; one recall per column)
# - Previously coded recalls (numbers in place of categories)
# - First column is your participant key
# 1. Get a sequence from the data
# Name your file "input.csv" or change the name here accordingly.
# Place the file in the project's workspace or change the path accordingly.
data <- read.csv("input.csv")
# Build one recall string per participant (row): the non-missing codes of all
# recall columns (every column but the first) pasted together in column order.
# Each column is converted with as.character() on its own. apply() on a data
# frame goes through as.matrix(), which format()s numeric columns as soon as
# any column is non-numeric; format() pads to a common width (e.g. " 1" next
# to "NA"), and those spaces would end up inside the sequences.
recall_columns <- lapply(data[-1], as.character)
sequence <- data.frame(
  seq = vapply(
    seq_len(nrow(data)),
    function(i) {
      # i-th recall of every column, i.e. one participant's row
      codes <- vapply(recall_columns, `[`, character(1), i)
      paste(codes[!is.na(codes)], collapse = "")
    },
    character(1)
  )
)
# 2. Get all possible pairs
# Unique category codes found in the recall columns (every column but the
# first), with missing values removed.
recalled <- unlist(data[-1])
categories <- unique(recalled[!is.na(recalled)])
# The sequences are built by pasting codes together without a separator, so a
# code that is not exactly one character long (e.g. 10) cannot be told apart
# from two adjacent codes (1 followed by 0). Warn instead of silently
# producing ambiguous counts.
if (any(nchar(as.character(categories)) != 1L)) {
  warning(
    "Some category codes are not exactly one character long; ",
    "pair counts will be ambiguous.",
    call. = FALSE
  )
}
# Every ordered pair of categories (a category paired with itself included),
# written as the two codes pasted together. The first element of the pair
# varies fastest, as produced by expand.grid().
pair_grid <- expand.grid(categories, categories)
pairs <- paste0(pair_grid[[1]], pair_grid[[2]])
# 3. Function: count occurrences of a pattern 'sub' in a sequence 'string'
# Overlapping occurrences are all counted: the pattern is wrapped in a
# zero-width lookahead, so a match is reported at every start position.
#   sub    - pattern to look for; inserted into the regular expression as is
#   string - text to search; only the first element's matches are used
# Returns 0 when the pattern does not occur, otherwise the number of start
# positions at which it occurs.
count_sub <- function(sub, string) {
  lookahead <- paste0("(?=", sub, ")")
  starts <- gregexpr(lookahead, string, perl = TRUE)[[1]]
  # gregexpr() reports "no match" as a single -1
  no_match <- length(starts) == 1 && starts == -1
  if (no_match) {
    return(0)
  }
  length(starts)
}
# 4. Search for every combination in 'pairs' in every row of 'sequence'
# Count, for each participant's sequence, how often each pair occurs.
# The counts are laid out explicitly as a participants x pairs matrix:
# sapply() simplifies to a plain vector when there is only one pair, and t()
# of that vector would put the participants in the columns.
pair_counts <- vapply(
  sequence$seq,
  function(seq) {
    vapply(pairs, function(pair) count_sub(pair, seq), numeric(1))
  },
  numeric(length(pairs))
)
# vapply() returns the counts sequence by sequence, so fill the matrix by row.
results <- matrix(
  pair_counts,
  nrow = length(sequence$seq),
  ncol = length(pairs),
  byrow = TRUE,
  dimnames = list(as.character(sequence$seq), pairs)
)
# make it prettier
# Turn the count matrix into a data frame and sort its columns by name.
# drop = FALSE keeps the result a data frame when there is a single column;
# without it the one-column selection collapses to a bare vector.
results_df <- as.data.frame(results)
results_df <- results_df[, order(colnames(results_df)), drop = FALSE]
# 5. Add to original data and export
# Append the pair-count columns to the right of the original columns.
data_final <- cbind(data, results_df)
# Write the combined table to "output.csv" (relative path, so it lands in the
# current working directory), without a row-name column.
write.csv(x = data_final, file = "output.csv", row.names = FALSE)