-
Notifications
You must be signed in to change notification settings - Fork 1
/
Data Sim and Tidyverse.R
106 lines (72 loc) · 2.87 KB
/
Data Sim and Tidyverse.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
library(tidyverse)
# Make Dataset ----
# Simulate a small practice dataset. The seed is fixed so every run yields the
# same values. The sample()/rnorm() calls are evaluated in column order, so
# reordering the columns below would change the simulated data.
set.seed(843)
n_obs <- 100

# Race is drawn from a pool in which each label is repeated according to its
# weight (white:black:latinx:other = 4:2:2:1).
race_pool <- rep(c("white", "black", "latinx", "other"), times = c(4, 2, 2, 1))

data <- tibble(
  ID = seq_len(n_obs),
  Educ = sample(10:25, n_obs, replace = TRUE),
  Age = sample(18:75, n_obs, replace = TRUE),
  Sex = sample(c("male", "female"), n_obs, replace = TRUE),
  Race = sample(race_pool, n_obs, replace = TRUE),
  Married = sample(0:1, n_obs, replace = TRUE),
  # Income at both waves is a linear function of Educ plus normal noise.
  # NOTE(review): INCOME_2 is cased differently from Income_1; kept as-is in
  # case the inconsistency is deliberate -- confirm.
  Income_1 = round(3000 * Educ + rnorm(n_obs, sd = 500)),
  INCOME_2 = round(4000 * Educ + rnorm(n_obs, sd = 500)),
  # Wave-2 depression is 1.2 times wave 1 plus noise.
  Depression_1 = sample(4:25, n_obs, replace = TRUE),
  Depression_2 = round(1.2 * Depression_1 + rnorm(n_obs, sd = 1)),
  # Anxiety at wave 1 tracks Depression_1; wave 2 tracks wave-1 anxiety.
  Anx_1 = round(Depression_1 + rnorm(n_obs, sd = 2)),
  Anx_2 = round(Anx_1 + rnorm(n_obs, sd = 2))
)
# Add a smoking-status column, drawn separately within each Sex group.
# Possible values are "Smoker", "Non-smoker", "99" and NA. c() coerces a bare
# 99 to the string "99" when mixed with strings, so it is spelled as a string.
smoking_values <- c("Smoker", "Non-smoker", "99", NA)
data <- data %>%
  dplyr::group_by(Sex) %>%
  dplyr::mutate(
    smoking = dplyr::case_when(
      Sex == "female" ~ sample(
        smoking_values,
        size = n(),
        replace = TRUE,
        prob = c(0.15, 0.65, 0.10, 0.10)
      ),
      # NOTE(review): these weights sum to 1.10, not 1. sample() treats prob as
      # relative weights, so the call runs; the values are left unchanged so
      # the simulated data stay the same -- confirm the intended probabilities.
      Sex == "male" ~ sample(
        smoking_values,
        size = n(),
        replace = TRUE,
        prob = c(0.25, 0.65, 0.10, 0.10)
      )
    )
  ) %>%
  # The pipe into ungroup() was missing, so ungroup() was called with no data
  # (an error) and `data` stayed grouped by Sex.
  dplyr::ungroup()
# Export the simulated data twice: as a CSV via base R, then as a .sav file
# via haven (attached here, right before its first use).
write.csv(
  x = data,
  file = "C:\\Users\\dculi\\Box\\R Course\\data_csv.csv"
)
library(haven)
write_sav(
  data = data,
  path = "C:\\Users\\dculi\\Box\\R Course\\data_sav.sav"
)
#install.packages("tidyverse")
library(tidyverse)

# The dataset is built with mixed-case column names (ID, Educ, Sex, INCOME_2,
# ...), but every example from here on refers to lower-case names (id, educ,
# sex, ...). Lower-case the names once, after the files have been written, so
# those examples run.
data <- data %>%
  rename_with(tolower)

# Piping ----
# Both calls produce the same summary; the second passes `data` in via the pipe.
summary(data)
data %>%
  summary()
# Piping with Select ----
# Base R: take three columns by name, then the first three by position.
data[, c("id", "educ", "age")]
data[, 1:3]

# dplyr equivalents of the two base R calls above.
data %>% select(id, educ, age)
data %>% select(1:3)

# Selection helpers: pick columns by prefix, suffix, or substring.
data %>% select(starts_with("income"))
data %>% select(ends_with(c("_1", "_2")))
data %>% select(contains("anx"))
# Piping with Mutate ----
# Inspect the class of `sex`, add a factor copy named `sexF`, then inspect
# the class of the new column.
class(data$sex)
data <- data %>% mutate(sexF = factor(sex))
class(data$sexF)
# Group and Summarize ----
# Convert `race` to a factor in place, then report the group size plus the
# mean and standard deviation of depression_1 for each race.
data$race <- as.factor(data$race)
data %>%
  group_by(race) %>%
  summarise(
    N = n(),
    m = mean(depression_1),
    sd = sd(depression_1)
  )
# Filter ----
# Base R row subsetting: keep rows whose anx_1 exceeds 20.
data[data$anx_1 > 20, ]

# The same condition with dplyr.
data %>% filter(anx_1 > 20)

# A single condition on the factor column.
data %>% filter(sexF == "female")

# Both conditions must hold; comma-separated conditions are combined with AND.
data %>% filter(sexF == "female", race == "black")

# Either condition may hold.
data %>% filter(sexF == "female" | educ == 20)