Load libraries and data.
library(dplyr)
library(ggplot2)
library(broom)
library(tidyr)
library(mosaic)
library(nlme)
library(lme4)
library(knitr)
load("movies2.RData")
pop_summary <- movies2 %>% group_by(genre) %>%
summarize(
mean = mean(rating),
sigma = sd(rating)) # call it sigma instead of sd
pop_summary # answer to a.
pop_diff <- pop_summary$mean[1] - pop_summary$mean[2]
pop_sigma <- sqrt(pop_summary$sigma[1]^2+ pop_summary$sigma[2]^2)
c(pop_diff, pop_sigma) # answer to b.
[1] -0.4177215 1.8998867
answer to c.: Normal distribution with mean pop_diff=
-0.4177215 and standard deviation pop_sigma/N
where N
is a sample size.
set.seed(2017)
# answer to d.
movies2 %>% group_by(genre) %>%
sample_n(30) %>% summarize(mean = mean(rating), sd = sd(rating), n= n())
# answer to e.
my_movie_samples <- function(sample_size) {
movies2 %>% # test
group_by(genre) %>%
sample_n(sample_size) %>% # don't get confused with sample()
summarize( mean = mean(rating),
sd = sd(rating),
n = n())
}
# answer to f.
my_boot1 <- mosaic::do(100) * {
my_movie_samples(30)
}
reshape_movie_samples <- function(bt_samples) {
bt_samples %>% data.frame() %>% # don't forget to use data.frame()
dplyr::select(-.row) %>%
reshape(idvar= ".index", timevar="genre",
direction="wide") %>%
mutate(bt_diff = (mean.Action - mean.Romance))
}
density_sample_movies <- function(rehsaped_samples, N, B) {
rehsaped_samples %>%
ggplot(aes(x = bt_diff)) +
geom_density(fill = "steelblue", adjust = 2, alpha = .75) + xlim(c(-2, 2) + pop_diff) +
geom_vline(xintercept = mean(rehsaped_samples$bt_diff), color = "steelblue", size = 1) +
geom_vline(xintercept = pop_diff, color = "yellow", size = 1) + # CTL prediction mean
stat_function(fun = dnorm, colour = "yellow", size =1, # CTL prediction distribution
args = list(mean = pop_diff,
sd = pop_sigma/sqrt(rehsaped_samples$n.Action[1]))) +
labs(title = paste0("Bootstrop: ", B, ", Num observations:", N ))
}
stats_sample_movies <- function(reshaped_samples) {
reshaped_samples %>%
summarize(
diff_mean = mean(bt_diff),
diff_sd = sd(bt_diff),
p_val = sum(bt_diff>0)/length(bt_diff)*2,
theory_mean = pop_diff,
theory_sd = pop_sigma/sqrt(length(bt_diff)),
abs_error_mean = abs(diff_mean - theory_mean),
abs_error_sd = abs(diff_sd - theory_sd)
)
}
# answer to g.
reshaped_my_boot1 <- reshape_movie_samples(my_boot1)
density_sample_movies(reshaped_my_boot1, 30, 100)
stats_sample_movies(reshaped_my_boot1)
loc_N <- c(20, 30, 40, 50, 75, 100)
loc_B <- c(100, 500, 1000, 2000, 4000)
list_density <- list()
list_stats <- list()
# This will take some time
for (idx_N in 1:length(loc_N)) {
list_density[[idx_N]] <- list()
list_stats[[idx_N]] <- list()
for (idx_B in 1:length(loc_B)) {
print(paste0('N =', loc_N[idx_N],', B = ', loc_B[idx_B]))
my_boot1 <- mosaic::do(loc_B[idx_B]) * {
my_movie_samples(loc_N[idx_N])
}
reshaped_my_boot1 <- reshape_movie_samples(my_boot1)
list_density[[idx_N]][[idx_B]] <- density_sample_movies(reshaped_my_boot1,
loc_N[idx_N], loc_B[idx_B])
list_stats[[idx_N]][[idx_B]] <- stats_sample_movies(reshaped_my_boot1)
}
}
[1] "N =20, B = 100"
[1] "N =20, B = 500"
[1] "N =20, B = 1000"
[1] "N =20, B = 2000"
[1] "N =20, B = 4000"
[1] "N =30, B = 100"
[1] "N =30, B = 500"
[1] "N =30, B = 1000"
[1] "N =30, B = 2000"
[1] "N =30, B = 4000"
[1] "N =40, B = 100"
[1] "N =40, B = 500"
[1] "N =40, B = 1000"
[1] "N =40, B = 2000"
[1] "N =40, B = 4000"
[1] "N =50, B = 100"
[1] "N =50, B = 500"
[1] "N =50, B = 1000"
[1] "N =50, B = 2000"
[1] "N =50, B = 4000"
[1] "N =75, B = 100"
[1] "N =75, B = 500"
[1] "N =75, B = 1000"
[1] "N =75, B = 2000"
[1] "N =75, B = 4000"
[1] "N =100, B = 100"
[1] "N =100, B = 500"
[1] "N =100, B = 1000"
[1] "N =100, B = 2000"
[1] "N =100, B = 4000"
# Use Plot Pane in RStudio <- -> to observe the influence of N
for (idx_N in 1:length(loc_N)) print(list_density[[idx_N]][[which(loc_B==max(loc_B))]])
# dispersion decreases with N
for (idx_N in 1:length(loc_N)) print(list_density[[idx_N]][[which(loc_B==min(loc_B))]])
extract_list_stats_N <- function(seq, idx_B, stat) {
lapply(c(1:length(seq)),
function (idx_N) list_stats[[idx_N]][[idx_B]][[stat]]) %>% unlist()
}
extract_list_stats_B <- function(seq, idx_N, stat) {
lapply(c(1:length(seq)),
function (idx_B) list_stats[[idx_N]][[idx_B]][[stat]]) %>% unlist()
}
max_B <- which(loc_B==max(loc_B)) # index of max B
max_N <- which(loc_N==max(loc_N)) # index of max N
results_N <- data.frame(
N = loc_N,
p_val = extract_list_stats_N(loc_N, max_B, "p_val"),
abs_error_mean = extract_list_stats_N(loc_N, max_B, "abs_error_mean"),
abs_error_sd = extract_list_stats_N(loc_N, max_B, "abs_error_sd")
)
results_B <- data.frame(
B = loc_B,
p_val = extract_list_stats_B(loc_B, max_N, "p_val"),
abs_error_mean = extract_list_stats_B(loc_B, max_N, "abs_error_mean"),
abs_error_sd = extract_list_stats_B(loc_B, max_N, "abs_error_sd")
)
# answer to e.
results_N %>% # proportional to 1/sqrt(N)
ggplot(aes(x = N, y = p_val)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ sqrt(1/x), se=FALSE)
results_N %>% # already unbiased estimator (each mean gets more accurate with N but the mean of means do not)
ggplot(aes(x = N, y = abs_error_mean)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ sqrt(1/x), se=FALSE)
results_N %>% # proportional to 1/sqrt(N)
ggplot(aes(x = N, y = abs_error_sd)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ sqrt(1/x), se=FALSE)
# answer to f.
# slowly converges to some lower or upper bounds for given N, not following 1/sqrt(N) function
results_B %>%
ggplot(aes(x = B, y = p_val)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ sqrt(1/x), se=FALSE)
results_B %>%
ggplot(aes(x = B, y = abs_error_mean)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ sqrt(1/x), se=FALSE)
results_B %>%
ggplot(aes(x = B, y = abs_error_sd)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ sqrt(1/x), se=FALSE)
ChickWeight2 <- ChickWeight # make a copy that we may modify
head(ChickWeight2)
table(ChickWeight2$Chick)
18 16 15 13 9 20 10 8 17 19 4 6 11 3 1 12 2 5 14 7 24 30 22 23 27
2 7 8 12 12 12 12 11 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12
28 26 25 29 21 33 37 36 31 39 38 32 40 34 35 44 45 43 41 47 49 46 50 42 48
12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 10 12 12 12 12 12 12 12 12 12
table(ChickWeight2$Diet)
1 2 3 4
220 120 120 118
table(ChickWeight2$Chick, ChickWeight2$Diet)
1 2 3 4
18 2 0 0 0
16 7 0 0 0
15 8 0 0 0
13 12 0 0 0
9 12 0 0 0
20 12 0 0 0
10 12 0 0 0
8 11 0 0 0
17 12 0 0 0
19 12 0 0 0
4 12 0 0 0
6 12 0 0 0
11 12 0 0 0
3 12 0 0 0
1 12 0 0 0
12 12 0 0 0
2 12 0 0 0
5 12 0 0 0
14 12 0 0 0
7 12 0 0 0
24 0 12 0 0
30 0 12 0 0
22 0 12 0 0
23 0 12 0 0
27 0 12 0 0
28 0 12 0 0
26 0 12 0 0
25 0 12 0 0
29 0 12 0 0
21 0 12 0 0
33 0 0 12 0
37 0 0 12 0
36 0 0 12 0
31 0 0 12 0
39 0 0 12 0
38 0 0 12 0
32 0 0 12 0
40 0 0 12 0
34 0 0 12 0
35 0 0 12 0
44 0 0 0 10
45 0 0 0 12
43 0 0 0 12
41 0 0 0 12
47 0 0 0 12
49 0 0 0 12
46 0 0 0 12
50 0 0 0 12
42 0 0 0 12
48 0 0 0 12
ChickWeight2 %>%
ggplot(aes(x = Time, y = weight, color = Diet)) +
geom_point(size = .25, alpha=.5) + facet_wrap(~Chick)
# answer to a
lm( weight ~ Diet + Time + I(Time^2) , data = ChickWeight2) %>% summary()
Call:
lm(formula = weight ~ Diet + Time + I(Time^2), data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-146.372 -16.983 -0.684 15.283 132.295
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 21.56559 4.18810 5.149 3.60e-07 ***
Diet2 16.08443 4.02910 3.992 7.40e-05 ***
Diet3 36.41776 4.02910 9.039 < 2e-16 ***
Diet4 30.28713 4.05041 7.478 2.86e-13 ***
Time 5.42189 0.83037 6.530 1.46e-10 ***
I(Time^2) 0.15615 0.03758 4.155 3.74e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 35.49 on 572 degrees of freedom
Multiple R-squared: 0.7528, Adjusted R-squared: 0.7506
F-statistic: 348.3 on 5 and 572 DF, p-value: < 2.2e-16
# answer to b
lm( weight ~ Diet + Time + I(Time^2) + Chick, data = ChickWeight2) %>% summary()
Call:
lm(formula = weight ~ Diet + Time + I(Time^2) + Chick, data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-89.323 -14.177 -0.719 14.127 82.761
Coefficients: (3 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 187.95907 56.48103 3.328 0.000937 ***
Diet2 -111.80410 33.76427 -3.311 0.000992 ***
Diet3 158.02561 151.28543 1.045 0.296710
Diet4 -795.65043 364.56383 -2.182 0.029516 *
Time 5.49744 0.64936 8.466 2.54e-16 ***
I(Time^2) 0.15092 0.02937 5.138 3.91e-07 ***
Chick.L 1536.03638 616.23832 2.493 0.012988 *
Chick.Q 1132.76640 633.68151 1.788 0.074417 .
Chick.C 701.07385 414.17189 1.693 0.091102 .
Chick^4 48.78857 77.52011 0.629 0.529382
Chick^5 -652.87056 344.64786 -1.894 0.058732 .
Chick^6 -617.94097 321.03777 -1.925 0.054790 .
Chick^7 -42.42155 21.07590 -2.013 0.044645 *
Chick^8 469.78475 223.49896 2.102 0.036032 *
Chick^9 383.05395 202.41855 1.892 0.058988 .
Chick^10 18.20186 50.35810 0.361 0.717909
Chick^11 -183.80024 93.53871 -1.965 0.049944 *
Chick^12 -189.58610 113.30433 -1.673 0.094873 .
Chick^13 -119.70805 75.00316 -1.596 0.111080
Chick^14 -22.65442 16.42693 -1.379 0.168449
Chick^15 93.64822 57.69960 1.623 0.105182
Chick^16 172.81090 100.87837 1.713 0.087290 .
Chick^17 151.42028 82.48806 1.836 0.066972 .
Chick^18 7.38371 21.39654 0.345 0.730165
Chick^19 -210.88545 106.69882 -1.976 0.048625 *
Chick^20 -208.42605 108.68477 -1.918 0.055689 .
Chick^21 -22.62376 17.08739 -1.324 0.186077
Chick^22 151.86556 77.69090 1.955 0.051143 .
Chick^23 162.98077 86.46808 1.885 0.059999 .
Chick^24 65.91731 43.69643 1.509 0.132020
Chick^25 -38.14827 14.09694 -2.706 0.007028 **
Chick^26 -38.66113 38.65533 -1.000 0.317698
Chick^27 -87.80072 58.36564 -1.504 0.133099
Chick^28 -103.28557 57.51292 -1.796 0.073089 .
Chick^29 -26.05045 18.29960 -1.424 0.155169
Chick^30 81.79350 48.56642 1.684 0.092744 .
Chick^31 164.74431 89.13544 1.848 0.065128 .
Chick^32 11.45557 9.15985 1.251 0.211626
Chick^33 62.49349 30.62838 2.040 0.041811 *
Chick^34 90.81597 45.86262 1.980 0.048205 *
Chick^35 18.49639 11.65825 1.587 0.113216
Chick^36 44.91319 29.41438 1.527 0.127384
Chick^37 -28.65947 19.38399 -1.479 0.139869
Chick^38 101.27934 58.13177 1.742 0.082051 .
Chick^39 -198.63652 105.99375 -1.874 0.061479 .
Chick^40 -100.96158 67.80076 -1.489 0.137062
Chick^41 97.05775 56.70464 1.712 0.087553 .
Chick^42 -54.84619 21.76126 -2.520 0.012018 *
Chick^43 -98.99305 38.04020 -2.602 0.009520 **
Chick^44 62.56038 28.47294 2.197 0.028442 *
Chick^45 -106.60378 51.64156 -2.064 0.039479 *
Chick^46 -14.60983 15.89201 -0.919 0.358350
Chick^47 NA NA NA NA
Chick^48 NA NA NA NA
Chick^49 NA NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 27.62 on 526 degrees of freedom
Multiple R-squared: 0.8623, Adjusted R-squared: 0.8489
F-statistic: 64.58 on 51 and 526 DF, p-value: < 2.2e-16
# answer to c
lmer( weight ~ Diet + Time + I(Time^2) + (1 | Chick), data = ChickWeight2) %>% summary()
Linear mixed model fit by REML ['lmerMod']
Formula: weight ~ Diet + Time + I(Time^2) + (1 | Chick)
Data: ChickWeight2
REML criterion at convergence: 5563
Scaled residuals:
Min 1Q Median 3Q Max
-3.4591 -0.5216 -0.0029 0.4874 3.1903
Random effects:
Groups Name Variance Std.Dev.
Chick (Intercept) 523.8 22.89
Residual 762.5 27.61
Number of obs: 578, groups: Chick, 50
Fixed effects:
Estimate Std. Error t value
(Intercept) 21.46154 6.08152 3.529
Diet2 16.26072 9.42627 1.725
Diet3 36.59405 9.42627 3.882
Diet4 30.21469 9.43254 3.203
Time 5.47872 0.64791 8.456
I(Time^2) 0.15195 0.02932 5.183
Correlation of Fixed Effects:
(Intr) Diet2 Diet3 Diet4 Time
Diet2 -0.521
Diet3 -0.521 0.339
Diet4 -0.520 0.339 0.339
Time -0.388 -0.005 -0.005 -0.007
I(Time^2) 0.324 0.001 0.001 0.004 -0.964
# answer to d
lm( weight ~ Diet*Time - Diet, data = ChickWeight2) %>% summary()
Call:
lm(formula = weight ~ Diet * Time - Diet, data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-135.727 -15.696 -0.928 13.141 129.109
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 27.8588 2.6596 10.475 < 2e-16 ***
Time 7.0492 0.2573 27.392 < 2e-16 ***
Diet2:Time 1.6112 0.3044 5.294 1.71e-07 ***
Diet3:Time 3.7383 0.3044 12.282 < 2e-16 ***
Diet4:Time 2.8614 0.3086 9.273 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 34.08 on 573 degrees of freedom
Multiple R-squared: 0.7717, Adjusted R-squared: 0.7701
F-statistic: 484.1 on 4 and 573 DF, p-value: < 2.2e-16
# answer to e
lm( weight ~ Diet*Time - Diet + Chick, data = ChickWeight2) %>% summary()
Call:
lm(formula = weight ~ Diet * Time - Diet + Chick, data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-79.06 -14.78 -1.71 14.71 87.54
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 28.2445 1.9880 14.207 < 2e-16 ***
Time 6.6906 0.2598 25.752 < 2e-16 ***
Chick.L 25.6467 13.8456 1.852 0.064541 .
Chick.Q -13.5654 12.4737 -1.088 0.277306
Chick.C 51.9446 11.7779 4.410 1.25e-05 ***
Chick^4 9.2225 11.3481 0.813 0.416764
Chick^5 -33.3384 10.2390 -3.256 0.001203 **
Chick^6 41.0952 11.1258 3.694 0.000244 ***
Chick^7 -0.2107 9.0451 -0.023 0.981424
Chick^8 5.8962 10.1114 0.583 0.560059
Chick^9 13.4345 8.9387 1.503 0.133453
Chick^10 -16.1181 8.8059 -1.830 0.067763 .
Chick^11 -45.0382 8.3171 -5.415 9.34e-08 ***
Chick^12 9.6177 8.1713 1.177 0.239727
Chick^13 55.0050 7.9173 6.947 1.11e-11 ***
Chick^14 8.1415 7.7757 1.047 0.295562
Chick^15 -45.9058 7.7621 -5.914 6.03e-09 ***
Chick^16 -15.9530 7.7400 -2.061 0.039784 *
Chick^17 28.2005 7.7793 3.625 0.000317 ***
Chick^18 17.2879 7.6943 2.247 0.025065 *
Chick^19 -24.3940 7.6489 -3.189 0.001512 **
Chick^20 13.0230 7.7369 1.683 0.092924 .
Chick^21 12.9952 7.4552 1.743 0.081901 .
Chick^22 -10.4667 7.6565 -1.367 0.172202
Chick^23 3.4996 7.4382 0.470 0.638199
Chick^24 3.9263 7.4620 0.526 0.598990
Chick^25 -35.2223 7.3733 -4.777 2.31e-06 ***
Chick^26 20.8035 7.4297 2.800 0.005298 **
Chick^27 41.5754 7.3843 5.630 2.94e-08 ***
Chick^28 8.9789 7.3614 1.220 0.223121
Chick^29 -22.6530 7.3736 -3.072 0.002236 **
Chick^30 -7.0641 7.3921 -0.956 0.339699
Chick^31 11.6522 7.4664 1.561 0.119215
Chick^32 16.4033 7.3314 2.237 0.025680 *
Chick^33 -3.1074 7.3950 -0.420 0.674512
Chick^34 -6.9050 7.4016 -0.933 0.351297
Chick^35 0.5745 7.3249 0.078 0.937516
Chick^36 -16.0760 7.3734 -2.180 0.029681 *
Chick^37 -4.2042 7.3449 -0.572 0.567300
Chick^38 -12.9041 7.3685 -1.751 0.080489 .
Chick^39 -11.6780 7.4907 -1.559 0.119601
Chick^40 42.2806 7.4377 5.685 2.18e-08 ***
Chick^41 -13.5917 7.4133 -1.833 0.067307 .
Chick^42 -32.8739 7.3684 -4.461 9.97e-06 ***
Chick^43 -38.4670 7.4214 -5.183 3.12e-07 ***
Chick^44 12.3700 7.4307 1.665 0.096565 .
Chick^45 -20.2123 7.4505 -2.713 0.006889 **
Chick^46 23.0112 7.3715 3.122 0.001898 **
Chick^47 -11.3521 7.4007 -1.534 0.125651
Chick^48 -16.5302 7.3528 -2.248 0.024981 *
Chick^49 -31.1805 7.3562 -4.239 2.66e-05 ***
Diet2:Time 1.9185 0.4294 4.468 9.67e-06 ***
Diet3:Time 4.7322 0.4294 11.022 < 2e-16 ***
Diet4:Time 2.9653 0.4350 6.817 2.57e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 25.37 on 524 degrees of freedom
Multiple R-squared: 0.8843, Adjusted R-squared: 0.8726
F-statistic: 75.54 on 53 and 524 DF, p-value: < 2.2e-16
# answer to f
lmer( weight ~ Diet*Time - Diet + (1 | Chick), data = ChickWeight2) %>% summary()
Linear mixed model fit by REML ['lmerMod']
Formula: weight ~ Diet * Time - Diet + (1 | Chick)
Data: ChickWeight2
REML criterion at convergence: 5488
Scaled residuals:
Min 1Q Median 3Q Max
-3.3226 -0.5908 -0.0699 0.5435 3.5918
Random effects:
Groups Name Variance Std.Dev.
Chick (Intercept) 532.9 23.09
Residual 643.0 25.36
Number of obs: 578, groups: Chick, 50
Fixed effects:
Estimate Std. Error t value
(Intercept) 28.1852 3.8206 7.377
Time 6.7729 0.2435 27.818
Diet2:Time 1.8442 0.3857 4.782
Diet3:Time 4.4756 0.3857 11.605
Diet4:Time 2.9418 0.3906 7.532
Correlation of Fixed Effects:
(Intr) Time Dt2:Tm Dt3:Tm
Time -0.287
Diet2:Time 0.008 -0.581
Diet3:Time 0.008 -0.581 0.366
Diet4:Time 0.004 -0.573 0.361 0.361
# answer to g.
# default smooth fit
ChickWeight2 %>%
ggplot(aes(x = Time, y = weight, color = Diet)) +
geom_jitter(size =.75, alpha=.5) +
geom_smooth()
`geom_smooth()` using method = 'loess'
# linear fit
ChickWeight2 %>%
ggplot(aes(x = Time, y = weight, color = Diet)) +
geom_jitter(size =.75, alpha=.5) +
geom_smooth(method = "lm", formula = y ~ x)
# quadratic fit
ChickWeight2 %>%
ggplot(aes(x = Time, y = weight, color = Diet)) +
geom_jitter(size =.75, alpha=.5) +
geom_smooth(method = "lm", formula = y ~ x + I(x^2))
# answer to h.
model_h1 <- lm( weight ~ Diet*Time + Diet*I(Time^2) - Diet, data = ChickWeight2)
summary(model_h1)
Call:
lm(formula = weight ~ Diet * Time + Diet * I(Time^2) - Diet,
data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-143.223 -10.075 -0.585 8.274 123.360
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 38.05700 3.55089 10.718 < 2e-16 ***
Time 4.52849 0.97255 4.656 4.01e-06 ***
I(Time^2) 0.10994 0.04881 2.253 0.0247 *
Diet2:Time 1.21418 1.22829 0.989 0.3233
Diet3:Time 0.66645 1.22829 0.543 0.5876
Diet4:Time 3.05116 1.23514 2.470 0.0138 *
Diet2:I(Time^2) 0.02287 0.07087 0.323 0.7470
Diet3:I(Time^2) 0.18123 0.07087 2.557 0.0108 *
Diet4:I(Time^2) -0.01139 0.07167 -0.159 0.8737
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 33.44 on 569 degrees of freedom
Multiple R-squared: 0.7817, Adjusted R-squared: 0.7786
F-statistic: 254.6 on 8 and 569 DF, p-value: < 2.2e-16
# fixed effect model
model_h2 <- lm( weight ~ Diet*Time + Diet*I(Time^2) - Diet + Chick, data = ChickWeight2)
summary(model_h2)
Call:
lm(formula = weight ~ Diet * Time + Diet * I(Time^2) - Diet +
Chick, data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-95.502 -13.076 0.203 13.994 81.077
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 37.98665 2.60577 14.578 < 2e-16 ***
Time 4.50263 0.93250 4.829 1.81e-06 ***
I(Time^2) 0.10337 0.04243 2.436 0.01517 *
Chick.L 37.36872 17.82733 2.096 0.03655 *
Chick.Q -30.14575 15.13162 -1.992 0.04687 *
Chick.C 31.64251 13.38520 2.364 0.01845 *
Chick^4 4.75804 12.70793 0.374 0.70825
Chick^5 -17.87169 10.88152 -1.642 0.10111
Chick^6 51.99355 12.63730 4.114 4.51e-05 ***
Chick^7 1.92716 8.78650 0.219 0.82648
Chick^8 -4.66427 11.15130 -0.418 0.67592
Chick^9 7.42417 9.07255 0.818 0.41355
Chick^10 -17.23033 8.94144 -1.927 0.05452 .
Chick^11 -38.67025 8.19412 -4.719 3.05e-06 ***
Chick^12 10.91829 8.18744 1.334 0.18294
Chick^13 55.44602 7.78830 7.119 3.63e-12 ***
Chick^14 8.36440 7.53577 1.110 0.26753
Chick^15 -44.59191 7.58249 -5.881 7.31e-09 ***
Chick^16 -18.73044 7.58426 -2.470 0.01384 *
Chick^17 23.43568 7.65636 3.061 0.00232 **
Chick^18 17.04334 7.54211 2.260 0.02425 *
Chick^19 -20.10368 7.55775 -2.660 0.00806 **
Chick^20 17.28227 7.78841 2.219 0.02692 *
Chick^21 13.06748 7.21141 1.812 0.07055 .
Chick^22 -13.46959 7.66728 -1.757 0.07955 .
Chick^23 0.51112 7.27698 0.070 0.94403
Chick^24 3.60122 7.35660 0.490 0.62468
Chick^25 -34.28977 7.14904 -4.796 2.11e-06 ***
Chick^26 21.45460 7.24988 2.959 0.00322 **
Chick^27 41.74158 7.18159 5.812 1.08e-08 ***
Chick^28 10.96163 7.15924 1.531 0.12635
Chick^29 -20.72739 7.16074 -2.895 0.00396 **
Chick^30 -8.31765 7.18100 -1.158 0.24728
Chick^31 8.05395 7.34475 1.097 0.27334
Chick^32 16.20988 7.09435 2.285 0.02272 *
Chick^33 -4.34289 7.22026 -0.601 0.54778
Chick^34 -8.54630 7.24031 -1.180 0.23839
Chick^35 0.38663 7.08472 0.055 0.95650
Chick^36 -17.27562 7.18588 -2.404 0.01656 *
Chick^37 -3.80682 7.12368 -0.534 0.59330
Chick^38 -14.79768 7.17399 -2.063 0.03964 *
Chick^39 -7.92055 7.39232 -1.071 0.28446
Chick^40 42.62529 7.25733 5.873 7.62e-09 ***
Chick^41 -14.87047 7.21112 -2.062 0.03969 *
Chick^42 -31.66806 7.14938 -4.429 1.15e-05 ***
Chick^43 -36.08786 7.28236 -4.956 9.77e-07 ***
Chick^44 10.82116 7.29652 1.483 0.13867
Chick^45 -17.17002 7.34838 -2.337 0.01984 *
Chick^46 23.35888 7.18171 3.253 0.00122 **
Chick^47 -10.46481 7.20117 -1.453 0.14677
Chick^48 -16.90634 7.14240 -2.367 0.01830 *
Chick^49 -30.23053 7.14858 -4.229 2.78e-05 ***
Diet2:Time 1.30757 1.57426 0.831 0.40658
Diet3:Time 0.55489 1.57426 0.352 0.72462
Diet4:Time 3.38747 1.58261 2.140 0.03278 *
Diet2:I(Time^2) 0.02692 0.07106 0.379 0.70495
Diet3:I(Time^2) 0.19293 0.07106 2.715 0.00684 **
Diet4:I(Time^2) -0.02033 0.07186 -0.283 0.77736
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 24.54 on 520 degrees of freedom
Multiple R-squared: 0.8926, Adjusted R-squared: 0.8808
F-statistic: 75.8 on 57 and 520 DF, p-value: < 2.2e-16
# random effect model
model_h3 <- lmer( weight ~ Diet*Time + Diet*I(Time^2) - Diet + (1 | Chick),
data = ChickWeight2)
summary(model_h3)
Linear mixed model fit by REML ['lmerMod']
Formula: weight ~ Diet * Time + Diet * I(Time^2) - Diet + (1 | Chick)
Data: ChickWeight2
REML criterion at convergence: 5463.7
Scaled residuals:
Min 1Q Median 3Q Max
-4.0129 -0.5185 -0.0110 0.5288 3.4598
Random effects:
Groups Name Variance Std.Dev.
Chick (Intercept) 522.4 22.86
Residual 600.6 24.51
Number of obs: 578, groups: Chick, 50
Fixed effects:
Estimate Std. Error t value
(Intercept) 37.99451 4.14966 9.156
Time 4.53465 0.85256 5.319
I(Time^2) 0.10316 0.03995 2.583
Diet2:Time 1.25349 1.34921 0.929
Diet3:Time 0.58107 1.34921 0.431
Diet4:Time 3.25969 1.35625 2.403
Diet2:I(Time^2) 0.02795 0.06420 0.435
Diet3:I(Time^2) 0.19097 0.06420 2.975
Diet4:I(Time^2) -0.01613 0.06493 -0.248
Correlation of Fixed Effects:
(Intr) Time I(T^2) Dt2:Tm Dt3:Tm Dt4:Tm D2:I(T D3:I(T
Time -0.349
I(Time^2) 0.281 -0.961
Diet2:Time 0.005 -0.557 0.546
Diet3:Time 0.005 -0.557 0.546 0.351
Diet4:Time 0.003 -0.553 0.543 0.349 0.349
Dt2:I(Tm^2) -0.005 0.539 -0.575 -0.961 -0.339 -0.337
Dt3:I(Tm^2) -0.005 0.539 -0.575 -0.339 -0.961 -0.337 0.357
Dt4:I(Tm^2) -0.003 0.532 -0.567 -0.335 -0.335 -0.960 0.353 0.353
# raw data plot
base_plot <- ChickWeight2 %>%
ggplot(aes( x = Time,
y = weight,
color = Diet)) +
geom_jitter(size =.75, alpha=.5)
base_plot
# add a model fit
base_plot +
geom_smooth(aes( x = Time,
y = predict(model_h1),
color = Diet),
method = "lm", formula = y ~ x + I(x^2))
# answer to i.
# generate variables for 25th, 50th, and 75th percentiles
# of weight for each Diet and Time
ChickWeight2 <- ChickWeight2 %>%
group_by(Diet, Time) %>%
mutate(y25 = quantile(weight, .25, na.rm=TRUE),
y75 = quantile(weight, .75, na.rm=TRUE),
y50 = quantile(weight, .50, na.rm=TRUE)) %>%
ungroup()
base_plot2 <- ChickWeight2 %>%
ggplot(aes( x = Time,
y = y50,
ymin = y25,
ymax = y75,
color = Diet)) +
geom_point(position = position_dodge(width = 1), size=.5) +
geom_linerange(position = position_dodge(width = 1)) +
labs(y = "weight range 25-75th percentile")
# weight range 25-75th percentile
base_plot2
# add a model fit from model_h1
base_plot2 +
geom_smooth(aes( x = Time,
y = predict(model_h1),
color = Diet),
method = "lm", formula = y ~ x + I(x^2))
# add a model fit from model_h2
base_plot2 +
geom_smooth(aes( x = Time,
y = predict(model_h2),
color = Diet),
method = "lm", formula = y ~ x + I(x^2), se=FALSE)
# add a model fit from model_h3
base_plot2 +
geom_smooth(
aes( x = Time,
y = predict(model_h3), #, re.form=(~1 | Chick)),
color = Diet),
method = "lm", formula = y ~ x + I(x^2), se=FALSE)
The effects of Diet in three models are virtually indistinguishable.