> # Generate some fake data.
> # Both groups are drawn from the same distribution: normal, mean 0, sd 1.
> y.grp1 <- rnorm(20, 0, 1)
> y.grp2 <- rnorm(1, 0, 1)
> y <- c(y.grp1, y.grp2)
>
> y.grp1
[1] 0.649044532 -2.256491187 1.097277554 -1.254684115 0.156735591
[6] 0.272097697 1.142179114 0.095524117 1.612587487 -0.862135249
[11] 0.044291385 0.004164832 -0.569598917 -0.314329979 -1.853036794
[16] -0.517776731 0.331797174 -1.175629448 0.149509153 1.507455853
> y.grp2
[1] 0.6222212
>
> grp <- rep(c("group 1", "group 2"), c(20, 1))
>
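> # Two-sample t-test - this is what would be appropriate here.
> # var.equal = TRUE (pooled variance) is needed because a group with a
> # single observation provides no variance estimate of its own.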
> t.test(y ~ grp, var.equal = TRUE)
Two Sample t-test
data: y by grp
t = -0.6614, df = 19, p-value = 0.5163
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-2.953803 1.535259
sample estimates:
mean in group group 1 mean in group group 2
-0.0870509 0.6222212
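> # One-sample t-test that treats the single value in the second group
> # as the fixed mean you're testing against.
> # This is what I'm saying you shouldn't do: it ignores the sampling
> # variability of that one observation, so the p-value comes out far
> # too small (compare with the two-sample result above).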
> t.test(y.grp1, mu = y.grp2)
One Sample t-test
data: y.grp1
t = -3.0309, df = 19, p-value = 0.006874
alternative hypothesis: true mean is not equal to 0.6222212
95 percent confidence interval:
-0.5768476 0.4027459
sample estimates:
mean of x
-0.0870509