# Permutation (randomization) test for a difference in mean size between
# two sexes, using simulated data.

# First let's simulate some data: integer sizes drawn with replacement,
# with the female range (9-15) shifted above the male range (7-13).
n.per.sex <- 25
females <- sample(9:15, size = n.per.sex, replace = TRUE)
males <- sample(7:13, size = n.per.sex, replace = TRUE)

# now we can arrange this data into a data frame with one row per
# individual: females first, then males
dat <- data.frame(size = c(females, males),
                  sex = rep(c("female", "male"), each = n.per.sex))

# Our chosen statistic: mean size of females minus mean size of males.
# Defined once so the observed value and the null distribution are
# guaranteed to use the same calculation.
diff_in_means <- function(size, sex) {
  mean(size[sex == "female"]) - mean(size[sex == "male"])
}

# here we calculate our chosen empirical statistic on the observed data
emp.obs <- diff_in_means(dat$size, dat$sex)

# this will hold the null distribution that we are about to simulate;
# it is preallocated rather than grown one element at a time
n.perm <- 10000
null.dist <- numeric(n.perm)

# each time through this loop we will calculate our
# statistic under the assumptions of the null distribution
# (mainly that the sexes have the same size and are random
# draws from the same statistical population)
for (i in seq_len(n.perm)) {
  # this randomizes the sex assignment; the shuffled labels are kept in
  # a local vector so that dat still holds the observed data afterwards
  shuffled.sex <- sample(dat$sex)
  # this calculates one measure of our stat; these
  # will all be recorded to form the null distribution
  null.dist[i] <- diff_in_means(dat$size, shuffled.sex)
}

# here we calculate a one-sided p-value: the proportion of permuted
# statistics that are at least as large as the observed one
mean(null.dist >= emp.obs)
## [1] 4e-04
# (the value above is from one example run; the data are random, so it
# will vary between runs unless a seed has been set beforehand)

# here we visualize our result: the null distribution as a density
# curve, with the observed statistic marked by a red vertical line
plot(density(null.dist))
abline(v = emp.obs, col = "red", lwd = 2)
For the correlation test we will use the iris dataset that is included
with R. We will do both a parametric and a non-parametric version.
# Bring the built-in iris measurements into the workspace
data("iris")
# Parametric test: with no method given, cor.test reports Pearson's
# product-moment correlation between sepal length and petal length.
# The result is stored first and then printed explicitly.
pearson.res <- cor.test(x=iris$Sepal.Length, y=iris$Petal.Length)
print(pearson.res)
##
## Pearson's product-moment correlation
##
## data: iris$Sepal.Length and iris$Petal.Length
## t = 21.646, df = 148, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8270363 0.9055080
## sample estimates:
## cor
## 0.8717538
# Non-parametric counterpart: the same pair of variables tested with
# Spearman's rank correlation. As the transcript below shows, R warns
# that it cannot compute an exact p-value because of ties.
spearman.res <- cor.test(x=iris$Sepal.Length, y=iris$Petal.Length,
                         method = "spearman")
print(spearman.res)
## Warning in cor.test.default(x = iris$Sepal.Length, y = iris$Petal.Length, :
## Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: iris$Sepal.Length and iris$Petal.Length
## S = 66429, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.8818981