Jackknife for Gamma Parameters
Recall our friend the method of moments estimator:
gamma.est <- function(the_data) {
m <- mean(the_data)
v <- var(the_data)
a <- m^2/v
s <- v/m
return(c(a=a,s=s))
}
Jackknife for Gamma Parameters Function
Write a function called gamma.jackknife
that takes argument a_vector
and returns jackknife standard error estimates on the gamma parameters.
gamma.jackknife <- function(a_vector) {
n1=length(a_vector)
estimate_a=c(n1)
estimate_s=c(n1)
for(i in 1:n1)
{
estimate_a[i]=gamma.est(a_vector[-i])[1]
estimate_s[i]=gamma.est(a_vector[-i])[2]
}
se_a=sqrt((var(estimate_a))*((n1-1)^2)/n1)
se_s=sqrt((var(estimate_s))*((n1-1)^2)/n1)
jackknife.stderrs=c(se_a,se_s)
return(jackknife.stderrs)
}
Jackknife for Gamma Parameters Example
input <- rgamma(1000, shape=0.1, scale=10)
gamma.est(input)
gamma.jackknife(input)
Jackknife for linear regression coefficients
Write a function called jackknife.lm
that takes arguments df
, formula
and p
and returns jackknife standard error estimates on the coefficients of a linear regression model.
jackknife.lm <- function(df,formula,p) {
n=nrow(df)
jackknife.ests=matrix(0,nrow=p,ncol=n)
for (i in 1:n){
new.coefs=lm(as.formula(formula),data=df[-i,])$coefficients
jackknife.ests[,i]=new.coefs
}
var=apply(jackknife.ests,1,var)
jackknife.var=(n-1)^2/n*var
jackknife.stderr=sqrt(jackknife.var)
return(jackknife.stderr)
}
Jackknife for linear regression coefficients Example
output <- 1.2 + 0.6*input + rnorm(1000, 0, 2.1)
data <- data.frame(output,input)
my.lm <- lm(output~input, data=data)
coefficients(my.lm)
# "Official" standard errors
sqrt(diag(vcov(my.lm)))
jackknife.lm(df=data,formula="output~input",p=2)
Refactoring the Jackknife
Omitting one point or row is a common sub-task
The general pattern:
figure out the size of the data
for each case
omit that case
repeat some estimation and get a vector of numbers
take variances across cases
scale up variances
take the square roots
Refactor by extracting the common “omit one” operation
Refactor by defining a general “jackknife” operation
The Common Operation
Problem: Omit one particular data point from a larger structure
Difficulty: Do we need a comma in the index or not?
Solution: Works for vectors, lists, 1D and 2D arrays, matrices, data frames:
Goal:
- Make the function select the correct dimensions
- length for a 1d object
- number of rows for 2d
Write a function
omit.case
that omits a point given the data and returns the data minus that point. Make sure it can handle higher dimensions.omit.case <- function(the_data,omitted_point) { # This should take the data and omit one point at a time and return the new data dim=data(the_data) if (is.null(dim)||length(dim)==1){ return(the_data[-omitted_point]) }else{ return(the_data[-omitted_point,]) } }
Write a function
omit_and_est
that takes the data with an omitted point and returns whatever function your estimator does.omit_and_est <- function(omit) { # This function should take the output of omit.case and use it as input for the estimator estimator(omit.case(the_data,omit)) }
jackknife <- function(estimator,the_data) {
if(is.null(dim(the_data))){
n=length(the_data)
}else{
n=nrow(the_data)
}
omit_and_est <- function(omit) {
# This function should take the output of omit.case and use it as input for the estimator
estimator(omit.case(the_data,omit))
}
jackknife.ests <- matrix(sapply(1:n, omit_and_est), ncol=n)
var.of.reestimates <- apply(jackknife.ests,1,var)
jackknife.var <- ((n-1)^2/n)* var.of.reestimates
jackknife.stderr <- sqrt(jackknife.var)
return(jackknife.stderr)
}
It works
mean.jackknife <- function(a_vector) {
a<-rep(0,length(a_vector))
for (i in 1:length(a_vector)){
a[i]<-mean(a_vector[-i])
}
jackknife.variance<-((length(a_vector) - 1)/length(a_vector)) * sum((a - mean(a))^2)
jackknife.stderr<-sqrt(jackknife.variance)
return(jackknife.stderr)
}
some_normals=rnorm(100,mean=7,sd=5)
jackknife(estimator=mean,the_data=some_normals)
all.equal(jackknife(estimator=mean,the_data=some_normals),
mean.jackknife(some_normals))
all.equal(jackknife(estimator=gamma.est,the_data=data$input),
gamma.jackknife(data$input))
all.equal(jackknife(estimator=gamma.est,the_data=data$input),
gamma.jackknife(data$input), check.names=FALSE)
est.coefs <- function(the_data) {
return(lm(output~input,data=the_data)$coefficients)
}
est.coefs(data)
all.equal(est.coefs(data), coefficients(my.lm))
jackknife(estimator=est.coefs,the_data=data)
all.equal(jackknife(estimator=est.coefs,the_data=data),
jackknife.lm(df=data,formula="output~input",p=2))
Further Refactoring of jackknife()
The code for jackknife()
is still a bit clunky: - Ugly if-else
for finding n
- Bit at the end for scaling variances down to standard errors
write a function that calculates the
n
needed for the above code:data_size <- function(the_data) { if (is.null(dim(the_data))){ n=length(the_data) }else{ n=nrow(the_data) } }
Write a funcrion that calculate the variance of all the estimates and returns the standard error
scale_and_sqrt_vars <- function(jackknife.ests,n) {
var=apply(jackknife.ests,1,var)
jackknife.var=(n-1)^2/n*var
jackknife.stderr=sqrt(jackknife.var)
return(jackknife.stderr)
}
Now invoke those functions
jackknife <- function(estimator,the_data) {
n <- data_size(the_data)
omit_and_est <- function(omit) {
estimator(omit.case(the_data,omit))
}
jackknife.ests <- matrix(sapply(1:n, omit_and_est), ncol=n)
return(scale_and_sqrt_vars(jackknife.ests,n))
}