March 7, 2017 In-Class Project

Mon, Mar 13, 2017

Jackknife for Gamma Parameters

Recall our friend the method of moments estimator:

gamma.est <- function(the_data) {
  m <- mean(the_data)
  v <- var(the_data)
  a <- m^2/v
  s <- v/m
  return(c(a=a,s=s))
}

Jackknife for Gamma Parameters Function

Write a function called gamma.jackknife that takes argument a_vector and returns jackknife standard error estimates on the gamma parameters.

gamma.jackknife <- function(a_vector) {
  n1=length(a_vector)
  estimate_a=c(n1)
  estimate_s=c(n1)
  for(i in 1:n1)
  {
    estimate_a[i]=gamma.est(a_vector[-i])[1]
    estimate_s[i]=gamma.est(a_vector[-i])[2]
  }
  se_a=sqrt((var(estimate_a))*((n1-1)^2)/n1)
  se_s=sqrt((var(estimate_s))*((n1-1)^2)/n1)
  jackknife.stderrs=c(se_a,se_s)
  return(jackknife.stderrs)
}

Jackknife for Gamma Parameters Example

input <- rgamma(1000, shape=0.1, scale=10)
gamma.est(input)
gamma.jackknife(input)

Jackknife for linear regression coefficients

Write a function called jackknife.lm that takes arguments df, formula and p and returns jackknife standard error estimates on the coefficients of a linear regression model.

jackknife.lm <- function(df,formula,p) {
 n=nrow(df)
 jackknife.ests=matrix(0,nrow=p,ncol=n)
 for (i in 1:n){
   new.coefs=lm(as.formula(formula),data=df[-i,])$coefficients
   jackknife.ests[,i]=new.coefs
 }
 var=apply(jackknife.ests,1,var)
 jackknife.var=(n-1)^2/n*var
 jackknife.stderr=sqrt(jackknife.var)
  return(jackknife.stderr)
}

Jackknife for linear regression coefficients Example

output <- 1.2 + 0.6*input +  rnorm(1000, 0, 2.1)
data <- data.frame(output,input)
my.lm <- lm(output~input, data=data)
coefficients(my.lm)
# "Official" standard errors
sqrt(diag(vcov(my.lm)))
jackknife.lm(df=data,formula="output~input",p=2)

Refactoring the Jackknife

Omitting one point or row is a common sub-task
The general pattern:

figure out the size of the data
for each case
   omit that case
   repeat some estimation and get a vector of numbers
take variances across cases
scale up variances
take the square roots

Refactor by extracting the common “omit one” operation
Refactor by defining a general “jackknife” operation

The Common Operation

Problem: Omit one particular data point from a larger structure
Difficulty: Do we need a comma in the index or not?
Solution: Works for vectors, lists, 1D and 2D arrays, matrices, data frames:

Goal:

Make the function select the correct dimensions
- length for a 1d object
- number of rows for 2d

Write a function omit.case that omits a point given the data and returns the data minus that point. Make sure it can handle higher dimensions.

      omit.case <- function(the_data,omitted_point) {
    # This should take the data and omit one point at a time and return the new data
        dim=data(the_data)
        if (is.null(dim)||length(dim)==1){
          return(the_data[-omitted_point])
        }else{
          return(the_data[-omitted_point,])
        }
        }

Write a function omit_and_est that takes the data with an omitted point and returns whatever function your estimator does.

    omit_and_est <- function(omit) {
          # This function should take the output of omit.case and use it as input for the estimator
       estimator(omit.case(the_data,omit))

    }

jackknife <- function(estimator,the_data) {
  
  if(is.null(dim(the_data))){
    n=length(the_data)
  }else{
    n=nrow(the_data)
  }
  omit_and_est <- function(omit) {
              # This function should take the output of omit.case and use it as input for the estimator
           estimator(omit.case(the_data,omit))
          
        }
  
  
  jackknife.ests <- matrix(sapply(1:n, omit_and_est), ncol=n)
  var.of.reestimates <- apply(jackknife.ests,1,var)
  jackknife.var <- ((n-1)^2/n)* var.of.reestimates
  jackknife.stderr <- sqrt(jackknife.var)
  return(jackknife.stderr)
}

It works

mean.jackknife <- function(a_vector) {
  a<-rep(0,length(a_vector))
  for (i in 1:length(a_vector)){
    a[i]<-mean(a_vector[-i])
  }
  jackknife.variance<-((length(a_vector) - 1)/length(a_vector)) * sum((a - mean(a))^2)
  jackknife.stderr<-sqrt(jackknife.variance)
  return(jackknife.stderr)
}

some_normals=rnorm(100,mean=7,sd=5)
jackknife(estimator=mean,the_data=some_normals)
all.equal(jackknife(estimator=mean,the_data=some_normals),
          mean.jackknife(some_normals))

all.equal(jackknife(estimator=gamma.est,the_data=data$input),
          gamma.jackknife(data$input))

all.equal(jackknife(estimator=gamma.est,the_data=data$input),
          gamma.jackknife(data$input), check.names=FALSE)

est.coefs <- function(the_data) {
  return(lm(output~input,data=the_data)$coefficients)
}
est.coefs(data)
all.equal(est.coefs(data), coefficients(my.lm))

jackknife(estimator=est.coefs,the_data=data)
all.equal(jackknife(estimator=est.coefs,the_data=data),
          jackknife.lm(df=data,formula="output~input",p=2))

Further Refactoring of jackknife()

The code for jackknife() is still a bit clunky: - Ugly if-else for finding n - Bit at the end for scaling variances down to standard errors

write a function that calculates the n needed for the above code:

  data_size <- function(the_data) {
if (is.null(dim(the_data))){
  n=length(the_data)
}else{
  n=nrow(the_data)
}

  }

Write a funcrion that calculate the variance of all the estimates and returns the standard error

scale_and_sqrt_vars <- function(jackknife.ests,n) {
  var=apply(jackknife.ests,1,var)
  jackknife.var=(n-1)^2/n*var
  jackknife.stderr=sqrt(jackknife.var)
  return(jackknife.stderr)
}

Now invoke those functions

jackknife <- function(estimator,the_data) {
  n <- data_size(the_data)
  omit_and_est <- function(omit) {
    estimator(omit.case(the_data,omit))
  }
  jackknife.ests <- matrix(sapply(1:n, omit_and_est), ncol=n)
  return(scale_and_sqrt_vars(jackknife.ests,n))
}