II. R code
Texte intégral
1This R file contains the functions that are needed to perform the panel threshold estimation in Hansen (1999) for unbalanced panels under the assumption of one threshold.
2Only parts of the paper are implemented – the threshold is calculated, bootstrap inference for the threshold is performed and slope coefficients for both regimes are calculated.
3For robust standard errors etc. any standard econometrics package can be used. In R, the PLM package is available.
4The user might want to tweak the number of gridpoints to allow for a finer or rougher search for the threshold, depending on computing power and time constraints this can be altered in lines 348 and 396.
Usage
51) run the entire code (this defines the functions)
62) load dataset into R and save it in a matrix. The format should be:
column (1) individual identifier from 1 to n
column (2) dependent variable
rest of the columns: independent variables
73) identify which column of the data matrix contains the threshold variable
identify the number of different individuals and the maximum number of time periods => these go into the functions as arguments
84) calculate the minimized SSR with:
minimized_SSR <- min_SSR(your_datamatrix, your_thresholdcolumn, howmany_individuals)
95) estimate with:
estimation_results <- PTR_estimates(your_datamatrix, your_thresholdcolumn, minimized_SSR, howmany_individuals, howmany_timeperiods)
106) bootstrap with:
boot_F_results <- PTR_F_boot(estimation_results[[3]], estimation_results[[4]], estimation_results[[5]], estimation_results[[6]], estimation_results[[7]], estimation_results[[8]], howmany_individuals, howmany_timeperiods, howmany_bootstrap_replications)
117) display the results with:
threshold_estimate <- estimation_results[[1]]
coefficient_estimates <- estimation_results[[2]]
threshold_estimate
coefficient_estimates
boot_F_results[[3]]
12This first function calculates the combined SSR above and below the threshold for any threshold it takes as inputs threshold, BVI_below, BVI_above, threshold_column, lastcol and number_individuals all of which are produced by the next function, min_SSR
SSR_calc <- function (threshold, BVI_below, BVI_above, threshold_column, lastcol, n){ for (i in 1:NROW(BVI_below)){
if (BVI_below[i, threshold_column] < threshold) BVI_below[i, NCOL(BVI_below)] <- 0
}
for (i in 1:NROW(BVI_above)){
if (BVI_above[i, threshold_column] > threshold) BVI_above[i, NCOL(BVI_above)] <- 1
}
BVI_below <- BVI_below[complete.cases(BVI_below),] BVI_above <- BVI_above[complete.cases(BVI_above),]
names_below <- as.matrix(c(unique(BVI_below[,1]))) names_above <- as.matrix(c(unique(BVI_above[,1])))
freq_below <- as.matrix(c(table(BVI_below[,1]))) freq_above <-as.matrix(c(table(BVI_above[,1])))
below_from <- matrix(data = NA, nrow = NROW(names_below), ncol = 1, byrow = TRUE) below_from[1,] <- 1
for (i in 2:(NROW(names_below))){
below_from[i,] <- below_from[i-1,]+freq_below[i-1,]
}
above_from <- matrix(data = NA, nrow = NROW(names_above), ncol = 1, byrow = TRUE) above_from[1,] <- 1
for (i in 2:(NROW(names_above))){
above_from[i,] <- above_from[i-1,]+freq_above[i-1,]
}
below_until <- matrix(data = NA, nrow = NROW(names_below), ncol = 1, byrow = TRUE) for (i in 1:(NROW(names_below))-1){
below_until[i,] <- below_from[i+1,]-1
}
below_until[NROW(below_until),] <- below_until[(NROW(below_until)-1),] + freq_below[NROW(below_until),]
above_until <- matrix(data = NA, nrow = NROW(names_above), ncol = 1, byrow = TRUE) for (i in 1:(NROW(names_above))-1){
above_until[i,] <- above_from[i+1,]-1
}
above_until[NROW(above_until),] <- above_until[(NROW(above_until)-1),] + freq_above[NROW(above_until),]
below_identifier_matrix <- cbind(names_below, freq_below, below_from, below_until) below_identifier_zero_individ <- matrix(data = NA, nrow = n, ncol = 4, byrow = TRUE)
for (i in 1:n){ below_identifier_zero_individ[i,1] <- i
}
for (i in 1:n){
for (j in 1:NROW(below_identifier_matrix)){
if (below_identifier_zero_individ[i,1] == below_identifier_matrix[j,1]) below_identifier_zero_individ[i,2:4] <- below_identifier_matrix[j,2:4]
}
}
below_identifier_matrix <- below_identifier_zero_individ below_identifier_aux1 <-below_identifier_matrix[,2] below_identifier_aux1[is.na(below_identifier_aux1)] <- 0 below_identifier_matrix <- cbind(below_identifier_matrix[,1], below_identifier_aux1,below_identifier_matrix[,3:4]) colnames(below_identifier_matrix) <- c("individ_name","individ_freq",
"individ_from", "individ_until")
above_identifier_matrix <- cbind(names_above, freq_above, above_from, above_until) above_identifier_zero_individ <- matrix(data = NA, nrow = n, ncol = 4, byrow = TRUE)
for (i in 1:n){ above_identifier_zero_individ[i,1] <- i
}
for (i in 1:n){
for (j in 1:NROW(above_identifier_matrix)){
if (above_identifier_zero_individ[i,1] == above_identifier_matrix[j,1]) above_identifier_zero_individ[i,2:4] <- above_identifier_matrix[j,2:4]
}
}
above_identifier_matrix <- above_identifier_zero_individ above_identifier_aux1 <- above_identifier_matrix[,2] above_identifier_aux1[is.na(above_identifier_aux1)] <- 0
above_identifier_matrix <- cbind(above_identifier_matrix[,1],above_identifier_aux1, above_identifier_matrix[,3:4])
colnames(above_identifier_matrix) <- c("individ_name","individ_freq",
"individ_from", "individ_until")
below_individuals <- list() above_individuals <- list()
for (i in 1:n){
if (below_identifier_matrix[i,2] == 0) next else if (below_identifier_matrix[i,2] == 1) next else
below_individuals[[i]] <- BVI_below[below_identifier_matrix[i,3]:
below_identifier_matrix[i,4],]
}
for (i in 1:n) {
if (above_identifier_matrix[i,2] == 0) next else if (above_identifier_matrix[i,2] == 1) next else
above_individuals[[i]] <- BVI_above[above_identifier_matrix[i,3]:
above_identifier_matrix[i,4],]
}
demeaned_below <- list() demeaned_above <- list()
for (i in 1:n){
if (below_identifier_matrix[i,2] == 0) next else if (below_identifier_matrix[i,2] == 1) next else
demeaned_below[[i]] <- below_individuals[[i]] for (j in 2:(lastcol-1)){
demeaned_below[[i]][,j] <-
below_individuals[[i]][,j] - mean(below_individuals[[i]][,j])
}
}
for (i in 1:n){
if (above_identifier_matrix[i,2] == 0) next else if (above_identifier_matrix[i,2] == 1) next else
demeaned_above[[i]] <- above_individuals[[i]] for (j in 2:(lastcol-1)){
demeaned_above[[i]][,j] <-
above_individuals[[i]][,j] - mean(above_individuals[[i]][,j])
}
}
enough_below <- 0
for (i in 1:NROW(below_identifier_matrix)){ if (below_identifier_matrix[i,2] > 0)
enough_below <- enough_below + 1
}
for (i in 1:1){
if (enough_below > 2) below_identifier_matrix_aux2 <- below_identifier_matrix[ complete.cases(below_identifier_matrix),][,1:2]
else
below_identifier_matrix_aux2 <- t(as.matrix (below_identifier_matrix[ complete.cases(below_identifier_matrix),]))[,1:2]
}
for (i in 1:NROW(below_identifier_matrix_aux2)){
if (below_identifier_matrix_aux2[i,2] == 1) below_identifier_matrix_aux2[i,] <- NA
}
enough_below <- 0
for (i in 1:NROW(below_identifier_matrix)){ if (below_identifier_matrix[i,2] > 1)
enough_below <- enough_below + 1
}
for (i in 1:1){
if (enough_below > 2)
below_identifier_matrix_aux2 <- below_identifier_matrix_aux2[ complete.cases(below_identifier_matrix_aux2),]
else
below_identifier_matrix_aux2 <- t(as.matrix (below_identifier_matrix_aux2[ complete.cases(below_identifier_matrix_aux2),]))
}
enough_above <- 0
for (i in 1:NROW(above_identifier_matrix)){ if (above_identifier_matrix[i,2] > 0)
enough_above <- enough_above + 1
}
for (i in 1:1){
if (enough_above > 2) above_identifier_matrix_aux2 <- above_identifier_matrix[ complete.cases(above_identifier_matrix),][,1:2]
else
above_identifier_matrix_aux2 <- t(as.matrix (above_identifier_matrix[ complete.cases(above_identifier_matrix),]))[,1:2]
}
for (i in 1:NROW(above_identifier_matrix_aux2)){
if (above_identifier_matrix_aux2[i,2] == 1) above_identifier_matrix_aux2[i,] <- NA
}
enough_above <- 0
for (i in 1:NROW(above_identifier_matrix)){ if (above_identifier_matrix[i,2] > 1)
enough_above <- enough_above + 1
}
for (i in 1:1){
if (enough_above > 2)
above_identifier_matrix_aux2 <- above_identifier_matrix_aux2[ complete.cases(above_identifier_matrix_aux2),]
else
above_identifier_matrix_aux2 <- t(as.matrix (above_identifier_matrix_aux2[ complete.cases(above_identifier_matrix_aux2),]))
}
below_identifier_matrix_aux2_from <- matrix(data = NA, nrow = NROW(below_identifier_matrix_aux2), ncol = 1) below_identifier_matrix_aux2_from[1,] <- 1
for (l in 1:1){
if (NROW(below_identifier_matrix_aux2_from) > 1) for (i in 2:NROW(below_identifier_matrix_aux2_from)){
below_identifier_matrix_aux2_from[i,]
<- below_identifier_matrix_aux2_from[i-1,] + below_identifier_matrix_aux2[i-1,2]
}
}
below_identifier_matrix_aux2_until <- matrix(data = NA, nrow = NROW(below_identifier_matrix_aux2), ncol = 1)
for (i in 1:NROW(below_identifier_matrix_aux2_until)){ below_identifier_matrix_aux2_until[i,]
<- below_identifier_matrix_aux2_from[i,] + below_identifier_matrix_aux2[i,2]-1
}
below_identifier_matrix_aux3 <- cbind(below_identifier_matrix_aux2, below_identifier_matrix_aux2_from,
below_identifier_matrix_aux2_until)
above_identifier_matrix_aux2_from <- matrix(data = NA, nrow = NROW(above_identifier_matrix_aux2), ncol = 1) above_identifier_matrix_aux2_from[1,] <- 1
for (l in 1:1){
if (NROW(above_identifier_matrix_aux2_from) > 1) for (i in 2:NROW(above_identifier_matrix_aux2_from)){
above_identifier_matrix_aux2_from[i,]
<- above_identifier_matrix_aux2_from[i-1,] + above_identifier_matrix_aux2[i-1,2]
}
}
above_identifier_matrix_aux2_until <- matrix(data = NA, nrow = NROW(above_identifier_matrix_aux2), ncol = 1)
for (i in 1:NROW(above_identifier_matrix_aux2_until)){ above_identifier_matrix_aux2_until[i,]
<- above_identifier_matrix_aux2_from[i,] + above_identifier_matrix_aux2[i,2]-1
}
above_identifier_matrix_aux3 <- cbind(above_identifier_matrix_aux2, above_identifier_matrix_aux2_from,
above_identifier_matrix_aux2_until)
below_identifier_matrix_twoplus <- matrix(data = NA, nrow = n, ncol = 4, byrow = TRUE) above_identifier_matrix_twoplus <- matrix(data = NA, nrow = n, ncol = 4, byrow = TRUE)
for (i in 1:n){
for (j in 1:NROW(below_identifier_matrix_aux3)){ if (below_identifier_matrix[i,1]
== below_identifier_matrix_aux3[j,1]) below_identifier_matrix_twoplus[i,]
<- below_identifier_matrix_aux3[j,]
}
}
for (i in 1:n){
for (j in 1:NROW(above_identifier_matrix_aux3)){ if (above_identifier_matrix[i,1]
== above_identifier_matrix_aux3[j,1]) above_identifier_matrix_twoplus[i,]
<- above_identifier_matrix_aux3[j,]
}
}
BIGMATRIX_below <- matrix(data = NA, nrow = below_identifier_matrix_aux3 [NROW(below_identifier_matrix_aux3),4],
ncol = lastcol-1, byrow = TRUE) BIGMATRIX_above <- matrix(data = NA,
nrow = above_identifier_matrix_aux3[NROW(above_identifier_matrix_aux3),4], ncol = lastcol-1, byrow = TRUE)
for (i in 1:n){
if (below_identifier_matrix[i,2] == 0) next else if (below_identifier_matrix[i,2] == 1) next else
BIGMATRIX_below[below_identifier_matrix_twoplus[i,3]:
below_identifier_matrix_twoplus[i,4],] <- demeaned_below[[i]][,1:(lastcol-1)]
}
for (i in 1:n){
if (above_identifier_matrix[i,2] == 0) next else if (above_identifier_matrix[i,2] == 1) next else
BIGMATRIX_above[above_identifier_matrix_twoplus[i,3]: above_identifier_matrix_twoplus[i,4],] <-
demeaned_above[[i]][,1:(lastcol-1)]
}
y_tilde_below <- as.matrix(BIGMATRIX_below[,2])
X_tilde_below <- cbind(BIGMATRIX_below[,3:NCOL(BIGMATRIX_below)])
y_tilde_above <- as.matrix(BIGMATRIX_above[,2])
X_tilde_above <- cbind(BIGMATRIX_above[,3:NCOL(BIGMATRIX_above)])
SSR_below <- t(y_tilde_below)%*%(diag(NROW(y_tilde_below)) - X_tilde_below%*%solve(crossprod(X_tilde_below))
%*%t(X_tilde_below))%*%y_tilde_below
SSR_above <- t(y_tilde_above)%*%(diag(NROW(y_tilde_above)) - X_tilde_above%*%solve(crossprod(X_tilde_above))
%*%t(X_tilde_above))%*%y_tilde_above SSR <- SSR_below + SSR_above
SSR
}
13This second function applies SSR_calc k times, where k is the number of points in the search grid over the quantiles of the threshold variable it takes as inputs data_matrix, threshold_column (of the data_matrix) and the number of individuals it produces a single 1x2 vector, the first column of which contains the minimized SSR, the second contains the gridpoint.
min_SSR <- function (data_matrix, threshold_column, n){ threshold_column <- threshold_column
number_individuals <- n
BIG_valid_index <- data_matrix[!apply(is.na(data_matrix), 1, any),] sortindicator <- matrix(data = NA,
nrow = NROW(BIG_valid_index), ncol = 1, byrow = TRUE) BVI_sortindicator <- cbind(BIG_valid_index, sortindicator) BVI_below <- BVI_sortindicator
BVI_above <- BVI_sortindicator lastcol <- NCOL(BVI_sortindicator)
thresh_grid <- as.matrix(quantile(BIG_valid_index[,threshold_column], seq(0.05,0.95, by = 0.0025)))
which_gridpoint <- as.matrix(c(1:NROW(thresh_grid))) thresh_grid <- cbind(thresh_grid, which_gridpoint) gridpoints <- NROW(thresh_grid)
SSR <- list()
for (k in 1:gridpoints){
cat("grid point", k, "of", gridpoints, "\n")
if (.Platform$OS.type == "windows") flush.console() threshold <- thresh_grid[k,1]
SSR[[k]] <- matrix(data = NA, nrow = 1, ncol = 2, byrow = TRUE) SSR[[k]][,1] <- SSR_calc(threshold, BVI_below, BVI_above, threshold_column, lastcol, number_individuals)
SSR[[k]][,2] <- k
}
SSR_matrix <- matrix(data = NA, nrow = gridpoints, ncol = 2, byrow = TRUE) for (i in 1:gridpoints){
SSR_matrix[i,1] <- SSR[[i]][,1]
SSR_matrix[i,2] <- SSR[[i]][,2]
}
for(i in 1:NROW(SSR_matrix)){
if (SSR_matrix[i,1] == min(SSR_matrix[,1])) min_SSR <- t(as.matrix(SSR_matrix[i,]))
}
min_SSR
}
14This third function uses the information from the minimization to calculate the slope parameters by splitting the sample at the calculated threshold inputs are data_matrix, threshold_column, minimized_SSR from the previous function min_SSR, the output of which should be stored in an object that is then inserted as an argument into the function and again the number of individual
PTR_estimates <- function(data_matrix, threshold_column, minimized_SSR, number_individuals, time_periods){
BIG_valid_index <- data_matrix[!apply(is.na(data_matrix), 1, any),]
sortindicator <- matrix(data = NA, nrow = NROW(BIG_valid_index), ncol = 1, byrow = TRUE)
BVI_sortindicator <- cbind(BIG_valid_index, sortindicator) BVI_below <- BVI_sortindicator
BVI_above <- BVI_sortindicator
lastcol <- NCOL(BVI_sortindicator) n <- number_individuals
thresh_grid <- as.matrix(quantile(BIG_valid_index[,threshold_column], seq(0.05,0.95, by = 0.0025)))
which_gridpoint <- as.matrix(c(1:NROW(thresh_grid))) thresh_grid <- cbind(thresh_grid, which_gridpoint) gridpoints <- NROW(thresh_grid)
threshold <- thresh_grid[minimized_SSR[,2],1] for (i in 1:NROW(BVI_below)){
if (BVI_below[i, threshold_column] < threshold)
BVI_below[i, NCOL(BVI_below)] <- 0
}
for (i in 1:NROW(BVI_above)){
if (BVI_above[i, threshold_column] > threshold) BVI_above[i, NCOL(BVI_above)] <- 1
}
BVI_below <- BVI_below[complete.cases(BVI_below),] BVI_above <- BVI_above[complete.cases(BVI_above),]
names_below <- as.matrix(c(unique(BVI_below[,1]))) names_above <- as.matrix(c(unique(BVI_above[,1])))
freq_below <- as.matrix(c(table(BVI_below[,1]))) freq_above <-as.matrix(c(table(BVI_above[,1])))
below_from <- matrix(data = NA, nrow = NROW(names_below), ncol = 1, byrow = TRUE)
below_from[1,] <- 1
for (i in 2:(NROW(names_below))){
below_from[i,] <- below_from[i-1,]+freq_below[i-1,]
}
above_from <- matrix(data = NA, nrow = NROW(names_above), ncol = 1, byrow = TRUE)
above_from[1,] <- 1
for (i in 2:(NROW(names_above))){
above_from[i,] <- above_from[i-1,]+freq_above[i-1,]
}
below_until <- matrix(data = NA, nrow = NROW(names_below), ncol = 1, byrow = TRUE)
for (i in 1:(NROW(names_below))-1){ below_until[i,] <- below_from[i+1,]-1
}
below_until[NROW(below_until),] <- below_until[(NROW(below_until)-1),]
+ freq_below[NROW(below_until),]
above_until <- matrix(data = NA, nrow = NROW(names_above), ncol = 1, byrow = TRUE)
for (i in 1:(NROW(names_above))-1){ above_until[i,] <- above_from[i+1,]-1
}
above_until[NROW(above_until),] <- above_until[(NROW(above_until)-1),]
+ freq_above[NROW(above_until),]
below_identifier_matrix <- cbind(names_below, freq_below, below_from, below_until)
below_identifier_zero_individ <- matrix(data = NA, nrow = n, ncol = 4, byrow = TRUE)
for (i in 1:n){ below_identifier_zero_individ[i,1] <- i
}
for (i in 1:n){
for (j in 1:NROW(below_identifier_matrix)){
if (below_identifier_zero_individ[i,1] == below_identifier_matrix[j,1]) below_identifier_zero_individ[i,2:4] <- below_identifier_matrix[j,2:4]
}
}
below_identifier_matrix <- below_identifier_zero_individ below_identifier_aux1 <- below_identifier_matrix[,2] below_identifier_aux1[is.na(below_identifier_aux1)] <- 0 below_identifier_matrix <- cbind(below_identifier_matrix[,1], below_identifier_aux1,below_identifier_matrix[,3:4]) colnames(below_identifier_matrix) <- c("individ_name","individ_freq", "individ_from", "individ_until")
above_identifier_matrix <- cbind(names_above, freq_above, above_from, above_until) above_identifier_zero_individ <- matrix(data = NA, nrow = n, ncol = 4, byrow = TRUE)
for (i in 1:n){ above_identifier_zero_individ[i,1] <- i
}
for (i in 1:n){
for (j in 1:NROW(above_identifier_matrix)){
if (above_identifier_zero_individ[i,1] == above_identifier_matrix[j,1]) above_identifier_zero_individ[i,2:4] <- above_identifier_matrix[j,2:4]
}
}
above_identifier_matrix <- above_identifier_zero_individ above_identifier_aux1 <-above_identifier_matrix[,2] above_identifier_aux1[is.na(above_identifier_aux1)] <- 0 above_identifier_matrix <- cbind(above_identifier_matrix[,1], above_identifier_aux1,above_identifier_matrix[,3:4]) colnames(above_identifier_matrix) <- c("individ_name","individ_freq", "individ_from", "individ_until")
below_individuals <- list() above_individuals <- list()
for (i in 1:n){
if (below_identifier_matrix[i,2] == 0) next else if (below_identifier_matrix[i,2] == 1) next else
below_individuals[[i]] <- BVI_below[below_identifier_matrix[i,3]:
below_identifier_matrix[i,4],]
}
for (i in 1:n) {
if (above_identifier_matrix[i,2] == 0) next else if (above_identifier_matrix[i,2] == 1) next else
above_individuals[[i]] <- BVI_above[above_identifier_matrix[i,3]:
above_identifier_matrix[i,4],]
}
demeaned_below <- list() demeaned_above <- list()
for (i in 1:n){
if (below_identifier_matrix[i,2] == 0) next else if (below_identifier_matrix[i,2] == 1) next else
demeaned_below[[i]] <- below_individuals[[i]] for (j in 2:(lastcol-1)){
demeaned_below[[i]][,j] <-
below_individuals[[i]][,j] - mean(below_individuals[[i]][,j])
}
}
for (i in 1:n){
if (above_identifier_matrix[i,2] == 0) next else if (above_identifier_matrix[i,2] == 1) next else
demeaned_above[[i]] <- above_individuals[[i]]
for (j in 2:(lastcol-1)){
demeaned_above[[i]][,j] <-
above_individuals[[i]][,j] - mean(above_individuals[[i]][,j])
}
}
enough_below <- 0
for (i in 1:NROW(below_identifier_matrix)){ if (below_identifier_matrix[i,2] > 0)
enough_below <- enough_below + 1
}
for (i in 1:1){
if (enough_below > 2)
below_identifier_matrix_aux2 <- below_identifier_matrix[complete.cases (below_identifier_matrix),][,1:2]
else
below_identifier_matrix_aux2 <- t(as.matrix (below_identifier_matrix[ complete.cases(below_identifier_matrix),]))[,1:2]
}
for (i in 1:NROW(below_identifier_matrix_aux2)){
if (below_identifier_matrix_aux2[i,2] == 1) below_identifier_matrix_aux2[i,] <- NA
}
enough_below <- 0
for (i in 1:NROW(below_identifier_matrix)){ if (below_identifier_matrix[i,2] > 1)
enough_below <- enough_below + 1
}
for (i in 1:1){
if (enough_below > 2)
below_identifier_matrix_aux2 <- below_identifier_matrix_aux2[complete.cases (below_identifier_matrix_aux2),]
else
below_identifier_matrix_aux2 <- t(as.matrix (below_identifier_matrix_aux2[ complete.cases(below_identifier_matrix_aux2),]))
}
enough_above <- 0
for (i in 1:NROW(above_identifier_matrix)){ if (above_identifier_matrix[i,2] > 0)
enough_above <- enough_above + 1
}
for (i in 1:1){
if (enough_above > 2)
above_identifier_matrix_aux2 <- above_identifier_matrix[complete.cases (above_identifier_matrix),][,1:2]
else
above_identifier_matrix_aux2 <- t(as.matrix (above_identifier_matrix[ complete.cases(above_identifier_matrix),]))[,1:2]
}
for (i in 1:NROW(above_identifier_matrix_aux2)){
if (above_identifier_matrix_aux2[i,2] == 1) above_identifier_matrix_aux2[i,] <- NA
}
enough_above <- 0
for (i in 1:NROW(above_identifier_matrix)){ if (above_identifier_matrix[i,2] > 1)
enough_above <- enough_above + 1
}
for (i in 1:1){
if (enough_above > 2)
above_identifier_matrix_aux2 <- above_identifier_matrix_aux2[ complete.cases(above_identifier_matrix_aux2),]
else
above_identifier_matrix_aux2 <- t(as.matrix (above_identifier_matrix_aux2[ complete.cases(above_identifier_matrix_aux2),]))
}
below_identifier_matrix_aux2_from <- matrix(data = NA, nrow = NROW(below_identifier_matrix_aux2), ncol = 1) below_identifier_matrix_aux2_from[1,] <- 1
for (l in 1:1){
if (NROW(below_identifier_matrix_aux2_from) > 1) for (i in 2:NROW(below_identifier_matrix_aux2_from)){
below_identifier_matrix_aux2_from[i,]
<- below_identifier_matrix_aux2_from[i-1,] + below_identifier_matrix_aux2[i-1,2]
}
}
below_identifier_matrix_aux2_until <- matrix(data = NA, nrow = NROW(below_identifier_matrix_aux2), ncol = 1)
for (i in 1:NROW(below_identifier_matrix_aux2_until)){ below_identifier_matrix_aux2_until[i,]
<- below_identifier_matrix_aux2_from[i,] + below_identifier_matrix_aux2[i,2]-1
}
below_identifier_matrix_aux3 <- cbind(below_identifier_matrix_aux2, below_identifier_matrix_aux2_from,
below_identifier_matrix_aux2_until)
above_identifier_matrix_aux2_from <- matrix(data = NA, nrow = NROW(above_identifier_matrix_aux2), ncol = 1) above_identifier_matrix_aux2_from[1,] <- 1
for (l in 1:1){
if (NROW(above_identifier_matrix_aux2_from) > 1) for (i in 2:NROW(above_identifier_matrix_aux2_from)){
above_identifier_matrix_aux2_from[i,]
<- above_identifier_matrix_aux2_from[i-1,] + above_identifier_matrix_aux2[i-1,2]
}
}
above_identifier_matrix_aux2_until <- matrix(data = NA, nrow = NROW(above_identifier_matrix_aux2), ncol = 1)
for (i in 1:NROW(above_identifier_matrix_aux2_until)){ above_identifier_matrix_aux2_until[i,]
<- above_identifier_matrix_aux2_from[i,] + above_identifier_matrix_aux2[i,2]-1
}
above_identifier_matrix_aux3 <- cbind(above_identifier_matrix_aux2, above_identifier_matrix_aux2_from,
above_identifier_matrix_aux2_until)
below_identifier_matrix_twoplus <- matrix(data = NA, nrow = n, ncol = 4, byrow = TRUE) above_identifier_matrix_twoplus <- matrix(data = NA, nrow = n, ncol = 4, byrow = TRUE)
for (i in 1:n){
for (j in 1:NROW(below_identifier_matrix_aux3)){ if (below_identifier_matrix[i,1]
== below_identifier_matrix_aux3[j,1]) below_identifier_matrix_twoplus[i,]
<- below_identifier_matrix_aux3[j,]
}
}
for (i in 1:n){
for (j in 1:NROW(above_identifier_matrix_aux3)){ if (above_identifier_matrix[i,1]
== above_identifier_matrix_aux3[j,1]) above_identifier_matrix_twoplus[i,]
<- above_identifier_matrix_aux3[j,]
}
}
BIGMATRIX_below <- matrix(data = NA,
nrow = below_identifier_matrix_aux3[NROW(below_identifier_matrix_aux3),4], ncol = lastcol-1, byrow = TRUE)
BIGMATRIX_above <- matrix(data = NA,
nrow = above_identifier_matrix_aux3[NROW(above_identifier_matrix_aux3),4], ncol = lastcol-1, byrow = TRUE)
for (i in 1:n){
if (below_identifier_matrix[i,2] == 0) next else if (below_identifier_matrix[i,2] == 1) next else
BIGMATRIX_below[below_identifier_matrix_twoplus[i,3]: below_identifier_matrix_twoplus[i,4],] <- demeaned_below[[i]][,1:(lastcol-1)]
}
for (i in 1:n){
if (above_identifier_matrix[i,2] == 0) next else if (above_identifier_matrix[i,2] == 1) next else
BIGMATRIX_above[above_identifier_matrix_twoplus[i,3]: above_identifier_matrix_twoplus[i,4],] <- demeaned_above[[i]][,1:(lastcol-1)]
}
y_tilde_below <- as.matrix(BIGMATRIX_below[,2])
X_tilde_below <- cbind(BIGMATRIX_below[,3:NCOL(BIGMATRIX_below)])
y_tilde_above <- as.matrix(BIGMATRIX_above[,2])
X_tilde_above <- cbind(BIGMATRIX_above[,3:NCOL(BIGMATRIX_above)])
beta_hat_below <- solve(crossprod(X_tilde_below))%*% crossprod(X_tilde_below,y_tilde_below) beta_hat_above <- solve(crossprod(X_tilde_above))%*% crossprod(X_tilde_above,y_tilde_above)
beta_hats <- as.matrix(cbind(beta_hat_below, beta_hat_above))
colnames(beta_hats) <- c("slope below threshold", "slope above threshold") rownames(beta_hats) <- as.matrix(colnames(data_matrix))[3:NCOL(data_matrix),]
results <- list()
results[[1]] <- as.matrix(thresh_grid[minimized_SSR[,2],1]) colnames(results[[1]]) <- "threshold estimate" results[[2]] <- beta_hats
results[[3]] <- y_tilde_below results[[4]] <- y_tilde_above results[[5]] <- X_tilde_below results[[6]] <- X_tilde_above results[[7]] <- BIGMATRIX_below results[[8]] <- BIGMATRIX_above
results
}
15This fourth function uses the information from the minimization to calculate the significance of the threshold.
16To this end, first, the actual F1 statistic is calculated.
17Then repeated draws are made from the residuals to create bootstrap samples under H0, which are then used to calculate the asymptotic p-value for the actual F1 statistic please refer to the original paper: note line 773 n or n*time_periods!
PTR_F_boot <- function(y_tilde_below, y_tilde_above, X_tilde_below, X_tilde_above, BIGMATRIX_below, BIGMATRIX_above, n, time_periods, bootstrap_replications){
SSR_below <- t(y_tilde_below)%*%(diag(NROW(y_tilde_below)) - X_tilde_below%*%solve(crossprod(X_tilde_below))%*% t(X_tilde_below))%*%y_tilde_below
SSR_above <- t(y_tilde_above)%*%(diag(NROW(y_tilde_above)) - X_tilde_above%*%solve(crossprod(X_tilde_above))%*% t(X_tilde_above))%*%y_tilde_above
SSR_H1 <- SSR_below + SSR_above
BIGMATRIX_H0 <- rbind(BIGMATRIX_below, BIGMATRIX_above) BIGMATRIX_H0 <- BIGMATRIX_H0[order(BIGMATRIX_H0[,1]),]
y_tilde_H0 <- as.matrix(BIGMATRIX_H0[,2])
X_tilde_H0 <- cbind(BIGMATRIX_H0[,3:NCOL(BIGMATRIX_H0)])
SSR_H0 <- t(y_tilde_H0)%*%(diag(NROW(y_tilde_H0)) - X_tilde_H0%*%solve(crossprod(X_tilde_H0))%*% t(X_tilde_H0))%*%y_tilde_H0
sigma_hat_squared <- SSR_H1/(n*time_periods) F1_stat <- (SSR_H0 - SSR_H1)/sigma_hat_squared
residuals_below <- (diag(NROW(y_tilde_below))
- X_tilde_below%*%solve(crossprod(X_tilde_below))
%*%t(X_tilde_below))%*%y_tilde_below regime_below <- matrix(data = 0, nrow = NROW(y_tilde_below), ncol = 1, byrow = TRUE)
bootmatrix_below <- cbind(as.matrix(BIGMATRIX_below[,1]), residuals_below, X_tilde_below, regime_below)
residuals_above <- (diag(NROW(y_tilde_above))
- X_tilde_above%*%solve(crossprod(X_tilde_above))
%*%t(X_tilde_above))%*%y_tilde_above regime_above <- matrix(data = 1, nrow = NROW(y_tilde_above), ncol = 1, byrow = TRUE)
bootmatrix_above <- cbind(as.matrix(BIGMATRIX_above[,1]), residuals_above, X_tilde_above, regime_above)
bootmatrix <- rbind(bootmatrix_below, bootmatrix_above) bootmatrix <- bootmatrix[order(bootmatrix[,1]),]
boot_individ_names <- as.matrix(c(unique(bootmatrix[,1]))) boot_individ_freq <- as.matrix(c(table(bootmatrix[,1])))
boot_individ_from <- matrix(data = NA,
nrow = NROW(boot_individ_names), ncol = 1, byrow = TRUE) boot_individ_from[1,] <- 1
for (i in 2:(NROW(boot_individ_names))){
boot_individ_from[i,] <- boot_individ_from[i-1,]+boot_individ_freq[i-1,]
}
boot_individ_until <- matrix(data = NA,
nrow = NROW(boot_individ_names), ncol = 1, byrow = TRUE) for (i in 1:(NROW(boot_individ_names)-1)){
boot_individ_until[i,] <- boot_individ_from[i+1,]-1
}
boot_individ_until[NROW(boot_individ_until),] <-
boot_individ_until[(NROW(boot_individ_until)-1),] + boot_individ_freq[NROW(boot_individ_until),]
boot_individ_identifier <- cbind(boot_individ_names, boot_individ_freq,
boot_individ_from, boot_individ_until)
boot_individ_identifier_zero_individ <- matrix(data = NA, nrow = n, ncol = 4, byrow = TRUE)
for (i in 1:n){ boot_individ_identifier_zero_individ[i,1] <- i
}
for (i in 1:n){
for (j in 1:NROW(boot_individ_identifier)){ if (boot_individ_identifier_zero_individ[i,1]
== boot_individ_identifier[j,1]) boot_individ_identifier_zero_individ[i,2:4]
<- boot_individ_identifier[j,2:4]
}
}
boot_individ_identifier <- boot_individ_identifier_zero_individ boot_individ_identifier_aux1 <- boot_individ_identifier[,2] boot_individ_identifier_aux1[is.na(boot_individ_identifier_aux1)]
<- 0
boot_individ_identifier <- cbind(boot_individ_identifier[,1],
boot_individ_identifier_aux1, boot_individ_identifier[,3:4])
bootstrap_results_matrix <- matrix(data = NA,
nrow = bootstrap_replications, ncol = 3, byrow = TRUE) colnames(bootstrap_results_matrix) <- c("iteration", "bootstrap F1 statistic", "smaller/larger than actual")
for (m in 1:bootstrap_replications){
cat("bootstrap iteration", m, "of", bootstrap_replications, "\n") if (.Platform$OS.type == "windows") flush.console()
bootsample <- bootmatrix[sample(1:NROW(bootmatrix), n, replace = TRUE),] beta_one <- as.matrix(diag(diag(NCOL(bootmatrix)-3)))
y_hat_one <- bootsample[,3:(NCOL(bootsample)-1)]%*%
beta_one + as.matrix(bootsample[,2])
bootsample <- cbind(bootsample[,1:2], y_hat_one, bootsample[,3:NCOL(bootsample)]) bootsample <- bootsample[order(bootsample[,1]),]
regime_below <- bootsample
for (i in 1:NROW(regime_below)){
if (regime_below[i,NCOL(regime_below)] == 1) regime_below[i,NCOL(regime_below)] <- NA
}
regime_below <- regime_below[complete.cases(regime_below),] regime_above <- bootsample
for (i in 1:NROW(regime_above)){
if (regime_above[i,NCOL(regime_above)] == 0) regime_above[i,NCOL(regime_above)] <- NA
}
regime_above <- regime_above[complete.cases(regime_above),]
y_boot_below <- as.matrix(regime_below[,3])
X_boot_below <- as.matrix(regime_below[,4:(NCOL(regime_below)-1)]) y_boot_above <- as.matrix(regime_above[,3])
X_boot_above <- as.matrix(regime_above[,4:(NCOL(regime_above)-1)]) y_boot_H0 <- as.matrix(bootsample[,3])
X_boot_H0 <- as.matrix(bootsample[,4:(NCOL(bootsample)-1)])
SSR_boot_below <- t(y_boot_below)%*%(diag(NROW(y_boot_below)) - X_boot_below%*%solve(crossprod(X_boot_below))
%*%t(X_boot_below))%*%y_boot_below
SSR_boot_above <- t(y_boot_above)%*%(diag(NROW(y_boot_above)) - X_boot_above%*%solve(crossprod(X_boot_above))
%*%t(X_boot_above))%*%y_boot_above SSR_boot_H1 <- SSR_boot_below + SSR_boot_above SSR_boot_H0 <- t(y_boot_H0)%*%(diag(NROW(y_boot_H0)) -
X_boot_H0%*%solve(crossprod(X_boot_H0))
%*%t(X_boot_H0))%*%y_boot_H0 sigma_hat_squared_boot <- SSR_boot_H1/(n*time_periods)
F1_stat_boot <- (SSR_boot_H0 - SSR_boot_H1)/sigma_hat_squared_boot
bootstrap_results_matrix[m,1] <- m bootstrap_results_matrix[m,2] <- F1_stat_boot
}
for (i in 1:bootstrap_replications){
if (bootstrap_results_matrix[i,2] > F1_stat) bootstrap_results_matrix[i,3] <- 1
if (bootstrap_results_matrix[i,2] < F1_stat) bootstrap_results_matrix[i,3] <- 0
}
p_value <- mean(bootstrap_results_matrix[,3])
F1_stat_for_list <- as.matrix(F1_stat) colnames(F1_stat_for_list) <- "actual F1 statistic" p_value_for_list <- as.matrix(p_value) colnames(p_value_for_list) <-
"bootstrap estimate of aym. p-value for the F1_stat" boot_results <- list()
boot_results[[1]] <- F1_stat_for_list boot_results[[2]] <- bootstrap_results_matrix boot_results[[3]] <- p_value_for_list boot_results
}
Le texte seul est utilisable sous licence Creative Commons - Attribution - Pas d'Utilisation Commerciale - Pas de Modification 4.0 International - CC BY-NC-ND 4.0. Les autres éléments (illustrations, fichiers annexes importés) sont « Tous droits réservés », sauf mention contraire.
The Development of International Refugee Protection through the Practice of the UN Security Council
Christiane Ahlborn
2010
The SWIFT Affair
Swiss Banking Secrecy and the Fight against Terrorist Financing
Johannes Köppel
2011
The Evolving Patterns of Lebanese Politics in Post-Syria Lebanon
The Perceptions of Hizballah among Members of the Free Patriotic Movement
Fouad Ilias
2010
La justice internationale à l'épreuve du terrorisme
Défis, enjeux et perspectives concernant la Commission d'enquête internationale indépendante (UNIIIC) et le Tribunal spécial pour le Liban
Sébastien Moretti
2009
Aut Dedere, aut Judicare: The Extradite or Prosecute Clause in International Law
Claire Mitchell
2009