Hi, droglenc,
I am working on performing some power simulations using the FSA package. I am running into problems with the Zippin method after a number of simulations. Coded as:
zippy1<-apply(y,MARGIN=c(1,2),FUN=removal,method="Zippin",just.ests=TRUE)
However, when I substitute the "CarleStrub" method I do not have these issues (i.e., the simulations run to completion).
I have attached the input data file (zipEst2.xlsx) and copied and pasted my R code below. Do you have any insight into what may be going on? If you need more information from me please do not hesitate to ask.
I truly appreciate any time you are willing to give to my problem.
Sincerely,
Daniel Hanks
zipEst2.xlsx
# Simulation for multiple sites ----
library(FSAdata)
library(FSA)
library(arm)
library(lme4)

# Input data: one row per site-year; columns used below include No (count),
# Year, Site, Length (reach length, m), p (detection), Width, Elevation, Days.
zipEst2 <- read.csv(file="zipEst2.csv")

# Below is the simple model with no added covariates
#lme2=glmer(zipEst2$No~1+(1|zipEst2$Year)+(1|zipEst2$Site),family=poisson)
#summary(lme2)

# Abundance model: Poisson GLMM with random intercepts for year and site.
# (fixed: the "abundance model" header line had lost its '#' and was a syntax
# error when sourced)
lme2 <- glmer(No ~ (1|Year) + (1|Site), data=zipEst2, # kanno: removed covariates
              family=poisson, offset=log(Length/200)) # most sites sampled for 200 m
summary(lme2)
# Not sure but I may need to add the offset as a separate 'offset' statement
# kanno: add it as a separate 'offset' statement with a log transformation;
# see page 112 of Gelman & Hill (2007) - Kasey has a copy
# Detection model ----
# (fixed: this header and the two note lines below had lost their '#')
# Convert the field-estimated detection probability to the logit scale and
# standardize the candidate covariates before regression.
zipEst2$p.logit <- log(zipEst2$p/(1-zipEst2$p)) # logit(p) = log(p/(1-p))
zipEst2$WidthStd <- scale(zipEst2$Width)
zipEst2$ElevationStd <- scale(zipEst2$Elevation)
zipEst2$DaysStd <- scale(zipEst2$Days)
# Model with 3 covariates
model.p1 <- lm(p.logit ~ WidthStd + ElevationStd + DaysStd, data=zipEst2)
summary(model.p1)
# Elevation is not important: remove it
model.p2 <- lm(p.logit ~ WidthStd + DaysStd, data=zipEst2)
summary(model.p2)
# Simulation design ----
n.sites <- 20
n.years <- 5
n.passes <- 3

# Site-specific declining trends: a proportional annual change per site,
# converted to the log scale used by the abundance model.
r <- runif(n.sites, -0.01, 0) # kanno: draw from a range for multiple sites
trend <- log(1 + r)

# Detection on the logit scale: observed mean and SD across site-years.
p.logit <- log(zipEst2$p/(1-zipEst2$p)) # convert to logit scale
p.mu <- mean(p.logit)                   # mean detection on logit scale
p.sd.site <- sd(p.logit)                # sd of detection on logit scale

# Posterior draws from the fitted abundance and detection models.
n.sims <- 1000
library(arm); library(FSA)
simsN <- sim(lme2, n.sims)     # abundance
simsP <- sim(model.p2, n.sims) # detection
# Empty array for the trend p-values (col 1: first-pass counts,
# col 2: Zippin estimates).
p.vals <- array(NA, c(n.sims, 2))
# Main simulation loop ----
# For each posterior draw s: simulate true abundance and three-pass removal
# counts for every site-year, re-draw any catch history that would make the
# Zippin estimator fail, then test for a trend with (1) first-pass counts and
# (2) Zippin abundance estimates, storing the trend p-values in p.vals.
for(s in 1:n.sims){ # changed from k to s because k is used in the new function below
  print(s)
  # Posterior draw of the abundance intercept; random-effect SDs from the fit
  mu <- simsN@fixef[s,1]
  site.sd <- lme2@theta[1]
  year.sd <- lme2@theta[2]
  # stochastic factors affecting abundance
  year.ran <- rnorm(n.years, 0, year.sd)
  site.ran <- rnorm(n.sites, 0, site.sd)
  # factors affecting detection: posterior draws of regression coefficients
  # NOTE(review): beta.intercept is drawn but never used below; detection is
  # centered on p.mu instead -- confirm that is intended.
  beta.intercept <- simsP@coef[s,1]
  beta.width <- simsP@coef[s,2]
  beta.days <- simsP@coef[s,3]
  # kanno: removed width.ran/elev.ran/days.ran draws: we are using fixef for
  # covariates, thus no need for a random draw
  # Standardized covariate values simulated for each site-year combination.
  # (fixed: 'n.sitesn.years' was missing the '*' -- object-not-found error)
  width.sd <- array(rnorm(n.sites*n.years, 0, 1), dim=c(n.sites, n.years))
  days.sd <- array(rnorm(n.sites*n.years, 0, 1), dim=c(n.sites, n.years))
  # variation in p unexplained by covariates (noise)
  p.resids.sd <- sd(model.p2$residuals)
  p.sample.ran <- array(rnorm(n.sites*n.years, 0, p.resids.sd),
                        dim=c(n.sites, n.years))
  # Containers: expected (lambda) and realized (N) abundance, detection (p),
  # removal estimates (zippy); y holds the three removal-pass counts.
  N <- p <- lambda <- zippy <- array(NA, dim=c(n.sites, n.years),
                                     dimnames=list(paste("site", 1:n.sites),
                                                   paste("year", 1:n.years)))
  y <- array(NA, dim=c(n.sites, n.years, n.passes),
             dimnames=list(paste("site", 1:n.sites),
                           paste("year", 1:n.years),
                           paste("pass", 1:n.passes)))
  for(i in 1:n.sites){
    for(j in 1:n.years){
      # abundance: site-specific trend accumulates on the log scale
      lambda[i,j] <- exp(mu + (trend[i]*(j-1)) + site.ran[i] + year.ran[j])
      N[i,j] <- rpois(1, lambda[i,j])
      # detection: mean + covariate effects + unexplained noise
      # (fixed: 'beta.widthwidth.sd' / 'beta.daysdays.sd' were missing the
      # '*' between coefficient and covariate)
      # kanno: these are used as coef
      p[i,j] <- plogis(p.mu +
                       beta.width*width.sd[i,j] +
                       beta.days*days.sd[i,j] +
                       p.sample.ran[i,j])
      # observed counts: three sequential removal passes without replacement
      y[i,j,1] <- rbinom(1, N[i,j], p[i,j])
      y[i,j,2] <- rbinom(1, N[i,j]-y[i,j,1], p[i,j])
      y[i,j,3] <- rbinom(1, N[i,j]-y[i,j,1]-y[i,j,2], p[i,j])
    }
  }
  print("Finished up to line 208")
  ####################################################################
  # The following code re-draws catch data that would trigger the error
  # "Catch data results in Zippin model failure."
  # See https://github.com/droglenc/FSA/blob/c377e5a246f620e84860a4b35f7fc8c835105599/R/removal.R

  # Summary statistics of one removal catch history (FSA's internal
  # iRemovalKTX, line 230 of the above github site).
  iRemovalKTX <- function(catch) {
    # number of samples
    k <- length(catch)
    # total catch
    T <- sum(catch)
    # cumulative catch prior to ith sample;
    # same as sum(cumsum(catch)[-k]) as used in Schnute (1983)
    i <- 1:k
    X <- sum((k-i)*catch)
    # return named vector
    c(k=k, T=T, X=X)
  }
  for(i in 1:n.sites){
    for(j in 1:n.years){
      print("Running iRemovelKTX")
      int <- iRemovalKTX(y[i,j,]) # line 433 of the above github site
      k <- int[["k"]]
      T <- int[["T"]]
      X <- int[["X"]]
      print("Ran iRemovelKTX")
      # This condition is from equation 6 in Carle & Strub (1978): catch
      # histories satisfying it make the Zippin estimator fail.
      if (X <= (((T-1)*(k-1))/2)-1) {
        print("Trying to get a new X")
        # NOTE(review): when p[i,j] is very small this repeat loop may need
        # many iterations (or, in pathological cases, effectively never exit);
        # consider a maximum-attempts guard.
        repeat {
          y[i,j,1] <- rbinom(1, N[i,j], p[i,j])
          y[i,j,2] <- rbinom(1, N[i,j]-y[i,j,1], p[i,j])
          y[i,j,3] <- rbinom(1, N[i,j]-y[i,j,1]-y[i,j,2], p[i,j])
          int <- iRemovalKTX(y[i,j,]) # line 433 of the above github site
          k <- int[["k"]]
          T <- int[["T"]]
          X <- int[["X"]]
          # dh: this was X<= so I changed to > so that the loop breaks once
          # the condition is met
          if (X > (((T-1)*(k-1))/2)-1) break
        }
        print("Got new X")
      }
    }
  }
  print("Ran up to line 265")
  ####################################################################
  # Now run the Zippin removal estimator on every site-year catch history
  zippy1 <- apply(y, MARGIN=c(1,2), FUN=removal, method="Zippin", just.ests=TRUE)
  library(reshape2)
  print("Finished apply on line 266")
  # First row of zippy1 is taken as the abundance estimate; reshape to long
  # format and pair with the first-pass counts by Site/Year.
  zippy2 <- melt(zippy1[1,,], value.name="ZipEst", varnames=c('Site','Year'))
  firstPass <- melt(y[,,1], value.name="firstPcount", varnames=c('Site','Year'))
  result <- merge(zippy2, firstPass)
  print("Finished merge on line 271")
  # "year 3" -> 3
  result$YearNum <- as.numeric(substring(result$Year, 5))
  library(dplyr)
  result <- arrange(result, Site, YearNum)
  print("Running glmer:")
  # Trend tests: Poisson GLMMs of counts/estimates against year
  lm.sim.single <- glmer(firstPcount ~ YearNum + (1|Site) + (1|Year), data=result, family=poisson)
  lm.sim.zip <- glmer(ZipEst ~ YearNum + (1|Site) + (1|Year), data=result, family=poisson)
  print("Finished running glmers")
  # Store the p-value of the YearNum (trend) coefficient
  p.vals[s,1] <- coef(summary(lm.sim.single))[2,4]
  p.vals[s,2] <- coef(summary(lm.sim.zip))[2,4]
}
# Power summary ----
p.vals
# Proportion of simulations that detected a trend (p < 0.05).
# (fixed: the original length(x[x < 0.05]) construction counted NA p-values
# -- e.g. from iterations that failed part-way -- toward the numerator,
# inflating the power estimate; NAs are now excluded explicitly.)
(sum(p.vals[,1] < 0.05, na.rm=TRUE)/n.sims) # first-pass counts
(sum(p.vals[,2] < 0.05, na.rm=TRUE)/n.sims) # Zippin estimates