####################################################### # WIS 6934: # # R Primer # ####################################################### ########################################### #DATA TYPES ########################################### x<-4 #making a vector x.vector<-vector(mode="numeric",length=4) x.vector<-1:10 x.vector<-c(0,4,0,1) #making a matrix x.matrix<-matrix(0, nrow =5, ncol=5) #making an array x.array<-array(0,dim=c(5,5,2)) #making a list, that is a combination of data objects already defined x.list<-list(x.vector,x.matrix) #accessing data x.vector x.vector[1:3] x.matrix x.matrix[2,2]<-5 x.matrix[,2]<-2 x.matrix[3,]<-3 diag(x.matrix)<-1 x.list[[1]] x.list[[2]] x.list[[1]][2] x.list[[2]][,3] ###################################### #GETTING HELP ###################################### help(package="MASS") args(glm) ?glm ??glm glm ###################################### #IMPORT DATA ###################################### setwd('/Users/MauLaptop/Downloads/birddata') #the path will change for your work birds <-read.table("VATH_NAD83.txt",header=T) #this is a dataframe ##################################### #EXPLORING DATA ##################################### names(birds) #accessing column 1 of data; works for matrices/arrays/dataframes birds[ , 1] #Or for dataframes: birds$SURVEYID #accessing row 1 of data birds[1, ] #number of dimensions dim(birds) #number of unique values in the factor MESIC unique(birds$Mesic) #If you want to know how R is interpreting the variables in the dataframe (e.g., are they continuous or categorical?), type: str(birds) #subsetting data, by rows bird.1994<- birds[birds$YEAR=="1994", ] #you could also do this with the 'subset' function: bird.1994.sub<- subset(birds, birds$YEAR=="1994") #For the mean value of Elevation mean(birds$Elev) #For the median value of Elevation median(birds$Elev) #Let?s say you want to calculate one of these functions (e.g., the mean) on a subset of the data. Simply type: tapply(birds$VATH, birds$Mesic, mean) #If we wanted to simultaneously calculate summary statistics for many variables #we could use the sapply or lapply functions, or the summary function: summary(birds) #Convert Elev to a category (<1000m or >1000m). First add column for cat data birds$Elev_Cat=0 birds$Elev_Cat[birds$Elev>1]=1 #Or, using a 'for' loop: for(i in 1:1908){ birds$Elev_Cat2[i]<-ifelse(birds$Elev[i]>1, 1, 0) } str(birds) birds$Elev_Cat<-factor(birds$Elev_Cat) str(birds) ################################### #GRAPHICS IN R ################################### #plot the points in geographic space plot(birds$EASTING, birds$NORTHING) #change labels plot(NORTHING~EASTING, data = birds, xlab="easting (UTMs)", ylab="northing (UTMs)") #scatter plot plot(birds$Elev,birds$CC2) #multipanel plots plot(birds[ ,6:8]) #make a 2 panel plot, on same row par(mfrow=c(1,2)) #sets up the graph window to store two graphs plot(birds$Elev~birds$CC1, data=birds) plot(birds$Elev~birds$CC2, data=birds) dev.off()#turn off two-panel format #################################################### #subset to presence and absence #################################################### VATHpres<-birds[birds$VATH==1,] #subset to presence-only VATHabs<-birds[birds$VATH==0,] #subset to absences pres.coord<-VATHpres[,15:16] plot(x = VATHpres$EASTING, y = VATHpres$NORTHING) points(x = VATHabs$EASTING, y = VATHabs$NORTHING, col='red') par(mfrow=c(1,2)) hist(VATHabs[,7], main="CC2 Absence", xlim=c(-1,1), col="grey") hist(VATHpres[,7], main="CC2 Presence", xlim=c(-1,1), col="red") dev.off()#turn off two-panel format ############################################################### #simple logistic GLM ############################################################### library(MASS) #for stepwise AIC comparison #check correlation between explanatory variables cor(birds[,5:8],method="pearson") #pearson cor(birds[,5:8], method="spearman") #spearman logistic.vath<-glm(VATH~CC1+CC2+Elev,family="binomial", data=birds) logistic.vath2<-glm(VATH~CC1+CC2+Elev+CC1:Elev+CC2:Elev,family="binomial", data=birds) logistic.vath3<-glm(VATH~CC1*CC2*Elev,family="binomial", data=birds) summary(logistic.vath) summary(logistic.vath2) summary(logistic.vath3) stepAIC(logistic.vath3) #backward selection, based on AIC #make a partial plot of predicted relationships #to do so, make a new dataframe with relevant data CC2 <- seq(min(birds$CC2), max(birds$CC2), 0.1) CC1 <- rep(mean(birds$CC1),length(CC2)) #set to mean Elev <- rep(mean(birds$Elev),length(CC2)) #set to mean newdata1 <- data.frame(CC1,CC2,Elev) logistic.pred<-predict(logistic.vath, newdata=newdata1, type = "response") #type=response for predicted probabilities plot(c(-1,1), c(0,1), type = "n", xlab = "CC2",ylab = "prob") lines(CC2,logistic.pred,col = "red") #redo, adding approximate CI logistic.pred<-predict(logistic.vath, newdata=newdata1, type = "response", se=TRUE) #type=response for predicted probabilities ucl<-logistic.pred$fit+1.96*logistic.pred$se.fit lcl<-logistic.pred$fit-1.96*logistic.pred$se.fit plot(c(-1,1), c(0,1), type = "n", xlab = "CC2",ylab = "prob") lines(CC2,logistic.pred$fit,col = "red") lines(CC2,lcl,col = "blue") lines(CC2,ucl,col = "blue") ########################################### #GEOGRAPHIC DATA / MAPPING IN R ########################################### library(raster) #for raster data library(rgdal) # for reading different types of GIS files library(maptools) #for vector data #open and plot some raster layers Elev<-raster("elev.asc") #elevation layer CC1<-raster("cc1.asc") #non-linear (modal) gradient in canopy cover CC2<-raster("cc2.asc") #linear gradient in canopy cover Elev # set the coordinate reference system (CRS) (define the projection) # see http://www.remotesensing.org/geotiff/proj_list/ for a list of most projections layers<-stack(CC1, CC2, Elev) #makes a multilayered file for sampling availability names(layers)<-c("CC1","CC2", "Elev") projection(layers) <- "+proj=utm +zone=48 +datum=WGS84" nlayers(layers) plot(Elev) ############################## #some raster calculations ############################## #aggregate cells: Elev_10km<-aggregate(Elev, fact=48, fun=mean) #fact is the number of cells in each direction to merge/aggregate; so, with a 210m grain, (5)*210 ~1km Elev_10km plot(Elev_10km) #summaries/plots hist(Elev_10km) contour(Elev_10km) persp(Elev_10km) #extract raster values from points pres.cov<-extract(layers, pres.coord) #calculate cov within buffers; THIS IS SLOW! pres.cov.1km<-extract(layers, pres.coord, buffer=500, fun=mean, na.rm=TRUE) #raster algebra example Elev_extreme<-Elev values(Elev_extreme)<-0 Elev_extreme[Elev<0.5]<-1 par(mfrow=c(1,2)) plot(Elev) plot(Elev_extreme) dev.off() #generate 200 random points/background data random.cov<-sampleRandom(layers,200,xy=TRUE) plot(Elev_10km) points(random.cov[,1:2]) #project the predicted map: logistic.raster<-predict(model=logistic.vath, object=layers, fun = predict,type = "response") plot(logistic.raster, xlab = "Longitude", ylab = "Latitude") points(pres.coord) logit.se.raster<-predict(model=logistic.vath, object=layers, fun = predict,type = "response",se=TRUE) #function for projecting the map and the SE predfun <- function(model, data) { v <- predict(model, data, se.fit=TRUE, type="response") cbind(p=as.vector(v$fit), se=as.vector(v$se.fit)) } #predfun returns list of two variables, so use index=1:2 logit.se.raster<- predict(layers, logistic.vath, fun=predfun, index=1:2) plot(logit.se.raster[[1]], xlab = "Longitude", ylab = "Latitude", main="prediction") #prediction on real scale plot(logit.se.raster[[2]], xlab = "Longitude", ylab = "Latitude",main="SE") #SE dev.off()