#I recommend that you print this document, then type in each command #manually and examine the output. This will help you learn the commands in R #load packages and libraries library(bbmle) source("mleFunctions.r") #read data into R from a csv file (csv files can be created in Excel data <- read.csv("eejData.csv") #look at the aspects of the data file #each line represents a seperate clumping of plants, i.e. a replicate #"mInfl" is the number of infloresences on observed on a clump of Monarada plants #"visits" are the number of individual pollinators that visited in a set amount of time names(data) plot(data$mInfl,data$visits) data #you can also view data in a spreadsheet format using edit() #note, close spreadsheet to return to R edit(data) #c() is used to make a list #use [] to reference columns within data, and use quotes around the names of the columns data[c("mInfl","visits")] #by attaching the data file, you won't need to use "data$" before column names attach(data) #now that the data is attached you can refer directly to column names mInfl #try to simulate a data set that looks similar mean(visits) rnbinom(1,size=1,mu=6.29) rnbinom(72,size=1,mu=6.29) #make the expected number of visits a function of mInfl rnbinom(length(mInfl),size=1,mu=2+1*mInfl) #note, periods in R are used to seperate words in a multi-word variable, sim.visits #could also be written as simvisits or simVisits. sim.visits <- rnbinom(length(mInfl),size=1,mu=2+1*mInfl) plot(mInfl,sim.visits) plot(mInfl,visits) #try guess and check to get better parameter estimates for the slope and intercept sim.visits <- rnbinom(length(mInfl),size=1,mu=2+0.005*mInfl) #first, fit simpliest model ll <- function (m,s) -sum(dnbinom(visits,size=s,mu=m,log = TRUE)) guess <- list(m = 6, s = 1) fit <- mle2(ll, start=guess, method="Nelder-Mead") summary(fit) comp <- compare(fit, name="nbinom null") #now fit linear model ll <- function (m,b,s) -sum(dnbinom(visits,size=s,mu = m*mInfl + b ,log = TRUE)) guess <- list(m = 0.01, b = 2 , s = 1) fit <- mle2(ll, start=guess, method="Nelder-Mead") #this will list the detailed warning messages warnings() summary(fit) #use coef() to access the best fit parameters coef(fit) #use square brackets and the name of the parameter in quotes to extract an individual parameter coef(fit)["m"] #the curve function will draw a function of x curve(coef(fit)["m"]*x+coef(fit)["b"]) plot(mInfl,visits, xlab = "Monarda inflorescences",ylab = "visits") #for curve, the option "add=TRUE" will draw the curve ontop of the scatter plot curve(coef(fit)["m"]*x+coef(fit)["b"], add=TRUE) #dev.copy2eps will save the current figure as an eps file that can be used in other programs dev.copy2eps(file="fig.eps",width=8,height=6) comp <- compare(fit, name="nbinom linear", prev=comp) comp compare.likRatio(comp,1,2) #let's try a nonlinear function as well #quadratic ll <- function (m,m2,b,s) -sum(dnbinom(visits,size=s,mu = m2*mInfl^2 + m*mInfl + b ,log = TRUE)) guess <- list(m = 0.01, m2 = -0.01 , b = 2 , s = 1) fit <- mle2(ll, start=guess, method="Nelder-Mead") #R has a hard time fitting the quadratic because of a problem with # negative predictions. There are a few ways to handle this. The best #way is to use a constrained optimization method that could use a complex #expression. Unfortunately, I don't know of a good program that has this yet. #Another method it to transform the model to be valid across all parameter values. #Use exp(), which can take positive and negative values and output positive values ll <- function (m,m2,b,s) -sum(dnbinom(visits,size=s,mu = exp(m2*mInfl^2 + m*mInfl + b),log = TRUE)) fit <- mle2(ll, start=guess, method="Nelder-Mead") #had to modify guess to get it to fit; very sensitive to guess #for example, try changing the starting guess of b to 2.0 #be sure to change it back to b = 1.0 and run again for following steps guess <- list(m = 0.01, m2 = -0.000001 , b = 1.0 , s = 2) fit <- mle2(ll, start=guess, method="Nelder-Mead") curve(exp(coef(fit)["m2"]*x^2+coef(fit)["m"]*x+coef(fit)["b"]), add=TRUE) compare(fit, name="nbinom quad", prev=comp) #in this case, I found Mathematica did a much better job of finding the #best MLEs. They are list below in guess. guess <- list(m = 0.00231316, m2 = -0.000000911895 , b = 1.00162 , s = 1.77) fit <- mle2(ll, start=guess, method="Nelder-Mead") compare(fit, name="nbinom quad", prev=comp) #however, we can use mle.random to start at random inital conditions guess <- list(m = 0.01, m2 = -0.000001 , b = 1.0 , s = 2) fit <- mle.random(ll,guess,c(0,-0.00001,0,0.5),c(0.04,0,2,3),100) #the final parameter estimates are almost identical to those found in Mathematica summary(fit) compare(fit, name="nbinom quad", prev=comp) #Your turn #Redo the above analysis on a different subset of the data. #See if you can find a relationship between visits by pollinators (visits) #and knapweed infloresences (knInfl).