#I recommend that you print this document, then type in each command
#manually and examine the output.  This will help you learn the commands in R

#load packages and libraries
library(bbmle)
source("mleFunctions.r")
#read data into R from a csv file (csv files can be created in Excel
data <- read.csv("eejData.csv")
#look at the aspects of the data file
#each line represents a seperate clumping of plants, i.e. a replicate
#"mInfl" is the number of infloresences on observed on a clump of Monarada plants
#"visits" are the number of individual pollinators that visited in a set amount of time
names(data)
plot(data$mInfl,data$visits)
data
#you can also view data in a spreadsheet format using edit()
#note, close spreadsheet to return to R
edit(data)
#c() is used to make a list
#use [] to reference columns within data, and use quotes around the names of the columns
data[c("mInfl","visits")]

#by attaching the data file, you won't need to use "data$" before column names 
attach(data)
#now that the data is attached you can refer directly to column names
mInfl
#try to simulate a data set that looks similar
mean(visits)
rnbinom(1,size=1,mu=6.29)
rnbinom(72,size=1,mu=6.29)
#make the expected number of visits a function of mInfl
rnbinom(length(mInfl),size=1,mu=2+1*mInfl)
#note, periods in R are used to seperate words in a multi-word variable, sim.visits
#could also be written as simvisits or simVisits.
sim.visits <- rnbinom(length(mInfl),size=1,mu=2+1*mInfl)
plot(mInfl,sim.visits)
plot(mInfl,visits)
#try guess and check to get better parameter estimates for the slope and intercept
sim.visits <- rnbinom(length(mInfl),size=1,mu=2+0.005*mInfl)

#first, fit simpliest model
ll <- function (m,s)
-sum(dnbinom(visits,size=s,mu=m,log = TRUE))
guess <- list(m = 6, s = 1)
fit <- mle2(ll, start=guess, method="Nelder-Mead")
summary(fit)
comp <- compare(fit, name="nbinom null")

#now fit linear model
ll <- function (m,b,s)
-sum(dnbinom(visits,size=s,mu = m*mInfl + b ,log = TRUE))
guess <- list(m = 0.01, b = 2 , s = 1)
fit <- mle2(ll, start=guess, method="Nelder-Mead")
#this will list the detailed warning messages
warnings()
summary(fit)
#use coef() to access the best fit parameters
coef(fit)
#use square brackets and the name of the parameter in quotes to extract an individual parameter
coef(fit)["m"]
#the curve function will draw a function of x
curve(coef(fit)["m"]*x+coef(fit)["b"])
plot(mInfl,visits, xlab = "Monarda inflorescences",ylab = "visits")
#for curve, the option "add=TRUE" will draw the curve ontop of the scatter plot
curve(coef(fit)["m"]*x+coef(fit)["b"], add=TRUE)
#dev.copy2eps will save the current figure as an eps file that can be used in other programs
dev.copy2eps(file="fig.eps",width=8,height=6)
comp <- compare(fit, name="nbinom linear", prev=comp)
comp
compare.likRatio(comp,1,2)

#let's try a nonlinear function as well
#quadratic
ll <- function (m,m2,b,s)
-sum(dnbinom(visits,size=s,mu = m2*mInfl^2 + m*mInfl + b ,log = TRUE))
guess <- list(m = 0.01, m2 = -0.01 , b = 2 , s = 1)
fit <- mle2(ll, start=guess, method="Nelder-Mead")

#R has a hard time fitting the quadratic because of a problem with 
# negative predictions.  There are a few ways to handle this.  The best
#way is to use a constrained optimization method that could use a complex
#expression.  Unfortunately, I don't know of a good program that has this yet.
#Another method it to transform the model to be valid across all parameter values.
#Use exp(), which can take positive and negative values and output positive values
ll <- function (m,m2,b,s)
-sum(dnbinom(visits,size=s,mu = exp(m2*mInfl^2 + m*mInfl + b),log = TRUE))
fit <- mle2(ll, start=guess, method="Nelder-Mead")

#had to modify guess to get it to fit; very sensitive to guess
#for example, try changing the starting guess of b to 2.0
#be sure to change it back to b = 1.0 and run again for following steps
guess <- list(m = 0.01, m2 = -0.000001 , b = 1.0 , s = 2)
fit <- mle2(ll, start=guess, method="Nelder-Mead")

curve(exp(coef(fit)["m2"]*x^2+coef(fit)["m"]*x+coef(fit)["b"]), add=TRUE)

compare(fit, name="nbinom quad", prev=comp)

#in this case, I found Mathematica did a much better job of finding the
#best MLEs.  They are list below in guess.
guess <- list(m = 0.00231316, m2 = -0.000000911895 , b = 1.00162 , s = 1.77)
fit <- mle2(ll, start=guess, method="Nelder-Mead")
compare(fit, name="nbinom quad", prev=comp)

#however, we can use mle.random to start at random inital conditions
guess <- list(m = 0.01, m2 = -0.000001 , b = 1.0 , s = 2)
fit <- mle.random(ll,guess,c(0,-0.00001,0,0.5),c(0.04,0,2,3),100)
#the final parameter estimates are almost identical to those found in Mathematica
summary(fit)
compare(fit, name="nbinom quad", prev=comp)

#Your turn

#Redo the above analysis on a different subset of the data.
#See if you can find a relationship between visits by pollinators (visits)
#and knapweed infloresences (knInfl).