#FUNCTION AVERAGING -- SIMULATION OF DECREASING TEMPORAL RESOLUTION OF MULTIPLE ASSEMBLAGES SAMPLED IN ONE HABITAT AT ONE TIME
#ABUNDANCES OF DEAD INDIVIDUALS ARE TRACKED IN SIMULATIONS
#(1) LOCAL COMMUNITY DYNAMICS FOLLOWS INDIVIDUAL-BASED NEUTRAL DISPERSAL-LIMITED MODEL AT LOCAL SCALES
#(2) STATIC AND NEUTRAL LOG-SERIES-LIKE METACOMMUNITY AT REGIONAL SCALES
#OUPUT IS REPRESENTED BY DATA FRAME "FRAME" with dispersion (beta diversity, with 95% confidence intervals), and richness (and 95% confidence intervals) IN COLUMNS
#PREDICTED FOR INDIVIDUAL VALUES OF TIME AVERAGING IN ROWS
#THE MODEL PRODUCES A TWO-DIMENSIONAL ARRAY "MATRIXOFDEADS" OF DEAD INDIVIDUALS (SPECIES IDENTITY TRACKED BY THEIR NUMBER) 
#THAT DIED OVER N YEARS OF TIME SERIES (ROWS) IN N SIMULATIONS (COLUMNS)
#THIS ARRAY IS THEN USED FOR AVERAGING PROCEDURE WHERE INDIVIDUALS ARE POOLED, AND DEAD INDIVIDUALS ARE EITHER 
#SAMPLED FROM UNIFORM (PER CAPITA PRESERVATION RATE = 1) 
#SAMPLE SIZE OF AVERAGED ASSEMBLAGES IS FIXED WITH INCREASING TIME AVERAGING
#SOURCE: TOMASOVYCH A. AND KIDWELL, S.M. 2010. Predicting the effects of increasing temporal scale on species composition, diversity, 
#and rank-abundance distributions. PALEOBIOLOGY.

#############################################################################################################################################
#LIBRARIES
library(vegan)
library(untb)

#INPUT
#metaabundances -- vector of species abundances in the metacommunity (species pool)
metaabundances<-c(28435,15050,10991,8227,6368,5180,4250,3438,2872,2402,1961,1702,               
1407,1212,962,802,658,566,488,436,360,305,271,229,199,171,148,134,116,95,80,69,
60,50,43,37,33,29,25,22,19,16,13,11,9,8,7,6,5,4,3,3,2,2,2,2,1,1,1,1,1)
#dispersal -- dispersal number in Etienne(2005), immigration rate m is m<-dispersal/(dispersal+Nlive-1)
dispersal<-20
#Nlive -- number of (living) individuals in nonaveraged assemblages
Nlive<-200
#Ndead  -- number of (fossil) individuals in averaged assemblages
Ndead<-500
#time -- number of years in the model, includes time to equilibrium
time<-500
#timetoequilibrium -- number of years needed to get steady state diversity in local nonaveraged (living) assemblages
timetoequilibrium<-200 
#this time to equilibrium does not go into the output
#lifespan -- average lifespan determines the number of individual turnovers per year
lifespan<-3
#simulations -- number of simulations, you can start with smaller values to see how quickly it runs
#however, the number of simulations should be much higher than the number of samples (local assemblages) per habitat (!!!) because 
#these samples are effectively randomly sampled from all simulated runs (because local assemblages are just connected to the metacommunity)
simulations<-100
#TAvalues --  vector of time averaging durations that are used in simulations
TAvalues<-c(5,25,50,100,250)
#samples -- number of samples of averaged assemblages per habitat
samples<-5
#method  -- dissimilarity used in quantifying multivariate dispersion (Bray-Curtis, Jaccard, Hellinger, Horn-Morisita)
method="hellinger"

#############################################################################################################################################
#FUNCTION
averaging<-function(method,metaabundances, Nlive, Ndead, dispersal, simulations, time, timetoequilibrium,lifespan, TAvalues, samples) {
metaabundances<-metaabundances; Nlive<-Nlive;Ndead<-Ndead;dispersal<-dispersal; simulations<-simulations;
time<-time; lifespan<-lifespan;TAvalues<-TAvalues; samples<-samples;method<-method;timetoequilibrium<-timetoequilibrium;

#BUILD INDIVIDUALS IN METACOMMUNITY USING ABUNDANCE VECTOR (OBSERVED OR PRODUCED BY MODELING)
metacommunity<-rep(0,0); 
for (i in 1:length(metaabundances)) {
                v<-rep(i, times=metaabundances[i])
                metacommunity<-append(metacommunity, v, after = length(metacommunity))
                }
numberofspecies<-length(metaabundances)
                
#INITIAL PARAMETERS
        REPLACE<-round(round(Nlive/lifespan))           #number of replacements per year
        m<-dispersal/(dispersal+Nlive-1)                #immigration rate
        local<-rep(NA, Nlive)                           #species in living assemblage

#MATRIXOFDEADS - ARRAY WITH VECTORS CONTAINING INDIVIDUALS THAT DIED DURING N YEARS (MINUS THE NUMBER OF INITIAL DISEQUILIBRIUM YEARS)
#IS DETERMINED BY THE NUMBER OF INDIVIDUALS DIED DURING ONE YEAR (DETERMINED BY LIFESPAN) TIMES NUMBER OF YEARS
        matrixofdeads<-array(0,dim=c((REPLACE*time)-(timetoequilibrium*REPLACE), simulations))
        deadinputs<-array(0,dim=c(REPLACE, time))                               #yearly inputs of dead species for N years
        liveinputs<-array(0, dim=c(Nlive, time))                                #yearly inputs of live species for N years
        livetimematrix<-array(0,dim=c(numberofspecies, time,simulations))       #array of abundances, rows - species, columns - snaphsot assemblages
        deadinputmatrix<-array(0,dim=c(numberofspecies, time,simulations))      #array of abundances, rows - species, columns - yearly inputs 



#LOCAL COMMUNITY DRIFT - INDIVIDUALS SUBJECTED TO MORTALITY 
#CAN CONTRIBUTE TO OFFSPRING IN THE NEXT GENERATION - MORAN MODEL

for (j in 1:simulations) {                                              #repeat local dynamics for N simulations
        for (i in 1:Nlive) 
                        {local[i]<-sample(metacommunity, 1, replace=T)
                        }                                               #initialize live assemblage from metacommunity
        simulationdeadvector<-rep(0,0)                                  #vector for died individuals for one simulation
        for (s in 1:time) {                                             #loop for N time steps (years)
        yeardeadvector<-rep(0,0)                                        #vector for died individuals for one time step
                for (t in 1:REPLACE) {                                  #simulate local dynamics within one year, for the number of replacements
                        random<-runif(1,0,1)

                        #DENSITY-INDEPENDENT MORTALITY
                        deadind<-sample(Nlive, 1, replace=F)            #density-independent mortality
                        deads<-local[deadind]                           #identify species identity of dead individuals

                        #LOOP FOR MIGRATION
                                if (random<m) 
                                        {local[deadind]<-sample(metacommunity, 1, replace=T)
                                }                                       #immigration from metacommunity
                                else    {local[deadind]<-sample(local, 1, replace=T)
                                }                                       #recruitment from local assemblage
                        yeardeadvector<-append(yeardeadvector, deads, after = length(yeardeadvector))   #build vector of dead inds
                        }
        liveinputs[,s]<-local                                           #yearly inputs of live species for N years
        deadinputs[1:(length(yeardeadvector)),s]<-yeardeadvector        #yearly inputs of dead species for N years

        #VECTOR OF ALL INDIVIDUALS THAT DIED DURING N YEARS
        simulationdeadvector<-append(simulationdeadvector, yeardeadvector, after = length(simulationdeadvector)) 

        #ABUNDANCES OF SPECIES IN YEARLY INPUTS INTO DEATH ASSEMBLAGES [SPECIES, YEARS, SIMULATIONS]
        deadinputmatrix[1:length(species.table(deadinputs[1:REPLACE,s])),s,j]<-species.table(deadinputs[1:REPLACE,s])

        #ABUNDANCES OF SPECIES IN LIVING ASSEMBLAGES [SPECIES, YEARS, SIMULATIONS]
        livetimematrix[1:length(species.table(liveinputs[1:Nlive,s])),s,j]<-species.table(liveinputs[1:Nlive,s])        
        }
matrixofdeads[1:length(simulationdeadvector[((timetoequilibrium*REPLACE)+1):(time*REPLACE)]),j]<-simulationdeadvector[((timetoequilibrium*REPLACE)+1):(time*REPLACE)]   
}                       



#SIMULATION OF TIME-AVERAGING
TAlevels<-length(TAvalues)
frame<-as.data.frame(array(NA, dim=c(TAlevels,6)))
for (x in 1:TAlevels) {
        J<-Nlive                                #sample sizes in living assemblages, timewindow<-1
        TA<-TAvalues[x]                         #max TA is given by (REPLACE*TA)+(timewindow*REPLACE)>>ngen*REPLACE
        TADEADINPUT<-(REPLACE*TA)               #number of individuals died during time averaging

#IF TIME AVERAGING DOES NOT ALLOW ACCUMULATION OF NDEAD INDIVIDUALS, REDUCE THE NDEAD TO THE MAXIMUM POSSIBLE N
        if (Ndead > TADEADINPUT) {Ndead <- TADEADINPUT}

#SAMPLE PRESERVED DEAD INDIVIDUALS UNIFORMLY FROM THE COMPLETE POOL OF DEAD INDIVIDUALS
        preserveddeadlist<-array(NA, dim=c(Ndead, simulations))         
        absolutedead<-array(0,dim=c(numberofspecies, simulations))              
        relativedead<-array(0,dim=c(numberofspecies,simulations))
        relativelive<-array(0,dim=c(numberofspecies,simulations))

for (i in 1:simulations) {
        deadlist<-matrixofdeads[(REPLACE):(REPLACE+TADEADINPUT),i]      #take dead individuals during one interval after attaining equilibrium
        preserveddeadlist[,i]<-sample(deadlist,Ndead,replace=F)         #counts of dead species in matrix of local samples for 9800 time intervals, one        #column - one time interval
        }

#ABSOLUTE SPECIES ABUNDANCES IN AVERAGED (FOSSIL)ASSEMBLAGES
for (i in 1:simulations) {
        absolutedead[1:length(species.table(preserveddeadlist[1:Ndead,i])),i]<-species.table(preserveddeadlist[1:Ndead,i])
        }
        absolutelive<-livetimematrix[,timetoequilibrium,]

#RELATIVE SPECIES ABUNDANCES IN NONAVERAGED (LIVING) AND AVERAGED (FOSSIL) ASSEMBLAGES
for (i in 1:simulations) {
        relativelive[,i]<-livetimematrix[,timetoequilibrium,i]/sum(livetimematrix[,timetoequilibrium,i])
        relativedead[,i]<-absolutedead[,i]/sum(absolutedead[,i])
        }
        
#ANALYZE A SUBSET OF ASSEMBLAGES - SELECT N ASSEMBLAGES FROM N SIMULATES ASSEMBLAGES, NO AUTOCORRELATION
disp<-rep(NA,simulations)
richness<-rep(NA,simulations)
for (z in 1:simulations) {
        vector<-sample(simulations,simulations-samples, replace=F)
        relativelivesubset<-relativelive[,-vector]
        relativedeadsubset<-relativedead[,-vector]
        absolutelivesubset<-array(NA, dim=c(numberofspecies, ncol(absolutelive)))
        absolutedeadsubset<-array(NA, dim=c(numberofspecies, ncol(absolutelive)))
                for (i in 1:simulations) {
                        absolutelivesubset[,i]<-as.integer(absolutelive[,i])
                        absolutedeadsubset[,i]<-as.integer(absolutedead[,i])
                        }
                absolutelivesubset<-absolutelivesubset[,-vector]
                absolutedeadsubset<-absolutedeadsubset[,-vector]


if (method == "hellinger")      {livedead<-as.data.frame(cbind(sqrt(relativedeadsubset), sqrt(relativelivesubset)))
                                distance<-vegdist(t(livedead), method="bray")
                                }
if (method == "bray")           {livedead<-as.data.frame(cbind(relativedeadsubset, relativelivesubset))
                                distance<-vegdist(t(livedead), method="bray")
                                }
if (method == "jaccard")        {livedead<-as.data.frame(cbind(relativedeadsubset, relativelivesubset))
                                distance<-vegdist(t(livedead), method="jaccard", binary=T)
                                }
if (method == "horn")           {livedead<-as.data.frame(cbind(relativedeadsubset, relativelivesubset))
                                distance<-vegdist(t(livedead), method="horn")
                                }

grouping<-c(rep(1,ncol(relativedeadsubset)), rep(2,ncol(relativelivesubset)))
disp[z]<-mean(betadisper(distance, grouping)$distance[1:samples])
weights<-rep(NA,samples)
stJ<-rep(NA,samples)
alpha<-rep(NA,samples)
for (i in 1:samples) {
	#weights are used for situations if local samples (assemblages) differ in sample size
        weights[i]<-sum(absolutedeadsubset[,i])/sum(absolutedeadsubset)
        stJ[i]<-sum(absolutedeadsubset[,i])
        alpha[i]<-specnumber(t(absolutedeadsubset[,i]))
        }
#average alpha richness (averaged over a given number of samples)
richness[z]<-sum(alpha*weights)
        }                                               

frame[x,]<-data.frame(mean(disp), quantile(disp, c(0.025)), quantile(disp, c(0.975)),
mean(richness), quantile(richness, c(0.025)), quantile(richness, c(0.975)))
colnames(frame)<-c("Beta diversity-dispersion", "95%LCI","95%UCI","Mean alpha richness", "95%LCI","95%UCI")
rownames(frame)<-TAvalues
}

out <- list(frame = frame)
class(out) <- "averaging"
out
}