4 Decades of Belgian Marine Monitoring

4demon.be

The Belgian scientific community has, in the last four decades, built up considerable expertise in marine sciences (see Compendium Coast and Sea (link is external)). Numerous scientific expeditions at sea have resulted in a vast quantity of scientific data related to different topics and important publications in the scientific literature about the marine environment of the Belgian Continental Shelf. Many valuable, historic data however still remain inaccessible to the larger scientific community, being available only on paper in various institutions. These sources are essential for understanding long-term changes in the quality of the marine environment.

The 4DEMON project aims to centralise, integrate and valorise data on contamination, eutrophication and ocean acidification compiled during expeditions in the BCS over the last four decades, forming an important Belgian scientific heritage.

Linear Mixed Model of contaminant in sediment in BCP

CB118

A linear mixed model is developed for raw data of CB118 for fraction 0-63 µm. Dataset : P4_V1_Raw_63_aggregated_for_model_V1_with_cooks_outliers_20171121.csv

names(raw_dataset)
##  [1] "PMX_PRR_CODE"  "START_DATE"    "FSE_RANGE"     "YEAR"         
##  [5] "quadri"        "CLUSTER_ZONE"  "STN_CODE"      "AMD_SEQNO_new"
##  [9] "VALUE"         "YEAR_INT"      "VALUE_log"     "outliers"
getwd()
## [1] "D:/Hong Minh/Documents/R/4DEMON_sediment/P4.1_Model_raw_63/Rscript"

Dataset plots

conta_subset_all<-raw_dataset[raw_dataset$PMX_PRR_CODE=="CB118",]
param<-"CB118"
unit<-"µg/g dw"

conta_subset_all$VALUE_conv<-conta_subset_all$VALUE*1000
unit<-"µg/kg dw"
conta_subset_all$VALUE<-conta_subset_all$VALUE_conv
conta_subset_all$VALUE_log<-log(conta_subset_all$VALUE)

################################ Plots overview ####################################

library(ggplot2)

plot_param<-ggplot(conta_subset_all)+aes(x=START_DATE,y=VALUE_log)+geom_point(size=2)
plot_param<-plot_param  + scale_x_datetime(date_breaks="1 year", date_labels = "%Y")+theme(axis.text.x = element_text(angle = 90, hjust = 1))+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("year") 
plot_param

table(conta_subset_all$outliers)
## 
##   0   1 
## 968   1
conta_subset_all[which(conta_subset_all$outliers==1),]
##       PMX_PRR_CODE          START_DATE FSE_RANGE YEAR quadri CLUSTER_ZONE
## 16685        CB118 1994-03-22 12:00:00      0-63 1994      1            1
##       STN_CODE AMD_SEQNO_new VALUE YEAR_INT VALUE_log outliers VALUE_conv
## 16685      435    SOX-GC-ECD  0.01     1994  -4.60517        1       0.01
conta_subset<-conta_subset_all[conta_subset_all$outliers=="0",]
plot_param<-ggplot(conta_subset)+aes(x=START_DATE,y=VALUE_log)+geom_point(size=2)
plot_param<-plot_param + scale_x_datetime(date_breaks="1 year", date_labels = "%Y")+theme(axis.text.x = element_text(angle = 90, hjust = 1))+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("year") 
plot_param

boxplot_YEAR<-ggplot(data=conta_subset,aes_string(x=conta_subset$YEAR,y=conta_subset$VALUE_log))+geom_boxplot()
boxplot_YEAR<-boxplot_YEAR+theme(axis.text.x = element_text(angle = 90, hjust = 1))+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("year")
boxplot_YEAR

smoother<-ggplot(data=conta_subset,aes(x=START_DATE, y=VALUE_log))+ geom_point(size=2)+ggtitle(paste(param))
smoother<-smoother+geom_smooth(colour='blue',span = 0.5)
smoother<-smoother+geom_smooth(method = "lm", formula = y ~ splines::bs(x, 3), se = FALSE,colour='red')
smoother<-smoother+ scale_x_datetime(date_breaks="1 year", date_labels = "%Y")+ theme(axis.text.x = element_text(angle = +90))
smoother<-smoother+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("year")
smoother
## `geom_smooth()` using method = 'loess'

boxplot_AMD<-ggplot(data=conta_subset,aes_string(x=conta_subset$AMD_SEQNO_new,y=conta_subset$VALUE_log))+geom_boxplot()
boxplot_AMD<-boxplot_AMD+theme(axis.text.x = element_text(angle = 90, hjust = 1))+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("Analysis methode")
boxplot_AMD

boxplot_CLUSTER<-ggplot(data=conta_subset,aes_string(x=conta_subset$CLUSTER_ZONE,y=conta_subset$VALUE_log))+geom_boxplot()
boxplot_CLUSTER<-boxplot_CLUSTER+theme(axis.text.x = element_text(angle = 90, hjust = 1))+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("Cluster zone")
boxplot_CLUSTER

boxplot_quadri<-ggplot(data=conta_subset,aes_string(x=conta_subset$quadri,y=conta_subset$VALUE_log))+geom_boxplot()
boxplot_quadri<-boxplot_quadri+theme(axis.text.x = element_text(angle = 90, hjust = 1))+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("quadri")
boxplot_quadri

Plof of means

## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
## Warning: Removed 16 rows containing missing values (geom_errorbar).

dat_model<-conta_subset 
minYEAR<-min(dat_model$YEAR_INT)
minYEAR
## [1] 1991
dat_model$YEAR_start_one<-dat_model$YEAR_INT-min(dat_model$YEAR_INT)+1
dat_model$YEAR_start_one2<-dat_model$YEAR_start_one^2
dat_model<- dat_model %>% select (YEAR,YEAR_start_one,YEAR_start_one2,quadri,CLUSTER_ZONE,STN_CODE, AMD_SEQNO_new,VALUE,VALUE_log)

pairs(dat_model)

bin<-max(abs(dat_model$VALUE_log)/100)
histo<-ggplot(data=dat_model)+geom_histogram(aes(x=VALUE_log),binwidth=bin)             
histo

all BCP zone

fit_a <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2  + AMD_SEQNO_new + quadri + CLUSTER_ZONE + YEAR_start_one:CLUSTER_ZONE, random= ~1|STN_CODE, dat_model, na.action=na.omit)
summary(fit_a)
## Linear mixed-effects model fit by REML
##  Data: dat_model 
##        AIC      BIC    logLik
##   2231.976 2314.589 -1098.988
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept) Residual
## StdDev:   0.3851493  0.68414
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 + AMD_SEQNO_new +      quadri + CLUSTER_ZONE + YEAR_start_one:CLUSTER_ZONE 
##                                   Value Std.Error  DF   t-value p-value
## (Intercept)                   1.5561076 0.3144009 832  4.949438  0.0000
## YEAR_start_one               -0.1297949 0.0306019 832 -4.241395  0.0000
## YEAR_start_one2               0.0018998 0.0011166 832  1.701376  0.0892
## AMD_SEQNO_newPLE_GC_ECD       0.0101284 0.1301984 832  0.077792  0.9380
## AMD_SEQNO_newSOX-GC-ECD      -0.0794387 0.1635797 832 -0.485627  0.6274
## quadri2                      -0.1350616 0.1234061 832 -1.094449  0.2741
## quadri3                       0.0349902 0.0502793 832  0.695915  0.4867
## CLUSTER_ZONE2                 0.5594248 0.7825919 121  0.714836  0.4761
## CLUSTER_ZONE3                -1.0926421 0.3396872 121 -3.216612  0.0017
## CLUSTER_ZONE4                -0.3016210 0.4078944 121 -0.739459  0.4611
## CLUSTER_ZONE5                -1.4095379 0.3459588 121 -4.074294  0.0001
## YEAR_start_one:CLUSTER_ZONE2  0.0048832 0.0409292 832  0.119309  0.9051
## YEAR_start_one:CLUSTER_ZONE3  0.0706720 0.0167339 832  4.223293  0.0000
## YEAR_start_one:CLUSTER_ZONE4  0.0276063 0.0179819 832  1.535229  0.1251
## YEAR_start_one:CLUSTER_ZONE5  0.0772303 0.0168123 832  4.593670  0.0000
##  Correlation: 
##                              (Intr) YEAR_s_ YEAR__2 AMD_SEQNO_P
## YEAR_start_one               -0.377                            
## YEAR_start_one2               0.004 -0.898                     
## AMD_SEQNO_newPLE_GC_ECD      -0.327 -0.301   0.381             
## AMD_SEQNO_newSOX-GC-ECD      -0.390 -0.489   0.639   0.803     
## quadri2                      -0.067 -0.019   0.023  -0.118     
## quadri3                      -0.025 -0.179   0.196  -0.003     
## CLUSTER_ZONE2                -0.256 -0.075   0.203   0.150     
## CLUSTER_ZONE3                -0.664  0.160   0.139   0.041     
## CLUSTER_ZONE4                -0.524  0.191   0.048  -0.040     
## CLUSTER_ZONE5                -0.605  0.130   0.155   0.005     
## YEAR_start_one:CLUSTER_ZONE2  0.209  0.046  -0.181  -0.162     
## YEAR_start_one:CLUSTER_ZONE3  0.589 -0.246  -0.089  -0.055     
## YEAR_start_one:CLUSTER_ZONE4  0.484 -0.242  -0.055   0.054     
## YEAR_start_one:CLUSTER_ZONE5  0.532 -0.197  -0.129  -0.015     
##                              AMD_SEQNO_S quadr2 quadr3 CLUSTER_ZONE2
## YEAR_start_one                                                      
## YEAR_start_one2                                                     
## AMD_SEQNO_newPLE_GC_ECD                                             
## AMD_SEQNO_newSOX-GC-ECD                                             
## quadri2                      -0.099                                 
## quadri3                       0.150       0.059                     
## CLUSTER_ZONE2                 0.092       0.062  0.014              
## CLUSTER_ZONE3                 0.067       0.136  0.020  0.305       
## CLUSTER_ZONE4                -0.016       0.031  0.017  0.230       
## CLUSTER_ZONE5                 0.019       0.107  0.005  0.309       
## YEAR_start_one:CLUSTER_ZONE2 -0.101      -0.015 -0.034 -0.972       
## YEAR_start_one:CLUSTER_ZONE3 -0.085      -0.031 -0.072 -0.244       
## YEAR_start_one:CLUSTER_ZONE4  0.030      -0.009 -0.028 -0.222       
## YEAR_start_one:CLUSTER_ZONE5 -0.038      -0.011 -0.038 -0.258       
##                              CLUSTER_ZONE3 CLUSTER_ZONE4 CLUSTER_ZONE5
## YEAR_start_one                                                        
## YEAR_start_one2                                                       
## AMD_SEQNO_newPLE_GC_ECD                                               
## AMD_SEQNO_newSOX-GC-ECD                                               
## quadri2                                                               
## quadri3                                                               
## CLUSTER_ZONE2                                                         
## CLUSTER_ZONE3                                                         
## CLUSTER_ZONE4                 0.520                                   
## CLUSTER_ZONE5                 0.637         0.515                     
## YEAR_start_one:CLUSTER_ZONE2 -0.244        -0.184        -0.248       
## YEAR_start_one:CLUSTER_ZONE3 -0.884        -0.440        -0.526       
## YEAR_start_one:CLUSTER_ZONE4 -0.496        -0.809        -0.494       
## YEAR_start_one:CLUSTER_ZONE5 -0.541        -0.444        -0.893       
##                              YEAR__:CLUSTER_ZONE2 YEAR__:CLUSTER_ZONE3
## YEAR_start_one                                                        
## YEAR_start_one2                                                       
## AMD_SEQNO_newPLE_GC_ECD                                               
## AMD_SEQNO_newSOX-GC-ECD                                               
## quadri2                                                               
## quadri3                                                               
## CLUSTER_ZONE2                                                         
## CLUSTER_ZONE3                                                         
## CLUSTER_ZONE4                                                         
## CLUSTER_ZONE5                                                         
## YEAR_start_one:CLUSTER_ZONE2                                          
## YEAR_start_one:CLUSTER_ZONE3  0.257                                   
## YEAR_start_one:CLUSTER_ZONE4  0.232                0.551              
## YEAR_start_one:CLUSTER_ZONE5  0.267                0.597              
##                              YEAR__:CLUSTER_ZONE4
## YEAR_start_one                                   
## YEAR_start_one2                                  
## AMD_SEQNO_newPLE_GC_ECD                          
## AMD_SEQNO_newSOX-GC-ECD                          
## quadri2                                          
## quadri3                                          
## CLUSTER_ZONE2                                    
## CLUSTER_ZONE3                                    
## CLUSTER_ZONE4                                    
## CLUSTER_ZONE5                                    
## YEAR_start_one:CLUSTER_ZONE2                     
## YEAR_start_one:CLUSTER_ZONE3                     
## YEAR_start_one:CLUSTER_ZONE4                     
## YEAR_start_one:CLUSTER_ZONE5  0.558              
## 
## Standardized Within-Group Residuals:
##          Min           Q1          Med           Q3          Max 
## -6.811214963 -0.448306218 -0.002899202  0.486633551  3.445178170 
## 
## Number of Observations: 968
## Number of Groups: 126
plot(fitted(fit_a),residuals(fit_a))

interaction time:cluster zone significant -> zone differenciation -> model by cluster zone

Cluster zone 1

plot_cluster_1<-ggplot(conta_subset[conta_subset$CLUSTER_ZONE==1,])+aes(x=START_DATE,y=VALUE_log,color=AMD_SEQNO_new)+geom_point(size=3)+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("year")   
plot_cluster_1

Outliers identification in cluster zone 1

** outlier 1 **

dat_model_clust1<-dat_model[dat_model$CLUSTER_ZONE==1,] # 99

fit_clust1_out1<-lmer(VALUE_log~ YEAR_start_one+ YEAR_start_one2  + AMD_SEQNO_new + quadri + (1|STN_CODE), dat_model_clust1, na.action=na.omit)
inf_model_start <-influence(model=fit_clust1_out1, obs=TRUE)
cooksd<-cooks.distance.estex(inf_model_start)
length(cooksd[cooksd>0.2,]) #0
## [1] 0

0 outliers removed

Model cluster zone 1

fit_clust_1_a <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2  + AMD_SEQNO_new + quadri, random= ~1|STN_CODE, dat_model_clust1, na.action=na.omit)
summary(fit_clust_1_a)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust1 
##        AIC     BIC    logLik
##   241.8128 264.892 -111.9064
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:   0.5799903 0.5751289
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 + AMD_SEQNO_new +      quadri 
##                              Value Std.Error DF   t-value p-value
## (Intercept)              1.5678226 0.4308473 83  3.638929  0.0005
## YEAR_start_one          -0.0507315 0.0709627 83 -0.714904  0.4767
## YEAR_start_one2         -0.0013147 0.0028237 83 -0.465593  0.6427
## AMD_SEQNO_newPLE_GC_ECD  0.0978715 0.2184846 83  0.447956  0.6554
## AMD_SEQNO_newSOX-GC-ECD -0.4197564 0.3363720 83 -1.247893  0.2156
## quadri2                  0.0475454 0.1711518 83  0.277797  0.7819
## quadri3                 -0.4152540 0.1822857 83 -2.278040  0.0253
##  Correlation: 
##                         (Intr) YEAR_s_ YEAR__2 AMD_SEQNO_P AMD_SEQNO_S
## YEAR_start_one          -0.217                                        
## YEAR_start_one2          0.043 -0.978                                 
## AMD_SEQNO_newPLE_GC_ECD -0.345 -0.326   0.359                         
## AMD_SEQNO_newSOX-GC-ECD -0.494 -0.618   0.714   0.672                 
## quadri2                 -0.001 -0.158   0.155  -0.086      -0.034     
## quadri3                 -0.026 -0.159   0.155  -0.152       0.122     
##                         quadr2
## YEAR_start_one                
## YEAR_start_one2               
## AMD_SEQNO_newPLE_GC_ECD       
## AMD_SEQNO_newSOX-GC-ECD       
## quadri2                       
## quadri3                  0.118
## 
## Standardized Within-Group Residuals:
##          Min           Q1          Med           Q3          Max 
## -2.642754135 -0.512842719 -0.009908659  0.614462474  2.660831920 
## 
## Number of Observations: 103
## Number of Groups: 14
# AMD not significant

fit_clust_1_b <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2+ quadri , random= ~1|STN_CODE, dat_model_clust1, na.action=na.omit)
summary(fit_clust_1_b)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust1 
##        AIC      BIC    logLik
##   240.2047 258.2994 -113.1023
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:   0.5863326 0.5827661
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 + quadri 
##                      Value Std.Error DF   t-value p-value
## (Intercept)      1.3154572 0.3791477 85  3.469511  0.0008
## YEAR_start_one  -0.1204630 0.0558250 85 -2.157870  0.0338
## YEAR_start_one2  0.0020005 0.0019487 85  1.026570  0.3075
## quadri2          0.0654041 0.1726560 85  0.378812  0.7058
## quadri3         -0.2872244 0.1737648 85 -1.652950  0.1020
##  Correlation: 
##                 (Intr) YEAR_s_ YEAR__2 quadr2
## YEAR_start_one  -0.772                       
## YEAR_start_one2  0.664 -0.977                
## quadri2         -0.022 -0.218   0.244        
## quadri3          0.035 -0.063   0.025   0.102
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.98261705 -0.53115226 -0.02845738  0.70998356  2.39790118 
## 
## Number of Observations: 103
## Number of Groups: 14
# quadri not significant

fit_clust_1_c <- lme(VALUE_log~ YEAR_start_one+YEAR_start_one2,   random= ~1|STN_CODE, dat_model_clust1, na.action=na.omit)
summary(fit_clust_1_c)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust1 
##        AIC      BIC    logLik
##   235.6223 248.6482 -112.8112
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:   0.5140428 0.5941527
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 
##                      Value Std.Error DF   t-value p-value
## (Intercept)      1.3788093 0.3716049 87  3.710417  0.0004
## YEAR_start_one  -0.1271261 0.0547931 87 -2.320114  0.0227
## YEAR_start_one2  0.0020674 0.0019070 87  1.084116  0.2813
##  Correlation: 
##                 (Intr) YEAR_s_
## YEAR_start_one  -0.811        
## YEAR_start_one2  0.704 -0.977 
## 
## Standardized Within-Group Residuals:
##          Min           Q1          Med           Q3          Max 
## -2.889511891 -0.534469708 -0.003515531  0.677882827  2.643147431 
## 
## Number of Observations: 103
## Number of Groups: 14
# t² not siginifcant

fit_clust_1_d <- lme(VALUE_log~ YEAR_start_one,   random= ~1|STN_CODE, dat_model_clust1, na.action=na.omit)
summary(fit_clust_1_d)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust1 
##        AIC      BIC    logLik
##   224.0359 234.4964 -108.0179
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:   0.5502967 0.5899419
## 
## Fixed effects: VALUE_log ~ YEAR_start_one 
##                     Value  Std.Error DF   t-value p-value
## (Intercept)     1.0924745 0.26921198 88  4.058045   1e-04
## YEAR_start_one -0.0687804 0.01181586 88 -5.821028   0e+00
##  Correlation: 
##                (Intr)
## YEAR_start_one -0.794
## 
## Standardized Within-Group Residuals:
##          Min           Q1          Med           Q3          Max 
## -2.886595212 -0.541224066  0.004138715  0.661084548  2.555020998 
## 
## Number of Observations: 103
## Number of Groups: 14
# final model
int=(1.09); a=(-0.07) ;b=(0) 
x<-c(1:(max(conta_subset$YEAR_INT)-min(conta_subset$YEAR_INT)),1)
y<-int+a*x+b*x^2
year<-(x+min(conta_subset$YEAR_INT)-1)
plot(y~year)

boxplot_YEAR<-ggplot(data=dat_model_clust1,aes_string(x=as.factor(dat_model_clust1$YEAR),y=dat_model_clust1$VALUE_log))+geom_boxplot()
boxplot_YEAR<-boxplot_YEAR+theme(axis.text.x = element_text(angle = 90, hjust = 1))+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("year")+ ggtitle(paste(param,"boxplot by year in cluster zone 1",sep=" "))
boxplot_YEAR

dat_model_clust1$YEAR<-dat_model_clust1$YEAR_start_one+minYEAR
smoother_clust1<-ggplot(data=dat_model_clust1,aes(x=YEAR, y=VALUE_log))+ geom_point(size=2)+ggtitle(paste(param))
smoother_clust1<-smoother_clust1+geom_smooth(colour='blue',span = 0.5)
smoother_clust1<-smoother_clust1+geom_smooth(method = "lm", formula = y ~ splines::bs(x, 3), se = FALSE,colour='red')
smoother_clust1<-smoother_clust1+ theme(axis.text.x = element_text(angle = +90))
smoother_clust1<-smoother_clust1+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("year")+ ggtitle(paste(param,"smoothers in cluster zone 1",sep=" "))
smoother_clust1
## `geom_smooth()` using method = 'loess'

Cluster zone 2

plot_cluster_2<-ggplot(conta_subset[conta_subset$CLUSTER_ZONE==2,])+aes(x=START_DATE,y=VALUE_log,color=AMD_SEQNO_new)+geom_point(size=3)+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("year")   
plot_cluster_2

dat_model_clust2<-dat_model[dat_model$CLUSTER_ZONE==2,] # 95
table(factor(dat_model_clust2$AMD_SEQNO_new)) 
## 
## PLE_GC_ECD SOX-GC-ECD 
##         16         87

Outliers identification

** outlier 1 **

fit_clust2_out1<-lmer(VALUE_log~ YEAR_start_one+ YEAR_start_one2  + AMD_SEQNO_new + quadri + (1|STN_CODE), dat_model_clust2, na.action=na.omit)
inf_model_start <-influence(model=fit_clust2_out1, obs=TRUE)
cooksd<-cooks.distance.estex(inf_model_start)
length(cooksd[cooksd>0.2,]) # 0
## [1] 0

0 outliers removed (final dataset : dat_model_clust2)

table(factor(dat_model_clust2$AMD_SEQNO_new))
## 
## PLE_GC_ECD SOX-GC-ECD 
##         16         87
fit_clust2_a <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2+ AMD_SEQNO_new+ quadri, random= ~1|STN_CODE, dat_model_clust2, na.action=na.omit)
summary(fit_clust2_a)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust2 
##        AIC     BIC    logLik
##   224.7322 242.827 -105.3661
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:     0.54923 0.5487065
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 + AMD_SEQNO_new +      quadri 
##                             Value Std.Error DF    t-value p-value
## (Intercept)              3.461464  6.553930 79  0.5281508  0.5989
## YEAR_start_one          -0.312938  0.751500 79 -0.4164184  0.6782
## YEAR_start_one2          0.007583  0.020825 79  0.3641445  0.7167
## AMD_SEQNO_newSOX-GC-ECD  0.073677  0.335885 79  0.2193518  0.8269
## quadri3                  0.227412  0.116492 79  1.9521704  0.0545
##  Correlation: 
##                         (Intr) YEAR_s_ YEAR__2 AMD_SE
## YEAR_start_one          -0.996                       
## YEAR_start_one2          0.989 -0.998                
## AMD_SEQNO_newSOX-GC-ECD  0.622 -0.682   0.711        
## quadri3                  0.111 -0.133   0.137   0.261
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -2.0784389 -0.4838849 -0.1583328  0.4636164  2.8477330 
## 
## Number of Observations: 103
## Number of Groups: 20
# AMD not significant

fit_clust2_b <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2 +quadri, random= ~1|STN_CODE, dat_model_clust2, na.action=na.omit)
summary(fit_clust2_b)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust2 
##        AIC      BIC    logLik
##   222.4316 238.0023 -105.2158
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:   0.5477513 0.5458781
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 + quadri 
##                      Value Std.Error DF    t-value p-value
## (Intercept)      2.5700671  5.104852 80  0.5034558  0.6160
## YEAR_start_one  -0.2008511  0.546745 80 -0.3673576  0.7143
## YEAR_start_one2  0.0043444  0.014562 80  0.2983415  0.7662
## quadri3          0.2207250  0.111880 80  1.9728797  0.0520
##  Correlation: 
##                 (Intr) YEAR_s_ YEAR__2
## YEAR_start_one  -0.998                
## YEAR_start_one2  0.993 -0.998         
## quadri3         -0.068  0.064  -0.072 
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -2.0827757 -0.4704675 -0.1607906  0.4723773  2.8629242 
## 
## Number of Observations: 103
## Number of Groups: 20
# quadri not significant

fit_clust2_c <- lme(VALUE_log~ YEAR_start_one+YEAR_start_one2 , random= ~1|STN_CODE, dat_model_clust2, na.action=na.omit)
summary(fit_clust2_c)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust2 
##        AIC      BIC    logLik
##   221.7366 234.7625 -105.8683
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept) Residual
## StdDev:   0.5478578 0.554917
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 
##                     Value Std.Error DF    t-value p-value
## (Intercept)      3.238474  5.175474 81  0.6257349  0.5332
## YEAR_start_one  -0.267603  0.554459 81 -0.4826382  0.6307
## YEAR_start_one2  0.006339  0.014759 81  0.4295245  0.6687
##  Correlation: 
##                 (Intr) YEAR_s_
## YEAR_start_one  -0.998        
## YEAR_start_one2  0.993 -0.998 
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.8223335 -0.5387783 -0.1685524  0.4982705  2.9839154 
## 
## Number of Observations: 103
## Number of Groups: 20
# t² not significant

fit_clust2_d <- lme(VALUE_log~ YEAR_start_one, random= ~1|STN_CODE, dat_model_clust2, na.action=na.omit)
summary(fit_clust2_d)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust2 
##        AIC      BIC    logLik
##   213.3238 223.7843 -102.6619
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:    0.544825 0.5527988
## 
## Fixed effects: VALUE_log ~ YEAR_start_one 
##                     Value Std.Error DF    t-value p-value
## (Intercept)     1.0305406 0.5906509 82  1.7447543  0.0848
## YEAR_start_one -0.0298518 0.0308539 82 -0.9675211  0.3361
##  Correlation: 
##                (Intr)
## YEAR_start_one -0.97 
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.7979314 -0.5610583 -0.1403271  0.4826256  2.9964659 
## 
## Number of Observations: 103
## Number of Groups: 20
# final t not significant

int=(1.03); a=(-0.03) ;b=(0) 
x<-c(1:(max(conta_subset$YEAR_INT)-min(conta_subset$YEAR_INT)),1)
y<-int+a*x+b*x^2
year<-(x+min(conta_subset$YEAR_INT)-1)
plot(y~year)

## `geom_smooth()` using method = 'loess'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 2009
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 2009
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1

Cluster zone 3

plot_cluster_3<-ggplot(conta_subset[conta_subset$CLUSTER_ZONE==3,])+aes(x=START_DATE,y=VALUE_log,color=AMD_SEQNO_new)+geom_point(size=3)+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("year")   
plot_cluster_3

dat_model_clust3<-dat_model[dat_model$CLUSTER_ZONE==3,] # 316
table(factor(dat_model_clust3$AMD_SEQNO_new))
## 
##  PLE-GC-MS PLE_GC_ECD SOX-GC-ECD 
##          7         31        278
table(factor(dat_model_clust3$quadri))
## 
##   1   3 
## 156 160

** outlier 1 **

fit_clust3_out1<-lmer(VALUE_log~ YEAR_start_one+ YEAR_start_one2  + AMD_SEQNO_new+ quadri + (1|STN_CODE), dat_model_clust3, na.action=na.omit)
inf_model_start <-influence(model=fit_clust3_out1, obs=TRUE) 
cooksd<-cooks.distance.estex(inf_model_start)
length(cooksd[cooksd>0.2,]) #0
## [1] 0

0 outlier removed

Model cluster zone 3

fit_clust3_a <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2  +AMD_SEQNO_new+ quadri, random= ~1|STN_CODE, dat_model_clust3, na.action=na.omit)
summary(fit_clust3_a)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust3 
##        AIC      BIC    logLik
##   830.2542 860.1468 -407.1271
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept) Residual
## StdDev:    0.357286 0.808001
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 + AMD_SEQNO_new +      quadri 
##                              Value Std.Error  DF    t-value p-value
## (Intercept)              0.0194983 0.5390482 270  0.0361717  0.9712
## YEAR_start_one          -0.0958709 0.0610946 270 -1.5692216  0.1178
## YEAR_start_one2          0.0036281 0.0022956 270  1.5804591  0.1152
## AMD_SEQNO_newPLE_GC_ECD  0.1777709 0.3818810 270  0.4655138  0.6419
## AMD_SEQNO_newSOX-GC-ECD  0.4443106 0.4335882 270  1.0247295  0.3064
## quadri3                  0.1367269 0.0978241 270  1.3976816  0.1634
##  Correlation: 
##                         (Intr) YEAR_s_ YEAR__2 AMD_SEQNO_P AMD_SEQNO_S
## YEAR_start_one          -0.335                                        
## YEAR_start_one2          0.153 -0.969                                 
## AMD_SEQNO_newPLE_GC_ECD -0.624 -0.241   0.303                         
## AMD_SEQNO_newSOX-GC-ECD -0.670 -0.419   0.528   0.862                 
## quadri3                 -0.043 -0.171   0.145   0.024       0.123     
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -5.54264001 -0.42521182  0.05649521  0.46213925  2.87713308 
## 
## Number of Observations: 316
## Number of Groups: 41
# AMD not significant

fit_clust3_b <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2 + quadri, random= ~1|STN_CODE, dat_model_clust3, na.action=na.omit)
summary(fit_clust3_b)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust3 
##        AIC      BIC    logLik
##   826.6656 849.1236 -407.3328
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:   0.3438248 0.8096485
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 + quadri 
##                      Value Std.Error  DF    t-value p-value
## (Intercept)      0.3637054 0.3956407 272  0.9192822  0.3588
## YEAR_start_one  -0.0595312 0.0534370 272 -1.1140445  0.2662
## YEAR_start_one2  0.0018645 0.0018202 272  1.0243787  0.3066
## quadri3          0.1117618 0.0959297 272  1.1650395  0.2450
##  Correlation: 
##                 (Intr) YEAR_s_ YEAR__2
## YEAR_start_one  -0.920                
## YEAR_start_one2  0.820 -0.972         
## quadri3          0.035 -0.095   0.041 
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -5.56632808 -0.42050028  0.04362066  0.46545908  2.98895330 
## 
## Number of Observations: 316
## Number of Groups: 41
# quadri not significant

fit_clust3_c <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2 , random= ~1|STN_CODE, dat_model_clust3, na.action=na.omit)
summary(fit_clust3_c)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust3 
##        AIC      BIC    logLik
##   823.1716 841.9027 -406.5858
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:   0.3457939 0.8098367
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 
##                      Value Std.Error  DF    t-value p-value
## (Intercept)      0.3456681 0.3957955 273  0.8733504  0.3832
## YEAR_start_one  -0.0533673 0.0532392 273 -1.0024051  0.3170
## YEAR_start_one2  0.0017717 0.0018201 273  0.9734368  0.3312
##  Correlation: 
##                 (Intr) YEAR_s_
## YEAR_start_one  -0.921        
## YEAR_start_one2  0.820 -0.973 
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -5.58475762 -0.46069342  0.06196597  0.46713574  3.02770643 
## 
## Number of Observations: 316
## Number of Groups: 41
# t² not significant

fit_clust3_d <- lme(VALUE_log~ YEAR_start_one , random= ~1|STN_CODE, dat_model_clust3, na.action=na.omit)
summary(fit_clust3_d)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust3 
##        AIC      BIC    logLik
##   811.3243 826.3219 -401.6622
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:   0.3539208 0.8084928
## 
## Fixed effects: VALUE_log ~ YEAR_start_one 
##                       Value Std.Error  DF    t-value p-value
## (Intercept)     0.025806106 0.2273873 274  0.1134896  0.9097
## YEAR_start_one -0.002632179 0.0122296 274 -0.2152302  0.8297
##  Correlation: 
##                (Intr)
## YEAR_start_one -0.939
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -5.56292266 -0.45039511  0.05982692  0.48537549  3.05056540 
## 
## Number of Observations: 316
## Number of Groups: 41
# t not significant


int=(0.026); a=(-0.003) ;b=(0) 
x<-c(1:(max(conta_subset$YEAR_INT)-min(conta_subset$YEAR_INT)),1)
y<-int+a*x+b*x^2
year<-(x+min(conta_subset$YEAR_INT)-1)
plot(y~year)

boxplot_YEAR<-ggplot(data=dat_model_clust3,aes_string(x=as.factor(dat_model_clust3$YEAR),y=dat_model_clust3$VALUE_log))+geom_boxplot()
boxplot_YEAR<-boxplot_YEAR+theme(axis.text.x = element_text(angle = 90, hjust = 1))+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("year")+ ggtitle(paste(param,"boxplot by year in cluster zone 3",sep=" "))
boxplot_YEAR

dat_model_clust3$YEAR<-dat_model_clust3$YEAR_start_one+minYEAR
smoother_clust3<-ggplot(data=dat_model_clust3,aes(x=YEAR, y=VALUE_log))+ geom_point(size=2)+ggtitle(paste(param))
smoother_clust3<-smoother_clust3+geom_smooth(colour='blue',span = 0.5)
smoother_clust3<-smoother_clust3+geom_smooth(method = "lm", formula = y ~ splines::bs(x, 3), se = FALSE,colour='red')
smoother_clust3<-smoother_clust3+ theme(axis.text.x = element_text(angle = +90))
smoother_clust3<-smoother_clust3+ylab(paste("log",param,"in",unit,sep=" "))+xlab("year")+ ggtitle(paste(param," smoothers in cluster zone 3"))
smoother_clust3
## `geom_smooth()` using method = 'loess'

Cluster zone 4

plot_cluster_4<-ggplot(conta_subset[conta_subset$CLUSTER_ZONE==4,])+aes(x=START_DATE,y=VALUE_log,color=AMD_SEQNO_new)+geom_point(size=3)+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("year")   
plot_cluster_4

dat_model_clust4<-dat_model[dat_model$CLUSTER_ZONE==4,]

Outliers identification

fit_clust4_out1<-lmer(VALUE_log~ YEAR_start_one+ YEAR_start_one2  + AMD_SEQNO_new + quadri + (1|STN_CODE), dat_model_clust4, na.action=na.omit)
inf_model_start <-influence(model=fit_clust4_out1, obs=TRUE)
cooksd<-cooks.distance.estex(inf_model_start)
length(cooksd[cooksd>0.2,]) # 1
## [1] 1
dat_model_clust4[which.max(cooksd),] 
##       YEAR YEAR_start_one YEAR_start_one2 quadri CLUSTER_ZONE STN_CODE
## 17083 2011             21             441      1            4    115_a
##       AMD_SEQNO_new VALUE VALUE_log
## 17083    SOX-GC-ECD 2.468 0.9034081
n<-as.numeric(which.max(cooksd))
dat_model_clust4_out1<-dat_model_clust4[-(n),] # 83

** outlier 2 **

fit_clust4_out2<-lmer(VALUE_log~ YEAR_start_one+ YEAR_start_one2  + AMD_SEQNO_new + quadri + (1|STN_CODE), dat_model_clust4_out1, na.action=na.omit)
inf_model_start <-influence(model=fit_clust4_out2, obs=TRUE)
cooksd<-cooks.distance.estex(inf_model_start)
length(cooksd[cooksd>0.2,]) # 0
## [1] 0

1 outlier removed

Model cluster zone 4

fit_clust4_a <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2  + AMD_SEQNO_new + quadri, random= ~1|STN_CODE, dat_model_clust4_out1, na.action=na.omit)
summary(fit_clust4_a)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust4_out1 
##        AIC      BIC   logLik
##   155.8806 177.2056 -68.9403
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:   0.2214952 0.4572105
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 + AMD_SEQNO_new +      quadri 
##                              Value Std.Error DF   t-value p-value
## (Intercept)              1.8711097 0.3500733 75  5.344908  0.0000
## YEAR_start_one           0.0930218 0.0509872 75  1.824415  0.0721
## YEAR_start_one2         -0.0069381 0.0020549 75 -3.376380  0.0012
## AMD_SEQNO_newPLE_GC_ECD -0.4074058 0.1916803 75 -2.125445  0.0368
## AMD_SEQNO_newSOX-GC-ECD -1.4056106 0.3091784 75 -4.546277  0.0000
## quadri2                 -0.0409061 0.1513937 75 -0.270197  0.7878
## quadri3                 -0.0946301 0.1287993 75 -0.734710  0.4648
##  Correlation: 
##                         (Intr) YEAR_s_ YEAR__2 AMD_SEQNO_P AMD_SEQNO_S
## YEAR_start_one          -0.089                                        
## YEAR_start_one2         -0.131 -0.969                                 
## AMD_SEQNO_newPLE_GC_ECD -0.433 -0.466   0.528                         
## AMD_SEQNO_newSOX-GC-ECD -0.588 -0.673   0.800   0.729                 
## quadri2                  0.022  0.044  -0.063  -0.164      -0.152     
## quadri3                  0.083 -0.200   0.145  -0.013       0.057     
##                         quadr2
## YEAR_start_one                
## YEAR_start_one2               
## AMD_SEQNO_newPLE_GC_ECD       
## AMD_SEQNO_newSOX-GC-ECD       
## quadri2                       
## quadri3                  0.111
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.49317763 -0.53953475  0.02806577  0.61978424  2.96547045 
## 
## Number of Observations: 86
## Number of Groups: 5
# quadri not significant

fit_clust4_b <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2  + AMD_SEQNO_new , random= ~1|STN_CODE, dat_model_clust4_out1, na.action=na.omit)
summary(fit_clust4_b)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust4_out1 
##        AIC      BIC    logLik
##   148.1935 164.9547 -67.09676
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:    0.196911 0.4545733
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 + AMD_SEQNO_new 
##                              Value Std.Error DF   t-value p-value
## (Intercept)              1.9151909 0.3422891 77  5.595244  0.0000
## YEAR_start_one           0.0855624 0.0494279 77  1.731056  0.0874
## YEAR_start_one2         -0.0067627 0.0020115 77 -3.362094  0.0012
## AMD_SEQNO_newPLE_GC_ECD -0.4191110 0.1876686 77 -2.233250  0.0284
## AMD_SEQNO_newSOX-GC-ECD -1.4136678 0.3021731 77 -4.678338  0.0000
##  Correlation: 
##                         (Intr) YEAR_s_ YEAR__2 AMD_SEQNO_P
## YEAR_start_one          -0.075                            
## YEAR_start_one2         -0.145 -0.969                     
## AMD_SEQNO_newPLE_GC_ECD -0.440 -0.474   0.530             
## AMD_SEQNO_newSOX-GC-ECD -0.605 -0.677   0.802   0.724     
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.48679849 -0.53996134  0.01864487  0.59834478  3.09900781 
## 
## Number of Observations: 86
## Number of Groups: 5
# t not significant

fit_clust4_c <- lme(VALUE_log~ YEAR_start_one2  + AMD_SEQNO_new , random= ~1|STN_CODE, dat_model_clust4_out1, na.action=na.omit)
summary(fit_clust4_c)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust4_out1 
##        AIC      BIC    logLik
##   144.9808 159.4211 -66.49041
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:   0.1898865 0.4606917
## 
## Fixed effects: VALUE_log ~ YEAR_start_one2 + AMD_SEQNO_new 
##                              Value Std.Error DF   t-value p-value
## (Intercept)              1.9697200 0.3440046 78  5.725854  0.0000
## YEAR_start_one2         -0.0034027 0.0004987 78 -6.822416  0.0000
## AMD_SEQNO_newPLE_GC_ECD -0.2675305 0.1673314 78 -1.598807  0.1139
## AMD_SEQNO_newSOX-GC-ECD -1.0654776 0.2247184 78 -4.741390  0.0000
##  Correlation: 
##                         (Intr) YEAR__ AMD_SEQNO_P
## YEAR_start_one2         -0.895                   
## AMD_SEQNO_newPLE_GC_ECD -0.543  0.324            
## AMD_SEQNO_newSOX-GC-ECD -0.897  0.805  0.621     
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.48926839 -0.56203902  0.03585966  0.62303856  3.11603155 
## 
## Number of Observations: 86
## Number of Groups: 5
# final model

int=(1.97); a=(0) ;b=(-0.003) 
x<-c(1:(max(conta_subset$YEAR_INT)-min(conta_subset$YEAR_INT)),1)
y<-int+a*x+b*x^2
year<-(x+min(conta_subset$YEAR_INT)-1)
plot(y~year)

## `geom_smooth()` using method = 'loess'

Cluster zone 5

plot_cluster_5<-ggplot(conta_subset[conta_subset$CLUSTER_ZONE==5,])+aes(x=START_DATE,y=VALUE_log,color=AMD_SEQNO_new)+geom_point(size=3)+ ylab(paste("log",param,"in",unit,sep=" "))+xlab("year")   
plot_cluster_5

dat_model_clust5<-dat_model[dat_model$CLUSTER_ZONE==5,] # 339

Outlier identification

fit_clust5_out1<-lmer(VALUE_log~ YEAR_start_one+ YEAR_start_one2  + AMD_SEQNO_new + quadri + (1|STN_CODE), dat_model_clust5, na.action=na.omit)
inf_model_start <-influence(model=fit_clust5_out1, obs=TRUE)
cooksd<-cooks.distance.estex(inf_model_start)
length(cooksd[cooksd>0.2,]) # 0
## [1] 0

0 outlier removed

Model for cluster zone 5

fit_clust5_a <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2  + AMD_SEQNO_new + quadri, random= ~1|STN_CODE, dat_model_clust5, na.action=na.omit)
summary(fit_clust5_a)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust5 
##        AIC     BIC    logLik
##   799.3243 834.097 -390.6621
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:   0.3933211 0.6476676
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 + AMD_SEQNO_new +      quadri 
##                              Value Std.Error  DF    t-value p-value
## (Intercept)              0.4301764 0.3871028 307  1.1112719  0.2673
## YEAR_start_one          -0.0992226 0.0478832 307 -2.0721780  0.0391
## YEAR_start_one2          0.0035056 0.0017266 307  2.0304029  0.0432
## AMD_SEQNO_newPLE_GC_ECD  0.0463382 0.2129982 307  0.2175520  0.8279
## AMD_SEQNO_newSOX-GC-ECD -0.0419664 0.2560712 307 -0.1638857  0.8699
## quadri2                 -0.3058812 0.2306615 307 -1.3261040  0.1858
## quadri3                 -0.0177758 0.0774286 307 -0.2295764  0.8186
##  Correlation: 
##                         (Intr) YEAR_s_ YEAR__2 AMD_SEQNO_P AMD_SEQNO_S
## YEAR_start_one          -0.453                                        
## YEAR_start_one2          0.235 -0.964                                 
## AMD_SEQNO_newPLE_GC_ECD -0.454 -0.277   0.357                         
## AMD_SEQNO_newSOX-GC-ECD -0.489 -0.484   0.616   0.824                 
## quadri2                 -0.002  0.023  -0.011  -0.126      -0.084     
## quadri3                 -0.010 -0.267   0.271   0.021       0.196     
##                         quadr2
## YEAR_start_one                
## YEAR_start_one2               
## AMD_SEQNO_newPLE_GC_ECD       
## AMD_SEQNO_newSOX-GC-ECD       
## quadri2                       
## quadri3                  0.066
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -7.20559905 -0.39922600  0.01536275  0.44553466  3.30062867 
## 
## Number of Observations: 359
## Number of Groups: 46
# AMD not significant

fit_clust5_b <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2  + quadri, random= ~1|STN_CODE, dat_model_clust5, na.action=na.omit)
summary(fit_clust5_b)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust5 
##       AIC     BIC   logLik
##   792.458 819.543 -389.229
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept) Residual
## StdDev:   0.3933196 0.646098
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 + quadri 
##                      Value Std.Error  DF    t-value p-value
## (Intercept)      0.4201004 0.3351422 309  1.2534993  0.2110
## YEAR_start_one  -0.1093565 0.0405304 309 -2.6981385  0.0074
## YEAR_start_one2  0.0039646 0.0012774 309  3.1036311  0.0021
## quadri2         -0.2936842 0.2281620 309 -1.2871742  0.1990
## quadri3         -0.0033964 0.0732743 309 -0.0463517  0.9631
##  Correlation: 
##                 (Intr) YEAR_s_ YEAR__2 quadr2
## YEAR_start_one  -0.910                       
## YEAR_start_one2  0.796 -0.968                
## quadri2         -0.061  0.005   0.020        
## quadri3          0.077 -0.148   0.120   0.061
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -7.18907542 -0.40661603  0.02122247  0.44346023  3.26245895 
## 
## Number of Observations: 359
## Number of Groups: 46
# quadri not significant

fit_clust5_c <- lme(VALUE_log~ YEAR_start_one+ YEAR_start_one2,  random= ~1|STN_CODE, dat_model_clust5, na.action=na.omit)
summary(fit_clust5_c)
## Linear mixed-effects model fit by REML
##  Data: dat_model_clust5 
##        AIC      BIC    logLik
##   785.4806 804.8553 -387.7403
## 
## Random effects:
##  Formula: ~1 | STN_CODE
##         (Intercept)  Residual
## StdDev:   0.4128311 0.6431025
## 
## Fixed effects: VALUE_log ~ YEAR_start_one + YEAR_start_one2 
##                      Value Std.Error  DF   t-value p-value
## (Intercept)      0.3783126 0.3339323 311  1.132902  0.2581
## YEAR_start_one  -0.1080417 0.0400083 311 -2.700486  0.0073
## YEAR_start_one2  0.0039902 0.0012651 311  3.153998  0.0018
##  Correlation: 
##                 (Intr) YEAR_s_
## YEAR_start_one  -0.911        
## YEAR_start_one2  0.795 -0.968 
## 
## Standardized Within-Group Residuals:
##           Min            Q1           Med            Q3           Max 
## -7.1860992940 -0.4001844537  0.0008349032  0.4500959015  3.3022569885 
## 
## Number of Observations: 359
## Number of Groups: 46
# final model

int=(0.38); a=(-0.11) ;b=(0.004) 
x<-c(1:(max(conta_subset$YEAR_INT)-min(conta_subset$YEAR_INT)),1)
y<-int+a*x+b*x^2
year<-(x+min(conta_subset$YEAR_INT)-1)
plot(y~year)

## `geom_smooth()` using method = 'loess'