Skip to content

1. Code Tutorial: GAM

Hyesop edited this page Jun 21, 2019 · 1 revision

This code is a single R sourcecode but splitted for the reader's convenience.

Import NO2 in Winter 2014

Here, we use 5 packages tidyverse, sf, raster, mgcv, and gstat. Wow, really? Of course, that is it. The next step is to load the pre-cleaned NO2 then merge them all to station points.

options(scipen = 100)  # extend the system's decimal points to a hundred
memory.size() # check memory size: Windows-specific
memory.limit(99999) # expand memory size: Windows-specific

library(tidyverse) # data tidy and visualisation
library(sf) # geospatial analysis: Vector
library(raster) # geospatial analysis: Raster
library(mgcv) # GAM
library(gstat) # Kriging

load("../RData/no2.RData")
rm(list=setdiff(ls(), c("no2.win.12hr", "ratio.no2.win")))
stations <- read_sf("../seoul/stations_10km.shp")
stations_df <- stations %>% st_set_geometry(NULL)
seoul <- read_sf("../seoul/Seoul_City.shp") %>% as('Spatial') %>% fortify()
no2.winter <- merge(no2.win.12hr, stations_df, by.x = c("Station.ID", "X", "Y"), by.y = c("Station", "X", "Y"))

Model Construction

#JANUARY GAM MODEL
jan01d <- gam(no2_1_1_day   ~ s(X,Y), data = no2.winter, method = "REML")
jan02d <- gam(no2_1_2_day   ~ s(X,Y), data = no2.winter, method = "REML")
jan03d <- gam(no2_1_3_day   ~ te(X,Y), data = no2.winter, method = "REML")
jan04d <- gam(no2_1_4_day   ~ s(X,Y), data = no2.winter, method = "REML")
jan05d <- gam(no2_1_5_day   ~ s(X,Y), data = no2.winter, method = "REML")
jan06d <- gam(no2_1_6_day   ~ s(X,Y), data = no2.winter, method = "REML")
jan07d <- gam(no2_1_7_day   ~ s(X,Y), data = no2.winter, method = "REML")
jan08d <- gam(no2_1_8_day   ~ s(X,Y), data = no2.winter, method = "REML")
jan09d <- gam(no2_1_9_day   ~ s(X,Y), data = no2.winter, method = "REML")
jan10d <- gam(no2_1_10_day  ~ s(X,Y), data = no2.winter, method = "REML")

jan01n <- gam(no2_1_1_night ~ s(X,Y), data = no2.winter, method = "REML")
jan02n <- gam(no2_1_2_night ~ te(X,Y), data = no2.winter, method = "REML")
jan03n <- gam(no2_1_3_night ~ s(X,Y), data = no2.winter, method = "REML")
jan04n <- gam(no2_1_4_night ~ s(X,Y), data = no2.winter, method = "REML")
jan05n <- gam(no2_1_5_night ~ s(X,Y), data = no2.winter, method = "REML")
jan06n <- gam(no2_1_6_night ~ s(X,Y), data = no2.winter, method = "REML")
jan07n <- gam(no2_1_7_night ~ s(X,Y), data = no2.winter, method = "REML")
jan08n <- gam(no2_1_8_night ~ s(X,Y), data = no2.winter, method = "REML")
jan09n <- gam(no2_1_9_night ~ s(X,Y), data = no2.winter, method = "REML")
jan10n <- gam(no2_1_10_night ~ s(X,Y), data = no2.winter, method = "REML")


###############
#--Create DF--#
###############
jan_mgcv <- data.frame(expand.grid(X = seq(min(no2.winter$X), max(no2.winter$X), length=200),
                                   Y = seq(min(no2.winter$Y), max(no2.winter$Y), length=200)))


#############
#--Predict--#
#############
jan_mgcv$jan01d<- jan01d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan01n<- jan01n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan02d<- jan02d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan02n<- jan02n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan03d<- jan03d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan03n<- jan03n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan04d<- jan04d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan04n<- jan04n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan05d<- jan05d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan05n<- jan05n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan06d<- jan06d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan06n<- jan06n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan07d<- jan07d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan07n<- jan07n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan08d<- jan08d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan08n<- jan08n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan09d<- jan09d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan09n<- jan09n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan10d<- jan10d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan10n<- jan10n %>% predict(jan_mgcv, type = "response")


##-- Find Mean and variance
stat <- jan_mgcv %>% dplyr::select(-c(X,Y)) %>% 
  gather(factor_key = T) %>% 
  group_by(key) %>% 
  summarise(mean= round(mean(value),1), 
            median= round(median(value),1), 
            sd= round(sd(value),1),
            max = round(max(value),2),
            min = round(min(value),2)) %>% 
  rename(Hour = key)


which( colnames(no2.winter)=="no2_1_10_day") # Find first column

top.jan <- no2.winter[,c(2:3, 5:24)]
coordinates(top.jan) <- ~X+Y
proj4string(top.jan) <- CRS("+init=epsg:5181")

ras.mgcv <- rasterFromXYZ(jan_mgcv) 
crs(ras.mgcv) <- CRS('+init=epsg:5181')


########
#-RMSE-#
########

obs <- as.data.frame(top.jan) %>% dplyr::select(X,Y,everything())
pred.df <- data.frame(X = obs$X, Y = obs$Y)
RMSE <- data.frame(X = obs$X, Y = obs$Y)

# for loop
for(i in 1:20){
  pred.df[,i+2] <- extract(ras.mgcv[[i]], top.jan)
  RMSE[,i+2] <- sqrt(abs(pred.df[,i+2] - obs[,i+2]))^2
}

colnames(RMSE) <- colnames(obs)
RMSE %>% dplyr::select(-c(1:2)) %>% as.matrix() %>% mean()
stat$rmse <- RMSE %>% dplyr::select(-c(1:2)) %>% colMeans() %>% round(digits = 3)

Visualisation

##-- Plotting

GAM.df <- as.data.frame(ras.mgcv, xy = TRUE)  %>% rename(X = "x", Y = "y") %>% 
  reshape2::melt(id = c("X", "Y"), variable.name = "Hour", value.name = "NO2") 

GAM.df %>% 
  ggplot() +
  geom_tile(aes(x = X, y = Y, fill = NO2)) +
  scale_fill_distiller(palette = "Spectral", na.value = NA, limits = c(0,125), breaks = c(0,25,50,75,100,125)) +
  geom_contour(aes(x = X, y = Y, z = NO2),bins = 20, colour = "grey40", alpha = 0.7) +
  geom_path(data = seoul, aes(x = long, y = lat), color = 'black', size = 1) +
  geom_text(data = stat, aes(x = 187000,  y = 434000, label = paste0("mean = " , mean)), size = 3) + 
  geom_text(data = stat, aes(x = 184000,  y = 430500, label = paste0("sd = " , sd)), size = 3) + 
  geom_text(data = stat, aes(x = 188900,  y = 469000, label = paste0("RMSE=" , rmse)), size = 3) + 
  facet_wrap(~ Hour, ncol = 8) +
  theme_bw() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        strip.text.x = element_text(size = 20),
        legend.title=element_text(size=15), 
        legend.text=element_text(size=15)                                  
  ) -> GAM # 1200 x 550 

# Export PNG
png("GAM.png", width=1200, height=550, res=100)
GAM
dev.off()



#####################
#--Resample Raster--#
#####################
ras.road <- raster("../seoul/road_10km_re.tif")  # Import raster
res.mgcv <- resample(ras.mgcv, ras.road, method = "bilinear") # resample 
res.mgcv <- merge(ras.road, res.mgcv) # merge

# assign road
road_01 = road_02 = road_03 = road_04 = road_05 = 
  road_06 = road_07 = road_08 = road_09 = road_10 =
  road_11 = road_12 = road_13 = road_14 = road_15 = 
  road_16 = road_17 = road_18 = road_19 = road_20 = ras.road
# stack raster and remove individual raster files
road.stack <- stack(road_01, road_02, road_03, road_04, road_05, 
                    road_06, road_07, road_08, road_09, road_10,
                    road_11, road_12, road_13, road_14, road_15, 
                    road_16, road_17, road_18, road_19, road_20
)
rm(road_01, road_02, road_03, road_04, road_05, 
   road_06, road_07, road_08, road_09, road_10,
   road_11, road_12, road_13, road_14, road_15, 
   road_16, road_17, road_18, road_19, road_20
)

# add road ratio values to GAM raster
ratio.top.jan <- ratio.no2.win[1:20,]

for(i in 1:20){
  #road.stack[[i]] <- road.stack[[i]] * ratio.no2.sum$ratio[i]
  values(road.stack)[values(road.stack[[i]]) == 1] <- ratio.top.jan$Back.Road.Ratio[i]
  values(road.stack)[values(road.stack[[i]]) == 2] <- ratio.top.jan$Back.High.Ratio[i]
}

# add no2 and road values
r.poll.rd <- overlay(res.mgcv, road.stack, fun = function(x,y){ifelse(y != 0, x*y, x)})
names(r.poll.rd) <- c('jan01d', 'jan01n','jan02d', 'jan02n','jan03d', 'jan03n','jan04d', 'jan04n', 'jan05d', 'jan05n', 'jan06d', 'jan06n', 'jan07d', 'jan07n','jan08d', 'jan08n','jan09d', 'jan09n', 'jan10d', 'jan10n')

#####################
#ras.mgcv.df <- as.data.frame(r.poll.rd, xy = TRUE) # easy way
# however, since we resampled and changed our data
# with different resolution imamges and extent, the easier way doesn't work
ras <- xyFromCell(r.poll.rd, 1:ncell(r.poll.rd))
GAM.rd <- as.data.frame(r.poll.rd) 

##-- Find Mean and variance

ras.krige.stat <- data.frame(ras, GAM.rd)

stat1 <- ras.krige.stat %>% dplyr::select(-c(x,y)) %>% 
  gather(factor_key = T) %>% 
  group_by(key) %>% summarise(mean= round(mean(value),1), sd= round(sd(value),1), max = max(value),min = min(value)) %>% 
  rename(Hour = key)

#####
ras.GAM.rd <- data.frame(ras, GAM.rd) %>% 
  reshape2::melt(id = c("x", "y"), variable.name = "Hour", value.name = "NO2") 

ras.GAM.rd %>% 
  ggplot() +
  geom_tile(aes(x = x, y = y, fill = NO2)) +
  scale_fill_distiller(palette = "Spectral", na.value = NA, limits = c(0,125), breaks = c(0,25,50,75,100,125)) +
  geom_text(data = stat1, aes(x = 187000,  y = 434000, label = paste0("mean = " , mean)), size = 3) + 
  geom_text(data = stat1, aes(x = 184000,  y = 430500, label = paste0("sd = " , sd)), size = 3) + 
  geom_path(data = seoul, aes(x = long, y = lat), color = 'black', size = 1) +
  facet_wrap(~ Hour, ncol = 8) +
  theme_bw() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        strip.text.x = element_text(size = 20),
        legend.title=element_text(size=15), 
        legend.text=element_text(size=15)                                  
  ) -> final


# Export PNG
png("GAM_final.png", width=1200, height=550, res=100)
final
dev.off()

Validation

##############
#--Extract--##
##############
# Attributes of Monitoring stations
back_df <- stations_df %>% 
  filter(Name %in% c('Jongno-gu', 'Seongbuk-gu', 'Gwanak-gu', 'Gangnam-gu', 'Daeya', 'Onam')) %>% 
  dplyr::select(-c(Long, Lat, Province, City, `F/R`, Road_Dist, DEM))
road_df <- stations %>% filter(Name %in% c('Hangang-daero', 'Yeongdeungpo', 'Gangbyeonbuk-ro')) 

# Spatial Points
back <- top.jan[c(2,8,17,18,42,48),] #'Jongno-gu', 'Seongbuk-gu', 'Gwanak-gu', 'Gangnam-gu', 'Daeya', 'Onam'
colnames(back@data) <- c("jan01d", "jan01n", "jan02d", "jan02n","jan03d", "jan03n", "jan04d", "jan04n", "jan05d", "jan05n", "jan06d", "jan06n", "jan07d", "jan07n", "jan08d", "jan08n", "jan09d", "jan09n", "jan10d", "jan10n")

# Silim Coordinates: 195793.5 442361.5
# Gwanakgu Office location: 193837.2 442737.9

# Final
back_pred <- back_df %>% cbind(extract(r.poll.rd, back))
back_obs <- back_df %>% cbind(back@data)


# Plot
plot_back_pred <- reshape2::melt(back_pred, 
                                 id = c("X","Y","Station","Name"), 
                                 variable.name = "Type", 
                                 value.name = "Value") %>% 
                  mutate(Group = "pred")

plot_back_obs <- reshape2::melt(back_obs, 
                                id = c("X","Y","Station","Name"), 
                                variable.name = "Type", 
                                value.name = "Value")%>% 
                  mutate(Group = "obs")
plot_back_fin <- rbind(plot_back_pred, plot_back_obs)



plot_back_fin %>% 
  ggplot(aes(x = Type, y = Value, group = Group)) +
  geom_line(aes(linetype = Group), size = 1, col = "grey") +
  geom_point(aes(colour = Name), size =2) +
  ylab("Concentration(ppb)") +
  facet_wrap(~ Name) +
  theme_bw() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_text(size = 12, angle = 90),
        axis.text.y=element_text(size = 12),
        strip.text.x = element_text(size = 20),
        legend.title=element_text(size=15), 
        legend.text=element_text(size=15)                                  
  ) -> graphs
  
# Export Graph
png("Pred_Obs.png", width=1200, height=550, res=100)
graphs
dev.off()

Clone this wiki locally