-
Notifications
You must be signed in to change notification settings - Fork 0
1. Code Tutorial: GAM
Hyesop edited this page Jun 21, 2019
·
1 revision
This code is a single R sourcecode but splitted for the reader's convenience.
Here, we use 5 packages tidyverse, sf, raster, mgcv, and gstat. Wow, really? Of course, that is it.
The next step is to load the pre-cleaned NO2 then merge them all to station points.
options(scipen = 100) # extend the system's decimal points to a hundred
memory.size() # check memory size: Windows-specific
memory.limit(99999) # expand memory size: Windows-specific
library(tidyverse) # data tidy and visualisation
library(sf) # geospatial analysis: Vector
library(raster) # geospatial analysis: Raster
library(mgcv) # GAM
library(gstat) # Kriging
load("../RData/no2.RData")
rm(list=setdiff(ls(), c("no2.win.12hr", "ratio.no2.win")))
stations <- read_sf("../seoul/stations_10km.shp")
stations_df <- stations %>% st_set_geometry(NULL)
seoul <- read_sf("../seoul/Seoul_City.shp") %>% as('Spatial') %>% fortify()
no2.winter <- merge(no2.win.12hr, stations_df, by.x = c("Station.ID", "X", "Y"), by.y = c("Station", "X", "Y"))#JANUARY GAM MODEL
jan01d <- gam(no2_1_1_day ~ s(X,Y), data = no2.winter, method = "REML")
jan02d <- gam(no2_1_2_day ~ s(X,Y), data = no2.winter, method = "REML")
jan03d <- gam(no2_1_3_day ~ te(X,Y), data = no2.winter, method = "REML")
jan04d <- gam(no2_1_4_day ~ s(X,Y), data = no2.winter, method = "REML")
jan05d <- gam(no2_1_5_day ~ s(X,Y), data = no2.winter, method = "REML")
jan06d <- gam(no2_1_6_day ~ s(X,Y), data = no2.winter, method = "REML")
jan07d <- gam(no2_1_7_day ~ s(X,Y), data = no2.winter, method = "REML")
jan08d <- gam(no2_1_8_day ~ s(X,Y), data = no2.winter, method = "REML")
jan09d <- gam(no2_1_9_day ~ s(X,Y), data = no2.winter, method = "REML")
jan10d <- gam(no2_1_10_day ~ s(X,Y), data = no2.winter, method = "REML")
jan01n <- gam(no2_1_1_night ~ s(X,Y), data = no2.winter, method = "REML")
jan02n <- gam(no2_1_2_night ~ te(X,Y), data = no2.winter, method = "REML")
jan03n <- gam(no2_1_3_night ~ s(X,Y), data = no2.winter, method = "REML")
jan04n <- gam(no2_1_4_night ~ s(X,Y), data = no2.winter, method = "REML")
jan05n <- gam(no2_1_5_night ~ s(X,Y), data = no2.winter, method = "REML")
jan06n <- gam(no2_1_6_night ~ s(X,Y), data = no2.winter, method = "REML")
jan07n <- gam(no2_1_7_night ~ s(X,Y), data = no2.winter, method = "REML")
jan08n <- gam(no2_1_8_night ~ s(X,Y), data = no2.winter, method = "REML")
jan09n <- gam(no2_1_9_night ~ s(X,Y), data = no2.winter, method = "REML")
jan10n <- gam(no2_1_10_night ~ s(X,Y), data = no2.winter, method = "REML")
###############
#--Create DF--#
###############
jan_mgcv <- data.frame(expand.grid(X = seq(min(no2.winter$X), max(no2.winter$X), length=200),
Y = seq(min(no2.winter$Y), max(no2.winter$Y), length=200)))
#############
#--Predict--#
#############
jan_mgcv$jan01d<- jan01d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan01n<- jan01n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan02d<- jan02d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan02n<- jan02n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan03d<- jan03d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan03n<- jan03n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan04d<- jan04d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan04n<- jan04n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan05d<- jan05d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan05n<- jan05n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan06d<- jan06d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan06n<- jan06n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan07d<- jan07d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan07n<- jan07n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan08d<- jan08d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan08n<- jan08n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan09d<- jan09d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan09n<- jan09n %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan10d<- jan10d %>% predict(jan_mgcv, type = "response")
jan_mgcv$jan10n<- jan10n %>% predict(jan_mgcv, type = "response")
##-- Find Mean and variance
stat <- jan_mgcv %>% dplyr::select(-c(X,Y)) %>%
gather(factor_key = T) %>%
group_by(key) %>%
summarise(mean= round(mean(value),1),
median= round(median(value),1),
sd= round(sd(value),1),
max = round(max(value),2),
min = round(min(value),2)) %>%
rename(Hour = key)
which( colnames(no2.winter)=="no2_1_10_day") # Find first column
top.jan <- no2.winter[,c(2:3, 5:24)]
coordinates(top.jan) <- ~X+Y
proj4string(top.jan) <- CRS("+init=epsg:5181")
ras.mgcv <- rasterFromXYZ(jan_mgcv)
crs(ras.mgcv) <- CRS('+init=epsg:5181')
########
#-RMSE-#
########
obs <- as.data.frame(top.jan) %>% dplyr::select(X,Y,everything())
pred.df <- data.frame(X = obs$X, Y = obs$Y)
RMSE <- data.frame(X = obs$X, Y = obs$Y)
# for loop
for(i in 1:20){
pred.df[,i+2] <- extract(ras.mgcv[[i]], top.jan)
RMSE[,i+2] <- sqrt(abs(pred.df[,i+2] - obs[,i+2]))^2
}
colnames(RMSE) <- colnames(obs)
RMSE %>% dplyr::select(-c(1:2)) %>% as.matrix() %>% mean()
stat$rmse <- RMSE %>% dplyr::select(-c(1:2)) %>% colMeans() %>% round(digits = 3)##-- Plotting
GAM.df <- as.data.frame(ras.mgcv, xy = TRUE) %>% rename(X = "x", Y = "y") %>%
reshape2::melt(id = c("X", "Y"), variable.name = "Hour", value.name = "NO2")
GAM.df %>%
ggplot() +
geom_tile(aes(x = X, y = Y, fill = NO2)) +
scale_fill_distiller(palette = "Spectral", na.value = NA, limits = c(0,125), breaks = c(0,25,50,75,100,125)) +
geom_contour(aes(x = X, y = Y, z = NO2),bins = 20, colour = "grey40", alpha = 0.7) +
geom_path(data = seoul, aes(x = long, y = lat), color = 'black', size = 1) +
geom_text(data = stat, aes(x = 187000, y = 434000, label = paste0("mean = " , mean)), size = 3) +
geom_text(data = stat, aes(x = 184000, y = 430500, label = paste0("sd = " , sd)), size = 3) +
geom_text(data = stat, aes(x = 188900, y = 469000, label = paste0("RMSE=" , rmse)), size = 3) +
facet_wrap(~ Hour, ncol = 8) +
theme_bw() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y=element_blank(),
strip.text.x = element_text(size = 20),
legend.title=element_text(size=15),
legend.text=element_text(size=15)
) -> GAM # 1200 x 550
# Export PNG
png("GAM.png", width=1200, height=550, res=100)
GAM
dev.off()
#####################
#--Resample Raster--#
#####################
ras.road <- raster("../seoul/road_10km_re.tif") # Import raster
res.mgcv <- resample(ras.mgcv, ras.road, method = "bilinear") # resample
res.mgcv <- merge(ras.road, res.mgcv) # merge
# assign road
road_01 = road_02 = road_03 = road_04 = road_05 =
road_06 = road_07 = road_08 = road_09 = road_10 =
road_11 = road_12 = road_13 = road_14 = road_15 =
road_16 = road_17 = road_18 = road_19 = road_20 = ras.road
# stack raster and remove individual raster files
road.stack <- stack(road_01, road_02, road_03, road_04, road_05,
road_06, road_07, road_08, road_09, road_10,
road_11, road_12, road_13, road_14, road_15,
road_16, road_17, road_18, road_19, road_20
)
rm(road_01, road_02, road_03, road_04, road_05,
road_06, road_07, road_08, road_09, road_10,
road_11, road_12, road_13, road_14, road_15,
road_16, road_17, road_18, road_19, road_20
)
# add road ratio values to GAM raster
ratio.top.jan <- ratio.no2.win[1:20,]
for(i in 1:20){
#road.stack[[i]] <- road.stack[[i]] * ratio.no2.sum$ratio[i]
values(road.stack)[values(road.stack[[i]]) == 1] <- ratio.top.jan$Back.Road.Ratio[i]
values(road.stack)[values(road.stack[[i]]) == 2] <- ratio.top.jan$Back.High.Ratio[i]
}
# add no2 and road values
r.poll.rd <- overlay(res.mgcv, road.stack, fun = function(x,y){ifelse(y != 0, x*y, x)})
names(r.poll.rd) <- c('jan01d', 'jan01n','jan02d', 'jan02n','jan03d', 'jan03n','jan04d', 'jan04n', 'jan05d', 'jan05n', 'jan06d', 'jan06n', 'jan07d', 'jan07n','jan08d', 'jan08n','jan09d', 'jan09n', 'jan10d', 'jan10n')
#####################
#ras.mgcv.df <- as.data.frame(r.poll.rd, xy = TRUE) # easy way
# however, since we resampled and changed our data
# with different resolution imamges and extent, the easier way doesn't work
ras <- xyFromCell(r.poll.rd, 1:ncell(r.poll.rd))
GAM.rd <- as.data.frame(r.poll.rd)
##-- Find Mean and variance
ras.krige.stat <- data.frame(ras, GAM.rd)
stat1 <- ras.krige.stat %>% dplyr::select(-c(x,y)) %>%
gather(factor_key = T) %>%
group_by(key) %>% summarise(mean= round(mean(value),1), sd= round(sd(value),1), max = max(value),min = min(value)) %>%
rename(Hour = key)
#####
ras.GAM.rd <- data.frame(ras, GAM.rd) %>%
reshape2::melt(id = c("x", "y"), variable.name = "Hour", value.name = "NO2")
ras.GAM.rd %>%
ggplot() +
geom_tile(aes(x = x, y = y, fill = NO2)) +
scale_fill_distiller(palette = "Spectral", na.value = NA, limits = c(0,125), breaks = c(0,25,50,75,100,125)) +
geom_text(data = stat1, aes(x = 187000, y = 434000, label = paste0("mean = " , mean)), size = 3) +
geom_text(data = stat1, aes(x = 184000, y = 430500, label = paste0("sd = " , sd)), size = 3) +
geom_path(data = seoul, aes(x = long, y = lat), color = 'black', size = 1) +
facet_wrap(~ Hour, ncol = 8) +
theme_bw() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y=element_blank(),
strip.text.x = element_text(size = 20),
legend.title=element_text(size=15),
legend.text=element_text(size=15)
) -> final
# Export PNG
png("GAM_final.png", width=1200, height=550, res=100)
final
dev.off()##############
#--Extract--##
##############
# Attributes of Monitoring stations
back_df <- stations_df %>%
filter(Name %in% c('Jongno-gu', 'Seongbuk-gu', 'Gwanak-gu', 'Gangnam-gu', 'Daeya', 'Onam')) %>%
dplyr::select(-c(Long, Lat, Province, City, `F/R`, Road_Dist, DEM))
road_df <- stations %>% filter(Name %in% c('Hangang-daero', 'Yeongdeungpo', 'Gangbyeonbuk-ro'))
# Spatial Points
back <- top.jan[c(2,8,17,18,42,48),] #'Jongno-gu', 'Seongbuk-gu', 'Gwanak-gu', 'Gangnam-gu', 'Daeya', 'Onam'
colnames(back@data) <- c("jan01d", "jan01n", "jan02d", "jan02n","jan03d", "jan03n", "jan04d", "jan04n", "jan05d", "jan05n", "jan06d", "jan06n", "jan07d", "jan07n", "jan08d", "jan08n", "jan09d", "jan09n", "jan10d", "jan10n")
# Silim Coordinates: 195793.5 442361.5
# Gwanakgu Office location: 193837.2 442737.9
# Final
back_pred <- back_df %>% cbind(extract(r.poll.rd, back))
back_obs <- back_df %>% cbind(back@data)
# Plot
plot_back_pred <- reshape2::melt(back_pred,
id = c("X","Y","Station","Name"),
variable.name = "Type",
value.name = "Value") %>%
mutate(Group = "pred")
plot_back_obs <- reshape2::melt(back_obs,
id = c("X","Y","Station","Name"),
variable.name = "Type",
value.name = "Value")%>%
mutate(Group = "obs")
plot_back_fin <- rbind(plot_back_pred, plot_back_obs)
plot_back_fin %>%
ggplot(aes(x = Type, y = Value, group = Group)) +
geom_line(aes(linetype = Group), size = 1, col = "grey") +
geom_point(aes(colour = Name), size =2) +
ylab("Concentration(ppb)") +
facet_wrap(~ Name) +
theme_bw() +
theme(axis.title.x=element_blank(),
axis.text.x=element_text(size = 12, angle = 90),
axis.text.y=element_text(size = 12),
strip.text.x = element_text(size = 20),
legend.title=element_text(size=15),
legend.text=element_text(size=15)
) -> graphs
# Export Graph
png("Pred_Obs.png", width=1200, height=550, res=100)
graphs
dev.off()