######################################################################################## ################# OPTIMISE PARAMETERS FOR OUTLIER DETECTION METHODS ###################### ######################################################################################## # This script contains functions for parameter optimisation of the outlier # detection methods. # INPUTS: time series (x), outlier locations in the time series (x_out), # arrays of parameters per method # OUTPUTS: list with (a) data.frame with arrays of Jaccard's coefficient, outlier count, # used parameters and (b) list of outlier ids #### define working environment #### #setwd("D:/ETC-ACM/Tasks/1022_3/R/finalisedScripts") ########################################################### ################ WHOLE WINDOW STATISTICS ################# ########################################################### mw.jac <- function(x, x_out, statistics=c("mean","median"), q, factor.sd.glob, factor.sd.loc, tolerance=5){ jac = NULL stats = rep(statistics,length(q)*2*length(factor.sd.glob)) q_val = rep(q, each=2*length(factor.sd.glob)*2) thr_val = NULL thr_method = rep(rep(c("sd_global", "sd_local"), each=length(statistics)*length(factor.sd.glob)), length(q)) count = NULL id_list = NULL # loop through parameters for(window in q){ # get moving window values mw = mw.values(x,window) cat("window size:", window, "\n") # Get differences for each point from mean or median in the window diffs.mean = mw.diff(mw, mean) diffs.median = mw.diff(mw, median) for(s in factor.sd.glob){ mw.mean_out = mw.outliers.sd(x, window, mean, s, diffs.mean) mw.median_out = mw.outliers.sd(x, window, median, s, diffs.median) jac = c(jac, c(jaccard(x, mw.mean_out, x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, mw.median_out, x_out, tolerance=tolerance))) thr_val = c(thr_val, s, s) count = c(count, length(mw.mean_out), length(mw.median_out)) id_list = c(id_list, list(mw.mean_out, mw.median_out)) } for(s in factor.sd.loc){ mw.mean_out = mw.outliers.sd.local(x, window, mean, s, diffs.mean, mw) mw.median_out = mw.outliers.sd.local(x, window, median, s, diffs.median, mw) jac = c(jac, c(jaccard(x, mw.mean_out, x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, mw.median_out, x_out, tolerance=tolerance))) thr_val = c(thr_val, s, s) count = c(count, length(mw.mean_out), length(mw.median_out)) id_list = c(id_list, list(mw.mean_out, mw.median_out)) } } mw_jac = data.frame(jac=jac, count=count, stats=stats, q=q_val, thr=thr_val, thr_method=thr_method) return(list(mw_jac,id_list)) } ############################################################### ################ TWO SIDED WINDOW STATISTICS ################# ############################################################### mw2.jac <- function(x, x_out, statistics=c("t.test", "mean", "median", "min", "max", "var", "q05", "q25", "q75", "q95"), q, factor.sd, prob, tolerance=5){ jac = NULL stats = NULL q_val = NULL thr_val = NULL thr_method = NULL count = NULL id_list = NULL for(window in q){ # get moving window values mw = mw.values(x,window) cat("window size:", window, "\n") for(p in prob){ mw2.t_out = mw2.outliers.t.test(x, window, p, mw) jac = c(jac,c(jaccard(x, mw2.t_out, x_out, tolerance=tolerance))) thr_val = c(thr_val, p) stats = c(stats, "t.test") thr_method = c(thr_method, "t.test") q_val=c(q_val, window) count = c(count, length(mw2.t_out)) id_list = c(id_list, list(mw2.t_out)) } for(s in factor.sd){ mw2.mean_out = mw2.outliers.sd(x, window, mean, s, mw) mw2.median_out = mw2.outliers.sd(x, window, median, s, mw) mw2.min_out = mw2.outliers.sd(x, window, min, s, mw) mw2.max_out = mw2.outliers.sd(x, window, max, s, mw) mw2.var_out = mw2.outliers.sd(x, window, sd, s, mw) mw2.quants_out = mw2.outliers.sd(x, window, c(0.05, 0.25, 0.75, 0.95), s, mw) jac = c(jac, c(jaccard(x, mw2.mean_out, x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, mw2.median_out, x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, mw2.min_out, x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, mw2.max_out, x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, mw2.var_out, x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, mw2.quants_out[[1]], x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, mw2.quants_out[[2]], x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, mw2.quants_out[[3]], x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, mw2.quants_out[[4]], x_out, tolerance=tolerance))) thr_val = c(thr_val, rep(s,9)) stats = c(stats, statistics[-1]) thr_method = c(thr_method, rep("sd",9)) q_val=c(q_val, rep(window,9)) count = c(count, length(mw2.mean_out), length(mw2.median_out), length(mw2.min_out), length(mw2.max_out), length(mw2.var_out), length(mw2.quants_out[[1]]), length(mw2.quants_out[[2]]), length(mw2.quants_out[[3]]), length(mw2.quants_out[[4]])) id_list = c(id_list, list(mw2.mean_out, mw2.median_out, mw2.min_out, mw2.max_out, mw2.var_out), mw2.quants_out) } } mw2_jac = data.frame(jac=jac, count=count, stats=stats, q=q_val, thr=thr_val, thr_method=thr_method) return(list(mw2_jac,id_list)) } ############################################### ################ LAG 1 DIFFS ################# ############################################### lag1Diff.jac <- function(x, x_out, type="all", alpha=NULL, beta=NULL, gamma=NULL, tolerance=5){ library(VGAM) jac = NULL stats = NULL q_val = NULL thr_val = NULL thr_method = NULL count = NULL id_list = NULL for(window in q){ # get moving window values mw.lag1 = lag1.values(x,window) cat("window size:", window, "\n") if(type=="all"){ for(i in 1:length(gamma)){ lag1cNorm_out = lag1.outliers.all(x, window, qnorm, c(alpha[i], beta[i], gamma[i]), mw.lag1) lag1cLapl_out = lag1.outliers.all(x, window, qlaplace, c(alpha[i], beta[i], gamma[i]), mw.lag1) for(j in 1:3){ jac = c(jac,c(jaccard(x, lag1cNorm_out[j][[1]], x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, lag1cLapl_out[j][[1]], x_out, tolerance=tolerance))) count = c(count, length(lag1cNorm_out[j]), length(lag1cLapl_out[j])) id_list = c(id_list, list(lag1cNorm_out[j], lag1cLapl_out[j])) } thr_val = c(thr_val, alpha[i], alpha[i], beta[i], beta[i], gamma[i], gamma[i]) stats = c(stats, "normal", "laplace", "normal", "laplace", "normal", "laplace") thr_method = c(thr_method, "a", "a", "b", "b", "c", "c") q_val=c(q_val, rep(window, 6)) } } if(type=="I"){ for(c in gamma){ # case c - peak lag1cNorm_out = lag1.outliers(x, window, qnorm, c, "c", mw.lag1) lag1cLapl_out = lag1.outliers(x, window, qlaplace, c, "c", mw.lag1) jac = c(jac,c(jaccard(x, lag1cNorm_out, x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, lag1cLapl_out, x_out, tolerance=tolerance))) thr_val = c(thr_val, c, c) stats = c(stats, "normal", "laplace") thr_method = c(thr_method, "c", "c") q_val=c(q_val, window, window) count = c(count, length(lag1cNorm_out), length(lag1cLapl_out)) id_list = c(id_list, list(lag1cNorm_out, lag1cLapl_out)) } } if(type=="II"){ for(a in alpha){ # case a - large growth or decline of concentration lag1aNorm_out = lag1.outliers(x, window, qnorm, a, "a", mw.lag1) lag1aLapl_out = lag1.outliers(x, window, qlaplace, a, "a", mw.lag1) jac = c(jac,c(jaccard(x, lag1aNorm_out, x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, lag1aLapl_out, x_out, tolerance=tolerance))) thr_val = c(thr_val, a, a) stats = c(stats, "normal", "laplace") thr_method = c(thr_method, "a", "a") q_val=c(q_val, window, window) count = c(count, length(lag1aNorm_out), length(lag1aLapl_out)) id_list = c(id_list, list(lag1aNorm_out, lag1aLapl_out)) } for(b in beta){ # case b - ongoing increase or decline of concentration lag1bNorm_out = lag1.outliers(x, window, qnorm, b, "b", mw.lag1) lag1bLapl_out = lag1.outliers(x, window, qlaplace, b, "b", mw.lag1) jac = c(jac,c(jaccard(x, lag1bNorm_out, x_out, tolerance=tolerance))) jac = c(jac,c(jaccard(x, lag1bLapl_out, x_out, tolerance=tolerance))) thr_val = c(thr_val, b, b) stats = c(stats, "normal", "laplace") thr_method = c(thr_method, "b", "b") q_val=c(q_val, window, window) count = c(count, length(lag1bNorm_out), length(lag1bLapl_out)) id_list = c(id_list, list(lag1bNorm_out, lag1bLapl_out)) } } } lag1Diff_jac = data.frame(jac=jac, count=count, stats=stats, q=q_val, thr=thr_val, thr_method=thr_method) return(list(lag1Diff_jac, id_list)) } #################################################### ################# MA FILTER ###################### #################################################### kz.jac <- function(x, x_out, adaptive=F, k=1, quantil, lim, tolerance=5){ jac = NULL stats = NULL q_val = NULL thr_val = NULL thr_method = NULL k_val = NULL count = NULL id_list = NULL for(window in q){ cat("window size:", window, "\n") for(it in k){ if(adaptive){ for(qu in quantil){ kza_out = kz.outliers(x, window, it, TRUE, qu) jac = c(jac,c(jaccard(x, kza_out, x_out, tolerance=tolerance))) thr_val = c(thr_val, qu) stats = c(stats, "adaptive") thr_method = c(thr_method, "maxQuantile") q_val=c(q_val, window) k_val=c(k_val, it) count = c(count, length(kza_out)) id_list = c(id_list, list(kza_out)) } }else{ for(qu in quantil){ kza_out = kz.outliers(x, window, it, FALSE, qu) jac = c(jac,c(jaccard(x, kza_out, x_out, tolerance=tolerance))) thr_val = c(thr_val, qu) stats = c(stats, "nonadaptive") thr_method = c(thr_method, "maxQuantile") q_val=c(q_val, window) k_val=c(k_val, it) count = c(count, length(kza_out)) id_list = c(id_list, list(kza_out)) } } } } kza_jac = data.frame(jac=jac, count=count, stats=stats, q=q_val, thr=thr_val, thr_method=thr_method, k=k_val) return(list(kza_jac, id_list)) } # Save data save.image("optimisation_methods.RData")