# ------------------------------------------------------------------------
# TEST OF UTILITY R FUNCTIONS
# author : Juliette Fabre / OSU OREME
# creation : 06/2012
# last update : 12/2020
# ------------------------------------------------------------------------

# Reads the Excel file test_file.xls and checks the different validation rules:
# 
# Sheet 'Study' :
# STUDY: provided, unique and 20 characters max
# SPONSOR: provided, exists in the database
#
# Sheet 'Sample' :
# SAMPLE: provided, unique, 10 characters max
# SAMPLE_DATE: date-formatted, years are accepted
# STUDY: must be listed in the 'Study' sheet
# HYDRO_ENTITY_NAME: provided, 200 characters max, with a unique HYDRO_ENTITY_CODE for each hydro_entity_name
# HYDRO_ENTITY_CODE: 8 characters exactly
# X and Y: numeric

# Workbook to validate (a relative path is resolved against the working directory)
file_name <- "test_file.xls"

# Load the project helper scripts (data checking, tools, database access)
source("oreme_data_checking_v1.5.R")
source("oreme_tools_v1.2.R")
source("oreme_db_v1.4.R")

# Open the database session. `connexion` is kept for the final disconnection,
# `con` is the handle handed to the query helper further down.
# NOTE(review): the connection arguments are hard-coded placeholders.
connexion <- db_connect("MY_BASE", "user", "password", "xxx.xx.xxx.xx")
con <- connexion$con

# Accumulator for validation messages; each failed check appends its message
error_msg <- ""


# ------------------------------------------------------------------
# IMPORT DATA
# ------------------------------------------------------------------

library(XLConnect)
# Open the workbook designated by file_name
wb <- loadWorkbook(file_name)

# Read each sheet starting at its first column, then lower-case the column
# names: the checks below address columns by lower-case name.
study <- readWorksheet(wb, sheet = "Study", startCol = 1)
study <- setNames(study, tolower(names(study)))

sample <- readWorksheet(wb, sheet = "Sample", startCol = 1)
sample <- setNames(sample, tolower(names(sample)))


# ----------------------------------------------------------
# CHECK DATA IN Study SHEET
# ----------------------------------------------------------

# Column "study": must be filled in, hold at most 20 characters and be unique.
# The column is passed through trim() (project helper) before being checked.
study$study <- trim(study$study)

# Run the three checks in order; a check whose `res` is FALSE contributes its
# `msg` to the accumulated error message.
for (check in list(
  check_no_missing_value(col = "study", sheet = "Study", data = study),
  check_nb_character(col = "study", sheet = "Study", data = study, nbchar = 20),
  check_unicity(col = "study", sheet = "Study", data = study)
)) {
  if (!check$res) {
    error_msg <- paste0(error_msg, check$msg)
  }
}

# Column "sponsor": must be filled in and be known to the database
study$sponsor <- trim(study$sponsor)

check <- check_no_missing_value(col = "sponsor", sheet = "Study", data = study)
if (!check$res) {
  error_msg <- paste0(error_msg, check$msg)
}

# Reference values: field sponsor_name of table sponsor in schema test
sponsor_base <- get_db_values("sponsor_name", "sponsor", "test", con)
check <- check_belong(
  col = "sponsor", sheet = "Study", data = study,
  value_set = sponsor_base,
  value_description = "la base de données"
)
if (!check$res) {
  error_msg <- paste0(error_msg, check$msg)
}



# ----------------------------------------------------------
# CHECK DATA IN Sample SHEET
# ----------------------------------------------------------

# Validation of the 'Sample' sheet. Every check returns an object with a
# logical `res` and a message `msg`; the message of each failed check is
# appended to error_msg.

# sample: provided, unique, 10 characters max
sample$sample <- trim(sample$sample)

check <- check_no_missing_value(col = "sample", sheet = "Sample", data = sample)
if (!check$res) error_msg <- paste0(error_msg, check$msg)

check <- check_unicity(col = "sample", sheet = "Sample", data = sample)
if (!check$res) error_msg <- paste0(error_msg, check$msg)

check <- check_nb_character(col = "sample", sheet = "Sample", data = sample, nbchar = 10)
if (!check$res) error_msg <- paste0(error_msg, check$msg)

# sample_date: date-formatted (years are accepted)
# TRUE is spelled out because T is an ordinary variable that can be reassigned
check <- check_date(col = "sample_date", sheet = "Sample", data = sample, year_accepted = TRUE)
if (!check$res) error_msg <- paste0(error_msg, check$msg)

# study: must be listed in the Study sheet
# The reference values (study$study) were trimmed when the Study sheet was
# checked, so the Sample values are trimmed as well; otherwise a value that
# differs only by surrounding whitespace would be reported as not listed.
sample$study <- trim(sample$study)

check <- check_belong(col = "study", sheet = "Sample", data = sample, value_set = study$study, value_description = "la feuille 'Study'")
if (!check$res) error_msg <- paste0(error_msg, check$msg)

# hydro_entity_name: provided, 200 characters max, with a unique hydro_entity_code for each hydro_entity_name
sample$hydro_entity_name <- trim(sample$hydro_entity_name)

check <- check_no_missing_value(col = "hydro_entity_name", sheet = "Sample", data = sample)
if (!check$res) error_msg <- paste0(error_msg, check$msg)

check <- check_nb_character(col = "hydro_entity_name", sheet = "Sample", data = sample, nbchar = 200)
if (!check$res) error_msg <- paste0(error_msg, check$msg)

# Reduce to the distinct (name, code) pairs: a name that still appears more
# than once in this table is associated with several codes.
hydro_entity <- unique(sample[, c("hydro_entity_name", "hydro_entity_code")])
check <- check_unicity(col = "hydro_entity_name", sheet = "Sample", data = hydro_entity, check_consistency = TRUE)
if (!check$res) error_msg <- paste0(error_msg, check$msg)

# hydro_entity_code: 8 characters exactly
check <- check_nb_character(col = "hydro_entity_code", sheet = "Sample", data = sample, nbchar = 8, test = "equal")
if (!check$res) error_msg <- paste0(error_msg, check$msg)

# x and y: numeric
check <- check_numeric(col = "x", sheet = "Sample", data = sample)
if (!check$res) error_msg <- paste0(error_msg, check$msg)
check <- check_numeric(col = "y", sheet = "Sample", data = sample)
if (!check$res) error_msg <- paste0(error_msg, check$msg)


# --------------
# EXPORT RESULTS
# --------------

# Report the outcome: an accumulated message that is empty after trim() means
# no check failed; otherwise the messages are written to erreur.txt.
if (nchar(trim(error_msg)) == 0) {
  cat("Fichier validé\n")
} else {
  cat("Le fichier n'est pas validé, voir le fichier erreur.txt\n\n")
  write.table(
    error_msg, "erreur.txt",
    quote = FALSE, row.names = FALSE, col.names = FALSE
  )
}


# Hand the connection object obtained at start-up back to disconnect_db
disc <- disconnect_db(connexion)

