# ------------------------------------------------------------------------
# DATABASE FUNCTIONS
# description: functions for PostgreSQL databases: connection, data 
# extraction, data formatting following a specific table, etc
# author : Juliette Fabre / OSU OREME
# creation : 11/2012
# last update : 12/2020
# ------------------------------------------------------------------------

library(RPostgreSQL)

# Connect to a PostgreSQL database by providing connection parameters
#
# Arguments:
# - dbname: name of the database
# - user: user login
# - password: user password
# - host: name of the host or IP address
# - port
#
# Values:
# - drv: driver object
# - con: PostgreSQL connection

connect_db <- function(dbname, user, password, host, port)
{
	drv <- dbDriver("PostgreSQL")
  con <- dbConnect(drv, dbname = dbname, user = user, password = password, host = host, port = port)	
	return(list(drv = drv, con = con))
}

# Disconnect from a database
#
# Argument: 
# - connexion: object resulting from the connect_db function

disconnect_db <- function(connexion)
{
	dbDisconnect(connexion$con)
	dbUnloadDriver(connexion$drv)
}

# Connect to a database via RODBC and set to a specific schema
#
# Argument: 
# - schema: name of the schema, optional, default "public"
#
# Values:
# - object resulting from the odbcConnect function

connect_db_rodbc <- function(schema = 'public')
{	
  library(RODBC)
	channel <- odbcConnect("", uid = "", pwd = "")
	sqlQuery(channel, paste("SET search_path TO ", add_quotes(schema), sep = ""))
	return(channel)
}

# Connect to a PostgreSQL database by providing connection parameters and using a pool to optimize DB access
#
# Arguments:
# - dbname: name of the database
# - user: user login
# - password: user password
# - host: name of the host or IP address
# - port
#
# Values:
# - drv: driver object
# - con: Pool object

connect_db_pool <- function(dbname, user, password, host, port)
{
  library(pool)
  pool <- dbPool(
    drv = RPostgreSQL::PostgreSQL(),
    dbname = dbname,
    host = host,
    port = port,
    user = user,
    password = password,
    minSize = 1,
    maxSize = Inf,    # default
    idleTimeout = 600000 
  )
  drv <- dbDriver("PostgreSQL")	
  return(list(con=pool,drv=drv))
}

# Return the values of given fields from a given table
#
# Arguments:
# - fields: vector of fields to select, or "*" for all fields
# - table: name of the table
# - schema: name of the schema, optional, default "public"
# - con: object resulting from the connect_db function
#
# Values:
# - data_frame or vector with there is only one selected field

get_db_values <- function(fields, table, schema = "public", con)
{
  if(! (length(fields) == 1 && fields == "*")) fields2 <- add_quotes(fields) else fields2 <- fields
  req <- paste("SELECT ", paste(fields2, collapse = ", "), " FROM ", add_quotes(schema), ".", add_quotes(table), sep = "")
  if(length(fields) > 1 || fields == "*") values <- dbGetQuery(con, req) else values <- dbGetQuery(con, req)[[fields]]
  return(values)
}

# Return the field names of a given table
#
# Arguments:
# - table: name of the table
# - schema: name of the schema, optional, default "public"
# - con: object resulting from the connect_db function
#
# Values:
# - vector

get_db_colnames <- function(table, schema = "public", con)
{
	req <- paste("SELECT column_name FROM information_schema.columns WHERE table_schema = '", schema, "' AND table_name = '", table, "'", sep = "")
	names <- dbGetQuery(con, req)$column_name
	return(names)
}

# Reorganize columns of a dataframe depending on the field names of a given table
#
# Arguments:
# - table: name of the table
# - schema: name of the schema, optional, default "public"
# - con: object resulting from the connect_db function
#
# Values:
# - dataframe

order_db_colnames <- function(data, table, schema = "public", con) 
{
	data <- data[, get_db_colnames(table, schema, con)]
	return(data)
}

# Add to a dataframe the columns that appear in a given vector of names and that are missing in the dataframe
#
# Arguments:
# - data : dataframe
# - vector of names that should be checked in dataframe and added if missing
# - missing_val : value of the column, default NA 
# Values:
# - dataframe

add_missing_col <- function(data, x, missing_val = NA)
{
  missing_col <- x[!x %in% names(data)]
  for(col in missing_col) data[[col]] <- missing_val
  return(data)
}