#!/usr/bin/env bash
#
# Usage: bash RLHealthCheck.sh [RLI_HOME]   (RLI_HOME defaults to /data/radiantone/vds)
# RadiantOne healthcheck and data collection script (Linux version)
#

#set -euo pipefail

# --- Parameters --------------------------------------------------------------

# Installation root: first positional argument, otherwise the default below.
RLI_HOME="${1:-/data/radiantone/vds}"   # pass as first arg or edit default

if [ ! -d "$RLI_HOME" ]; then
  echo "ERROR: RLI_HOME path '$RLI_HOME' does not exist." >&2
  exit 1
fi

# Exported for child processes; this script itself never reads it.
# NOTE(review): presumably consumed by the RadiantOne scripts invoked later - confirm.
export RLI_HOME_TMP="$RLI_HOME"

# Fall back to the short host name when "hostname -f" is unavailable or fails.
HOSTNAME_FQDN="$(hostname -f 2>/dev/null || hostname)"
TIMESTAMP="$(date +'%Y%m%d_%H%M%S')"

# All collected data goes under a per-host, per-run directory inside RLI_HOME.
HEALTHCHECK_DIR="${RLI_HOME}/RLHealthCheck_${HOSTNAME_FQDN}_${TIMESTAMP}"
LOG_DIR="${HEALTHCHECK_DIR}/logs"

# Every later step writes into these directories, so stop right away if they
# cannot be created instead of producing a cascade of redirect errors.
if ! mkdir -p "$HEALTHCHECK_DIR" "$LOG_DIR"; then
  echo "ERROR: Cannot create healthcheck directory '$HEALTHCHECK_DIR'." >&2
  exit 1
fi

LOG_FILE="${LOG_DIR}/RLHealthCheck_${HOSTNAME_FQDN}_${TIMESTAMP}.log"

# Redirect all stdout/stderr to log and console (tee)
# NOTE: while the line below stays commented out, nothing is written to
# LOG_FILE; the path is only announced. Process substitution requires bash.
# exec > >(tee -a "$LOG_FILE") 2>&1

echo "Starting RadiantOne healthcheck for ${HOSTNAME_FQDN} at ${TIMESTAMP}"
echo "RLI_HOME = ${RLI_HOME}"
echo "Healthcheck directory = ${HEALTHCHECK_DIR}"
echo "Log file = ${LOG_FILE}"

# --- Helper: run external command with logging ------------------------------

#######################################
# Run a shell command string from a given directory and print its outcome.
# Arguments:
#   $1 - human-readable description printed in the banner
#   $2 - directory to change into before running the command
#   $3 - command line; it is passed to eval so it may contain redirections,
#        which also means it must only ever be built from trusted values
# Outputs:
#   A banner and a "Status" line (SUCCESS or FAILED) on stdout.
# Returns:
#   The status of the final echo (0 in practice): a failing command is
#   reported, not propagated, so one failed step does not stop the script.
#######################################
run_logged_cmd() {
  local desc="$1"
  local workdir="$2"
  local cmd="$3"

  echo
  echo ">>> ${desc}"
  echo "    Working dir: ${workdir}"
  echo "    Command    : ${cmd}"

  # Subshell keeps the directory change (and anything the command does to the
  # shell environment) from leaking into the caller.
  (
    if ! cd "$workdir"; then
      # Never run the command from the wrong directory.
      echo "    Status     : FAILED (cannot change to working dir)"
    elif eval "$cmd"; then
      echo "    Status     : SUCCESS"
    else
      # $? still holds the exit status of the eval above.
      echo "    Status     : FAILED (exit code $?)"
    fi
  )
}

# --- CPU and memory info ----------------------------------------------------

echo
echo "[1] Collecting CPU and memory information..."

# Output files for this section, all inside the healthcheck directory.
CPU_INFO_FILE="${HEALTHCHECK_DIR}/CPU-info.txt"
TASKMGR_INFO_FILE="${HEALTHCHECK_DIR}/TaskManager-info.txt"
MEM_INFO_FILE="${HEALTHCHECK_DIR}/Memory_info.txt"
TOP_PROC_FILE="${HEALTHCHECK_DIR}/Top_CPU_Processes.txt"
ULIMIT_FILE="${HEALTHCHECK_DIR}/ulimit.txt"

# CPU info (logical processors, cores, model)
run_logged_cmd \
  "CPU info (logical processors, cores, name)" \
  "/" \
  "lscpu > '$CPU_INFO_FILE' 2>&1"

# Rough equivalent of Task Manager process info
run_logged_cmd \
  "Task Manager info (pid, command line)" \
  "/" \
  "ps -eo pid,cmd --no-headers > '$TASKMGR_INFO_FILE' 2>&1"

# Memory info
run_logged_cmd \
  "Memory info (total/available physical memory)" \
  "/" \
  "free -h > '$MEM_INFO_FILE' 2>&1"

echo ">>> Top processes by CPU"
echo "    Output : $TOP_PROC_FILE"
# Write the full ps listing to a scratch file first. In a "ps | head" pipeline
# only head's exit status is tested and ps's stderr is not redirected, so a
# failing ps would be reported as SUCCESS and its error text would be lost.
TOP_PROC_TMP="${TOP_PROC_FILE}.tmp"
TOP_PROC_STATUS="SUCCESS"
ps -eo pid,pcpu,pmem,etime,cmd --sort=-pcpu > "$TOP_PROC_TMP" 2>&1 || TOP_PROC_STATUS="FAILED"
# Keep the header line plus the first 19 processes, as before.
head -n 20 "$TOP_PROC_TMP" > "$TOP_PROC_FILE" 2>&1 || TOP_PROC_STATUS="FAILED"
rm -f -- "$TOP_PROC_TMP"
echo "    Status : ${TOP_PROC_STATUS}"

# Ulimit info (limits of the shell running this script)
run_logged_cmd \
  "ulimit info (fd,proc)" \
  "/" \
  "ulimit -a > '$ULIMIT_FILE' 2>&1"
  
# --- cluster checks ---------------------------------------------------------

ADVANCED_DIR="${RLI_HOME}/bin/advanced"
echo
echo "[2] Running cluster and license checks (cluster.sh / vdsconfig.sh)..."

# Name of the cluster tool inside ADVANCED_DIR.
CLUSTER_CMD="cluster.sh"
# Warn up front when the tool is missing; the checks are still attempted so
# that each report file records the actual error.
if [ ! -x "${ADVANCED_DIR}/${CLUSTER_CMD}" ]; then
  echo "WARN: ${ADVANCED_DIR}/${CLUSTER_CMD} not found or not executable; cluster checks will fail."
fi

# Paths are single-quoted inside the command strings because run_logged_cmd
# evals them; unquoted, a space in RLI_HOME would split the path.
run_logged_cmd \
  "cluster.sh system-check" \
  "$ADVANCED_DIR" \
  "'${ADVANCED_DIR}/${CLUSTER_CMD}' system-check > '${HEALTHCHECK_DIR}/hc_system_check_report.txt' 2>&1"

run_logged_cmd \
  "cluster.sh remote-check" \
  "$ADVANCED_DIR" \
  "'${ADVANCED_DIR}/${CLUSTER_CMD}' remote-check > '${HEALTHCHECK_DIR}/hc_remote_check_report.txt' 2>&1"

run_logged_cmd \
  "cluster.sh check" \
  "$ADVANCED_DIR" \
  "'${ADVANCED_DIR}/${CLUSTER_CMD}' check > '${HEALTHCHECK_DIR}/hc_cluster_check_report.txt' 2>&1"

run_logged_cmd \
  "cluster.sh list" \
  "$ADVANCED_DIR" \
  "'${ADVANCED_DIR}/${CLUSTER_CMD}' list > '${HEALTHCHECK_DIR}/hc_list_check_report.txt' 2>&1"

run_logged_cmd \
  "cluster.sh latency" \
  "$ADVANCED_DIR" \
  "'${ADVANCED_DIR}/${CLUSTER_CMD}' latency > '${HEALTHCHECK_DIR}/hc_latency_check_report.txt' 2>&1"

# --- vdsconfig checks -------------------------------------------------------

BIN_DIR="${RLI_HOME}/bin"
echo
echo "[3] Collecting license checks vdsconfig..."

# Name of the configuration tool inside BIN_DIR.
VDSCONFIG_CMD="vdsconfig.sh"
# Warn up front when the tool is missing; the commands are still attempted so
# that each report file records the actual error.
if [ ! -x "${BIN_DIR}/${VDSCONFIG_CMD}" ]; then
  echo "WARN: ${BIN_DIR}/${VDSCONFIG_CMD} not found or not executable; vdsconfig checks will fail."
fi

# The report paths use "/" as separator. With a backslash the eval inside
# run_logged_cmd drops the backslash and the report is created next to the
# healthcheck directory, so it never reaches the archive.
run_logged_cmd \
  "vdsconfig product-info" \
  "$BIN_DIR" \
  "'${BIN_DIR}/${VDSCONFIG_CMD}' product-info > '${HEALTHCHECK_DIR}/hc_license_info.txt' 2>&1"

run_logged_cmd \
  "vdsconfig list-properties" \
  "$BIN_DIR" \
  "'${BIN_DIR}/${VDSCONFIG_CMD}' list-properties > '${HEALTHCHECK_DIR}/hc_list_properties.txt' 2>&1"

# --- Collect logs and key files ---------------------------------------------

echo
echo "[4] Collecting VDS logs and key files..."

# Copy one file ($1) to a destination file or directory ($2) and report what
# actually happened; a failed copy is a warning, not a fatal error.
hc_copy_file() {
  local src="$1"
  local dest="$2"

  if cp -f -- "$src" "$dest"; then
    echo "Copied: $src"
  else
    echo "WARN: Failed to copy: $src"
  fi
}

# Print the most recently modified regular file among the arguments, or
# nothing when none of them exists (an unmatched glob arrives here as the
# literal pattern and is skipped by the -f test).
hc_newest_file() {
  local candidate
  local newest=""

  for candidate in "$@"; do
    [ -f "$candidate" ] || continue
    if [ -z "$newest" ] || [ "$candidate" -nt "$newest" ]; then
      newest="$candidate"
    fi
  done

  [ -z "$newest" ] || printf '%s\n' "$newest"
}

VDS_LOGS_DIR="${RLI_HOME}/vds_server/logs"

if [ -d "$VDS_LOGS_DIR" ]; then
  ACCESS_LOG="${VDS_LOGS_DIR}/vds_server_access.log"
  if [ -f "$ACCESS_LOG" ]; then
    hc_copy_file "$ACCESS_LOG" "${LOG_DIR}/vds_server_access.log"
  fi

  # latest vds_server_access-*.zip
  LATEST_ACCESS_ZIP="$(hc_newest_file "${VDS_LOGS_DIR}"/vds_server_access-*.zip)"
  if [ -n "$LATEST_ACCESS_ZIP" ]; then
    hc_copy_file "$LATEST_ACCESS_ZIP" "$LOG_DIR"
  fi

  SERVER_LOG="${VDS_LOGS_DIR}/vds_server.log"
  if [ -f "$SERVER_LOG" ]; then
    hc_copy_file "$SERVER_LOG" "${LOG_DIR}/vds_server.log"
  fi

  # latest vds_server-*.zip
  LATEST_SERVER_ZIP="$(hc_newest_file "${VDS_LOGS_DIR}"/vds_server-*.zip)"
  if [ -n "$LATEST_SERVER_ZIP" ]; then
    hc_copy_file "$LATEST_SERVER_ZIP" "$LOG_DIR"
  fi

  # gc* logs (files directly inside the logs directory only)
  if find "$VDS_LOGS_DIR" -maxdepth 1 -type f -name 'gc*' -exec cp -t "$LOG_DIR" -- {} +; then
    echo "Copied: gc* (if any)"
  else
    echo "WARN: Failed to copy one or more gc* logs from $VDS_LOGS_DIR"
  fi

  EVENTS_LOG="${VDS_LOGS_DIR}/vds_events.log"
  if [ -f "$EVENTS_LOG" ]; then
    hc_copy_file "$EVENTS_LOG" "${LOG_DIR}/vds_events.log"
  fi
else
  echo "WARN: Logs directory not found: $VDS_LOGS_DIR"
fi

# Server start script, kept alongside the reports.
VDSSERVER_CONF="${BIN_DIR}/runVDSServer.sh"
if [ -f "$VDSSERVER_CONF" ]; then
  hc_copy_file "$VDSSERVER_CONF" "${HEALTHCHECK_DIR}/runVDSServer.sh"
fi

# Whole conf directory, copied recursively with attributes preserved (cp -a).
CONF_DIR="${RLI_HOME}/vds_server/conf"
if [ -d "$CONF_DIR" ]; then
  if cp -a "$CONF_DIR" "${HEALTHCHECK_DIR}/"; then
    echo "Copied conf directory: $CONF_DIR"
  else
    echo "WARN: Failed to copy conf directory: $CONF_DIR"
  fi
fi

# --- monitoring diagnostics --------------------------------------------------

echo
echo "[5] Collecting monitoring diagnostics..."

# Name of the monitoring tool inside BIN_DIR.
MONITORING_CMD="monitoring.sh"
# Warn up front when the tool is missing; the commands are still attempted so
# that each report file records the actual error.
if [ ! -x "${BIN_DIR}/${MONITORING_CMD}" ]; then
  echo "WARN: ${BIN_DIR}/${MONITORING_CMD} not found or not executable; monitoring diagnostics will fail."
fi

# The report paths use "/" as separator. With a backslash the eval inside
# run_logged_cmd drops the backslash and the report is created next to the
# healthcheck directory, so it never reaches the archive.
run_logged_cmd \
  "monitoring -d hdap-store" \
  "$BIN_DIR" \
  "'${BIN_DIR}/${MONITORING_CMD}' -d hdap-store > '${HEALTHCHECK_DIR}/hc_monitoring-stores.txt' 2>&1"

run_logged_cmd \
  "monitoring -d node-monitor" \
  "$BIN_DIR" \
  "'${BIN_DIR}/${MONITORING_CMD}' -d node-monitor > '${HEALTHCHECK_DIR}/hc_monitoring-node.txt' 2>&1"

run_logged_cmd \
  "monitoring -d process-info" \
  "$BIN_DIR" \
  "'${BIN_DIR}/${MONITORING_CMD}' -d process-info > '${HEALTHCHECK_DIR}/hc_monitoring-process-info.txt' 2>&1"

run_logged_cmd \
  "monitoring -d hdap-store (storage statistics)" \
  "$BIN_DIR" \
  "'${BIN_DIR}/${MONITORING_CMD}' -d hdap-store -p propertyId:indexSize -p propertyId:numOfEntries > '${HEALTHCHECK_DIR}/hc_storage-statistics.txt' 2>&1"

run_logged_cmd \
  "monitoring -d cloud-replication" \
  "$BIN_DIR" \
  "'${BIN_DIR}/${MONITORING_CMD}' -d cloud-replication > '${HEALTHCHECK_DIR}/hc_cloud-replication.txt' 2>&1"

# --- Compress healthcheck folder --------------------------------------------

echo
echo "[6] Compressing healthcheck folder..."

# Despite the variable names, the archive is a gzip-compressed tarball.
ZIP_NAME="RLHealthCheck_${HOSTNAME_FQDN}_${TIMESTAMP}.tar.gz"
ZIP_PATH="${RLI_HOME}/${ZIP_NAME}"

# Remove any stale archive with the same name (rm -f is a no-op if absent).
rm -f -- "$ZIP_PATH"

# Archive from inside RLI_HOME so the tarball holds the healthcheck directory
# by its relative name. The subshell keeps the cd local, and the && chain
# ensures tar never runs from the wrong directory.
if ( cd "$RLI_HOME" && tar -czf "$ZIP_NAME" "$(basename "$HEALTHCHECK_DIR")" ); then
  echo "Healthcheck archive created: $ZIP_PATH"
else
  echo "ERROR: Failed to create healthcheck archive: $ZIP_PATH" >&2
  # Do not leave a truncated archive behind; the collected directory remains.
  rm -f -- "$ZIP_PATH"
  exit 1
fi

echo
echo "Healthcheck completed."

