diff --git a/.env.TEMPLATE b/.env.TEMPLATE new file mode 100644 index 0000000..513fb17 --- /dev/null +++ b/.env.TEMPLATE @@ -0,0 +1,8 @@ +DEFAULT_ENV=dev + +# if 0, doesn't open a browser to the frontend webapp on a normal stack launch +DO_OPEN_BROWSER=1 + +POSTGRES_USER=molevolvr +POSTGRES_PASSWORD= +POSTGRES_DB=molevolvr diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f10862a --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/.env diff --git a/README.md b/README.md index e69de29..3e0d0b3 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,44 @@ +# MolEvolvR Stack + +This repo contains the implementation of the MolEvolvR stack, i.e.: +- `app`: the frontend webapp, written in React +- `backend`: a backend written in [Plumber](https://www.rplumber.io/index.html) +- `cluster`: the containerized SLURM "cluster" on which jobs are run +- `services`: a collection of services on which the stack relies: + - `postgres`: configuration for a PostgreSQL database, which stores job information + +Most of the data processing is accomplished via the `MolEvolvR` package, which +is currently available at https://github.com/JRaviLab/molevol_scripts. The stack +simply provides a user-friendly interface for accepting and monitoring the +progress of jobs, and orchestrates running the jobs on SLURM. The jobs +themselves call methods of the package at each stage of processing. + +## Running the Stack in Development + +To run the stack, you'll need to [install Docker and Docker Compose](https://www.docker.com/). + +First, copy `.env.TEMPLATE` to `.env` and fill in the necessary values. You +should supply a random password for the `POSTGRES_PASSWORD` variable. Of note +is the `DEFAULT_ENV` variable, which gives `run_stack.sh` a default environment +in which to operate; in development, this should be set to `dev`. 
+ +Then, you can run the following command to bring up the stack: + +```bash +./run_stack.sh +``` + +This will start the stack in development mode, which automatically reloads the +backend or frontend when there are changes to their source. + +You should then be able to access the frontend at `http://localhost:5713`. + +## Production + +To run the stack in production, you can run the following + +```bash +./run_stack.sh prod +``` + +This will start the stack in production mode. diff --git a/app/.dockerignore b/app/.dockerignore new file mode 100644 index 0000000..3c3629e --- /dev/null +++ b/app/.dockerignore @@ -0,0 +1 @@ +node_modules diff --git a/app/Dockerfile b/app/Dockerfile new file mode 100644 index 0000000..7508ba7 --- /dev/null +++ b/app/Dockerfile @@ -0,0 +1,50 @@ +# from https://bun.sh/guides/ecosystem/docker, with modifications +# to run a hot-reloading development server + +# use the official Bun image +# see all versions at https://hub.docker.com/r/oven/bun/tags +FROM oven/bun:1 AS base +WORKDIR /app + + +# ----------------------------------------------------------- +# install dependencies for dev and prod into temp directories +FROM base AS install + +COPY package.json bun.lockb /temp/dev/ +RUN cd /temp/dev/ && \ + bun install --frozen-lockfile + +# FA: the production-only install is currently commented out since we always +# require the dev dependencies, specifically vite, to run *or* build the app. +# i'm leaving it here because perhaps someday we'll think of a reason why we +# want just the production dependencies. + +# # install with --production (exclude devDependencies) +# COPY package.json bun.lockb /temp/prod/ +# RUN cd /temp/prod && \ +# bun install --frozen-lockfile --production + +# ----------------------------------------------------------- +# copy node_modules from dev stage, copy entire app +# source into the image +FROM base AS dev +COPY --from=install /temp/dev/node_modules node_modules +COPY . . 
+# run the app in hot-reloading development mode +# set up vite to accept connections on any interface, e.g. from outside the +# container, and to always run on port 5713) +CMD [ "vite", "--host", "--port", "5713" ] + + +# ----------------------------------------------------------- +# copy production dependencies and source code into final image +FROM base AS release +COPY --from=install /temp/dev/node_modules node_modules +COPY . . + +# produce a bundle that'll then be served via a reverse http proxy, e.g. nginx +# (you'll want /app/dist to be mapped to a volume that's served by the reverse +# http proxy) +CMD [ "vite", "build" ] + \ No newline at end of file diff --git a/app/bun.lockb b/app/bun.lockb index 2a29046..c28634f 100755 Binary files a/app/bun.lockb and b/app/bun.lockb differ diff --git a/app/package.json b/app/package.json index ecded39..e4c1cd7 100644 --- a/app/package.json +++ b/app/package.json @@ -18,6 +18,7 @@ "@radix-ui/react-slider": "^1.1.2", "@radix-ui/react-tabs": "^1.0.4", "@radix-ui/react-tooltip": "^1.0.7", + "@tanstack/react-query": "^5.36.2", "@tanstack/react-table": "^8.15.3", "classnames": "^2.5.1", "csv-stringify": "^6.4.6", @@ -34,7 +35,8 @@ "react-time-ago": "^7.3.1", "react-to-text": "^2.0.1", "react-use": "^17.5.0", - "use-debounce": "^10.0.0" + "use-debounce": "^10.0.0", + "use-query-params": "^2.2.1" }, "devDependencies": { "@ianvs/prettier-plugin-sort-imports": "^4.2.1", diff --git a/backend/docker/Dockerfile b/backend/docker/Dockerfile new file mode 100644 index 0000000..c64ec02 --- /dev/null +++ b/backend/docker/Dockerfile @@ -0,0 +1,42 @@ +# syntax=docker/dockerfile:1.7 + +# this Dockerfile should be used with the ./backend/ folder as the context +# and ./backend/docker/Dockerfile as the dockerfile + +FROM rocker/tidyverse:4.3 + +# install ccache, a compiler cache +RUN apt-get update && apt-get install -y ccache + +# install some common cmd line tools +RUN apt-get update && apt-get install -y curl + +# acquire drip, a plumber 
auto-reloader, and install +ENV DRIP_URL="https://rdrip.netlify.app/builds/drip_0.1.0_linux_amd64.zip" +RUN mkdir -p /tmp/software/ && \ + wget -L -O /tmp/software/drip.zip ${DRIP_URL} && \ + unzip /tmp/software/drip.zip -d /tmp/software && \ + mv /tmp/software/drip /usr/local/bin && \ + chmod +x /usr/local/bin/drip + +# acquire atlas, a schema manager +RUN curl -sSf https://atlasgo.sh | sh + +# configure ccache env vars +ENV PATH="/usr/lib/ccache:${PATH}" +ENV CCACHE_DIR="/tmp/ccache" + +# install dependencies into the image +COPY ./docker/install.R /tmp/install.r +RUN Rscript /tmp/install.r + +# RUN --mount=type=cache,target=/usr/local/lib/R/site-library \ +# Rscript /tmp/install.r + +WORKDIR /app + +# copy the app into the image +COPY . /app + +# run the app +CMD ["/app/launch_api.sh"] diff --git a/backend/docker/install.R b/backend/docker/install.R new file mode 100644 index 0000000..76c03d7 --- /dev/null +++ b/backend/docker/install.R @@ -0,0 +1,11 @@ +# install packages depended on by the molevolvr API server +install.packages( + c( + "plumber", # REST API framework + "DBI", # Database interface + "RPostgres", # PostgreSQL-specific impl. for DBI + "dbplyr", # dplyr for databases + "box" # allows R files to be referenced as modules + ), + Ncpus = 6 +) diff --git a/backend/dummy.R b/backend/dummy.R new file mode 100644 index 0000000..151297f --- /dev/null +++ b/backend/dummy.R @@ -0,0 +1,7 @@ +# Load the plumber package +library(plumber) + +#* @get / +function() { + "Hello, world!" 
+} diff --git a/backend/entrypoint.R b/backend/entrypoint.R new file mode 100644 index 0000000..8530e69 --- /dev/null +++ b/backend/entrypoint.R @@ -0,0 +1,26 @@ +options(box.path = "/app") + +box::use( + plumber[plumb], + server/tcp_utils[wait_for_port] +) + +# receive the target port as the env var API_PORT, or 9050 if unspecified +target_port <- as.integer(Sys.getenv("API_PORT", unset=9050)) + +# workaround for https://github.com/siegerts/drip/issues/3, in which +# reloading fails due to the port being in use. we just wait, polling +# once a second, for up to 60 seconds for the port to become free. +# NOTE: `else` must sit on the closing brace's line; at top level R ends the `if` at a bare `}` and then rejects the `else` +if (wait_for_port(target_port, poll_interval = 1, verbose = TRUE)) { + pr <- plumb("./dummy.R")$run( + host="0.0.0.0", + port=target_port, + debug=TRUE + ) +} else { + stop( + paste0("Failed to start the API server; port ", target_port, " still occupied after wait timeout exceeded") + ) +} diff --git a/backend/launch_api.sh b/backend/launch_api.sh new file mode 100755 index 0000000..9560958 --- /dev/null +++ b/backend/launch_api.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# pass off to drip to control serving and reloading the API +drip diff --git a/backend/server/tcp_utils.R b/backend/server/tcp_utils.R new file mode 100644 index 0000000..4b27dfd --- /dev/null +++ b/backend/server/tcp_utils.R @@ -0,0 +1,41 @@ +#' Utility functions for working with TCP ports + +#' Check if a port is in use +#' @param port The port to check +#' @param host The IP for which to check the port +#' @return TRUE if the port is in use, FALSE otherwise +is_port_in_use <- function(port, host = "127.0.0.1") { + connection <- try(suppressWarnings(socketConnection(host = host, port = port, timeout = 1, open = "r+")), silent = TRUE) + if (inherits(connection, "try-error")) { + return(FALSE) # Port is not in use + } else { + close(connection) + return(TRUE) # Port is in use + } +} + +#' Wait for a port to become free +#' @param port The port to wait for +#' @param timeout The maximum time to wait in seconds 
+#' @param poll_interval The interval between checks in seconds +#' @param host The IP for which to check the port +#' @param verbose Whether to print messages to the console +#' @return TRUE if the port is free, FALSE if the timeout is reached +wait_for_port <- function(port, timeout = 60, poll_interval = 5, host = "127.0.0.1", verbose = TRUE) { + start_time <- Sys.time() + end_time <- start_time + timeout + + while (Sys.time() < end_time) { + if (!is_port_in_use(port, host)) { + if (verbose) { cat("Port", port, "is now free\n") } + return(TRUE) + } + if (verbose) { cat("Port", port, "is in use. Checking again in", poll_interval, "seconds...\n") } + Sys.sleep(poll_interval) + } + + if (verbose) { + cat(paste0("Timeout of ", timeout, "s reached, but port ", port, " is still in use, aborting\n")) + } + return(FALSE) +} diff --git a/cluster/README.md b/cluster/README.md new file mode 100644 index 0000000..c67e635 --- /dev/null +++ b/cluster/README.md @@ -0,0 +1,4 @@ +# MolEvolvR Cluster + +This folder will eventually contain Dockerfiles for building images for a SLURM +controller and worker nodes. 
diff --git a/docker-compose.override.yml b/docker-compose.override.yml new file mode 100644 index 0000000..1ab4de6 --- /dev/null +++ b/docker-compose.override.yml @@ -0,0 +1,36 @@ +services: + backend: + volumes: + - ./backend/:/app/ + # - ./backend/api/:/app/api/ + # - ./backend/schema/:/app/schema/ + # - ./backend/entrypoint.R:/app/entrypoint.R + # - ./backend/run_tests.sh:/app/run_tests.sh + ports: + - "9050:9050" + environment: + - "POSTGRES_DEV_HOST=dev-db" + - "PLUMBER_DEBUG=1" + depends_on: + - "dev-db" + + app: + build: + context: ./app + target: dev + volumes: + - ./app/src:/app/src + environment: + - 'VITE_API=http://localhost:9050' + ports: + - "5713:5713" + + db: + ports: + - "5460:5432" + + # used by atlas to create migrations + dev-db: + image: postgres:16 + env_file: + - .env diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml new file mode 100644 index 0000000..600f84e --- /dev/null +++ b/docker-compose.prod.yml @@ -0,0 +1,28 @@ +volumes: + app_bundle: + caddy_data: + caddy_config: + +services: + app: + image: molevolvr-frontend + build: + context: ./app + target: release + volumes: + - app_bundle:/app/dist + depends_on: + - backend + + caddy: + image: caddy:2 + ports: + - "80:80" + - "443:443" + volumes: + - app_bundle:/srv + - ./services/caddy/Caddyfile:/etc/caddy/Caddyfile + - caddy_data:/data + - caddy_config:/config + depends_on: + - app diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..6b28837 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,32 @@ +services: + backend: + image: molevolvr-backend + platform: linux/amd64 + build: + context: ./backend + dockerfile: ./docker/Dockerfile + env_file: + - .env + environment: + - API_PORT=9050 + - "POSTGRES_HOST=db" + depends_on: + db: + condition: service_healthy + + app: + image: molevolvr-frontend + build: ./app + depends_on: + - backend + + db: + image: postgres:16 + env_file: + - .env + healthcheck: + test: ["CMD-SHELL", "sh -c 'pg_isready -U 
${POSTGRES_USER} -d ${POSTGRES_DB}'"] + interval: 30s + timeout: 60s + retries: 5 + start_period: 80s diff --git a/run_stack.sh b/run_stack.sh new file mode 100755 index 0000000..d17d3df --- /dev/null +++ b/run_stack.sh @@ -0,0 +1,201 @@ +#!/usr/bin/env bash + +# NOTES: +# ------- +# This script launches the molevolvr stack in the specified target environment. +# It's invoked as ./run_stack [target_env] [docker-compose args]; if +# [target_env] is not specified, it will attempt to infer it from the repo's +# directory name, and aborts with an error message if it doesn't find a match. +# the remainder of the arguments are passed along to docker compose. +# +# for example, to launch the stack in the "prod" environment with the "up -d" +# command, you would run: ./run_stack prod up -d +# +# the available environments differ in a variety of ways, including: +# - which services they run (prod runs 'nginx', for example, but the dev-y envs +# don't) +# - cores and memory constraints that are applied to the SLURM containers, in +# environments where the job scheduler is enabled +# - what external resources they mount as volumes into the container; for +# example, each environment mounts a different job results folder, but +# environments that process jobs use the same blast and iprscan folders, since +# they're gigantic +# +# these differences between environments are implemented by invoking different +# sets of docker-compose.yml files. with the exception of the "app" environment, +# the "root" compose file, docker-compose.yml, is always used first, and then +# depending on the environment other compose files are added in, which merge +# with the root compose configuration. since the app environment only runs the +# app, it has a separate compose file, docker-compose.apponly.yml, rather than +# merging with the root and killing nearly all the services except the app +# service. 
+# +# see the following for details on the semantics of merging compose files: +# https://docs.docker.com/compose/multiple-compose-files/merge/ +# +# the current environments are as follows (contact FSA for details): +# - prod: the production environment, which runs the full stack, including the +# shiny app, the job scheduler, and the accounting database. it's the most +# resource-intensive environment, and is intended for use in production. +# - dev/staging: these are effectively dev environments that specific users run +# on the server for testing purposes. +# - app: a development environment that runs only the frontend and backend, and +# not the job scheduler or the accounting database. it's intended for use in +# frontend development, where you don't need to submit jobs or query the +# accounting database. + + +# if 1, skips invoking ./build_images.sh before running the stack +SKIP_BUILD=${SKIP_BUILD:-0} + +# command to run after the stack has launched, e.g. +# in cases where you want to tail some containers after launch +# (by default, it does nothing) +POST_LAUNCH_CMD=":" +# if 1, clears the screen before running the post-launch command +DO_CLEAR="0" +# if 1, opens the browser window to the app after launching the stack +DO_OPEN_BROWSER=${DO_OPEN_BROWSER:-1} + +# the URL to open when we invoke the browser +FRONTEND_URL=${FRONTEND_URL:-"http://localhost:5713"} + +# helper function to print a message and exit with a specific code +# in one command +function fatal() { + echo "${1:-fatal error, aborting}" + exit ${2:-1} +} + +# cross-platform helper function to open a browser window +function open_browser() { + if [[ "$OSTYPE" == "linux-gnu"* ]]; then + xdg-open "$1" + elif [[ "$OSTYPE" == "darwin"* ]]; then + open "$1" + elif [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then + start "$1" + else + echo "WARNING: Unsupported OS: $OSTYPE, unable to open browser" + fi +} + +# =========================================================================== +# === 
entrypoint +# =========================================================================== + +# source the .env file and export its contents +# among other things, we'll use the DEFAULT_ENV var in it to set the target env +set -a +source .env +set +a + +# check if the first argument is a valid target env, and if not attempt +# to infer it from the script's parents' directory name +case $1 in + "prod"|"staging"|"dev"|"app") + TARGET_ENV=$1 + shift + echo "* Selected target environment: ${TARGET_ENV}" + ;; + *) + # attempt to resolve the target env from the host environment + # (e.g., the hostname, possibly the repo root directory name, etc.) + + # get the name of the script's parent directory + PARENT_DIR=$(basename $(dirname $(realpath $0))) + HOSTNAME=$(hostname) + + # check if the parent directory name contains a valid target env + if [[ "${HOSTNAME}" = "jravilab" ]]; then + TARGET_ENV="prod" + STRATEGY="via hostname ${HOSTNAME}" + elif [[ ! -z "${DEFAULT_ENV}" ]]; then + TARGET_ENV="${DEFAULT_ENV}" + STRATEGY="via DEFAULT_ENV" + else + echo -e \ + "ERROR: No valid target env specified, and could not infer" \ + "target environment from parent directory name:\n${PARENT_DIR}" + exit 1 + fi + + echo "* Inferred target environment: ${TARGET_ENV} (${STRATEGY:-n/a})" +esac + +case ${TARGET_ENV} in + "prod") + DEFAULT_ARGS="up -d" + COMPOSE_CMD="docker compose -f docker-compose.yml -f docker-compose.prod.yml" + DO_CLEAR="0" + # never launch the browser in production + DO_OPEN_BROWSER=0 + # watch the logs after, since we detached after bringing up the stack + POST_LAUNCH_CMD="${COMPOSE_CMD} logs -f" + ;; + "dev") + DEFAULT_ARGS="up -d" + COMPOSE_CMD="docker compose -f docker-compose.yml -f docker-compose.override.yml" + DO_CLEAR="1" + # watch the logs after, since we detached after bringing up the stack + POST_LAUNCH_CMD="${COMPOSE_CMD} logs -f" + ;; + "app") + # launches just the services necessary to run the shiny app, for frontend development. 
+ # note that you won't be able to submit jobs or query the accounting database. + DEFAULT_ARGS="up" + COMPOSE_CMD="docker compose -f docker-compose.apponly.yml" + DO_CLEAR="1" + SKIP_BUILD="1" # don't build images for the app environment, since it uses so few of them + # watch the logs after, since we detached after bringing up the stack + # POST_LAUNCH_CMD="${COMPOSE_CMD} logs -f app" + ;; + *) + echo "ERROR: Unknown target environment: ${TARGET_ENV}" + exit 1 +esac + +# ensure that docker compose can see the target env, so it can, e.g., namespace hosts to their environment +export TARGET_ENV=${TARGET_ENV} + +# if any arguments were specified after the target env, use those instead of the default +if [ $# -gt 0 ]; then + DEFAULT_ARGS="$@" + DO_CLEAR="0" # don't clear so we can see the output +fi + +# check if a "control" command is the current first argument; if so, skip the build +if [[ "$1" =~ ^(down|restart|logs)$ ]]; then + echo "* Skipping build, since we're running a control command: $1" + SKIP_BUILD=1 + # also skip the post-launch command so we don't get stuck, e.g., tailing + POST_LAUNCH_CMD="" + # also skip opening a browser window + DO_OPEN_BROWSER=0 +fi + +# if SKIP_BUILD is 0 and 'down' isn't the docker compose command, build images +# for the target env. +# each built image is tagged with its target env, so they don't collide with +# each other; in the case of prod, the tag is "latest". 
+if [ "${SKIP_BUILD}" -eq 0 ]; then + if [ "${TARGET_ENV}" == "prod" ] || [ "${TARGET_ENV}" == "app" ]; then + IMAGE_TAG="latest" + else + IMAGE_TAG="${TARGET_ENV}" + fi + + echo "* Building images for ${TARGET_ENV} (tag: ${IMAGE_TAG})" + # ./build_images.sh ${IMAGE_TAG} || fatal "Failed to build images for ${TARGET_ENV}" + ${COMPOSE_CMD} build || fatal "Failed to build images for ${TARGET_ENV}" +fi + +echo "Running: ${COMPOSE_CMD} ${DEFAULT_ARGS}" +${COMPOSE_CMD} ${DEFAULT_ARGS} && \ +( [[ ${DO_CLEAR} = "1" ]] && clear || exit 0 ) && \ +( + [[ ${DO_OPEN_BROWSER} = "1" ]] \ + && open_browser "${FRONTEND_URL}" \ + || exit 0 +) && +${POST_LAUNCH_CMD} diff --git a/services/caddy/Caddyfile b/services/caddy/Caddyfile new file mode 100644 index 0000000..b183e66 --- /dev/null +++ b/services/caddy/Caddyfile @@ -0,0 +1,5 @@ +# serve /srv +:80 { + root * /srv + file_server +} diff --git a/services/postgres/README.md b/services/postgres/README.md new file mode 100644 index 0000000..3cbf910 --- /dev/null +++ b/services/postgres/README.md @@ -0,0 +1,9 @@ +# PostgreSQL Configuration + +This folder will eventually contain configuration for the PostgreSQL instance +that runs within the MolEvolvR stack. + +The instance is responsible for: +- keeping records of analysis submissions +- tracking job status between the backend and SLURM controller +- recording any artifacts that aren't better stored on the filesystem.