diff --git a/.gitignore b/.gitignore
index 20404fe4..2709347a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@ stores/
mlflow/
results/
workspaces/
+efs/
# VSCode
.vscode/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 14eba51f..2b188ae7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.4.0
+ rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
diff --git a/Makefile b/Makefile
index c7396272..ca840bd1 100644
--- a/Makefile
+++ b/Makefile
@@ -12,6 +12,7 @@ style:
# Cleaning
.PHONY: clean
clean: style
+ python notebooks/clear_cell_nums.py
find . -type f -name "*.DS_Store" -ls -delete
find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf
find . | grep -E ".pytest_cache" | xargs rm -rf
diff --git a/README.md b/README.md
index f9b1e0e5..2d6fe872 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ We'll start by setting up our cluster with the environment and compute configura
- Project: `madewithml`
- Cluster environment name: `madewithml-cluster-env`
# Toggle `Select from saved configurations`
- - Compute config: `madewithml-cluster-compute`
+ - Compute config: `madewithml-cluster-compute-g5.4xlarge`
```
> Alternatively, we can use the [CLI](https://docs.anyscale.com/reference/anyscale-cli) to create the workspace via `anyscale workspace create ...`
@@ -423,7 +423,7 @@ anyscale cluster-env build deploy/cluster_env.yaml --name $CLUSTER_ENV_NAME
The compute configuration determines **what** resources our workloads will be executes on. We've already created this [compute configuration](./deploy/cluster_compute.yaml) for us but this is how we can create it ourselves.
```bash
-export CLUSTER_COMPUTE_NAME="madewithml-cluster-compute"
+export CLUSTER_COMPUTE_NAME="madewithml-cluster-compute-g5.4xlarge"
anyscale cluster-compute create deploy/cluster_compute.yaml --name $CLUSTER_COMPUTE_NAME
```
diff --git a/deploy/cluster_compute.yaml b/deploy/cluster_compute.yaml
index 91e40c7e..3a8bef6d 100644
--- a/deploy/cluster_compute.yaml
+++ b/deploy/cluster_compute.yaml
@@ -1,12 +1,12 @@
-cloud: madewithml-us-east-2
-region: us-east2
+cloud: education-us-west-2
+region: us-west-2
head_node_type:
name: head_node_type
- instance_type: m5.2xlarge # 8 CPU, 0 GPU, 32 GB RAM
+ instance_type: g5.4xlarge
worker_node_types:
- name: gpu_worker
- instance_type: g4dn.xlarge # 4 CPU, 1 GPU, 16 GB RAM
- min_workers: 0
+ instance_type: g5.4xlarge
+ min_workers: 1
max_workers: 1
use_spot: False
aws:
diff --git a/deploy/cluster_env.yaml b/deploy/cluster_env.yaml
index 3ba9b1f1..14ba9e26 100644
--- a/deploy/cluster_env.yaml
+++ b/deploy/cluster_env.yaml
@@ -1,4 +1,4 @@
-base_image: anyscale/ray:2.6.0-py310-cu118
+base_image: anyscale/ray:2.7.0optimized-py310-cu118
env_vars: {}
debian_packages:
- curl
diff --git a/deploy/jobs/workloads.sh b/deploy/jobs/workloads.sh
index 4778ea12..c7cb3ef5 100644
--- a/deploy/jobs/workloads.sh
+++ b/deploy/jobs/workloads.sh
@@ -1,6 +1,5 @@
#!/bin/bash
export PYTHONPATH=$PYTHONPATH:$PWD
-export RAY_AIR_REENABLE_DEPRECATED_SYNC_TO_HEAD_NODE=1
mkdir results
# Test data
diff --git a/madewithml/config.py b/madewithml/config.py
index 4b1849eb..2de5eb6b 100644
--- a/madewithml/config.py
+++ b/madewithml/config.py
@@ -11,6 +11,11 @@
LOGS_DIR = Path(ROOT_DIR, "logs")
LOGS_DIR.mkdir(parents=True, exist_ok=True)
EFS_DIR = Path(f"/efs/shared_storage/madewithml/{os.environ.get('GITHUB_USERNAME', '')}")
+try:
+    Path(EFS_DIR).mkdir(parents=True, exist_ok=True)
+except OSError:  # shared EFS path cannot be created here: fall back to an "efs" dir under ROOT_DIR
+    EFS_DIR = Path(ROOT_DIR, "efs")
+    Path(EFS_DIR).mkdir(parents=True, exist_ok=True)
# Config MLflow
MODEL_REGISTRY = Path(f"{EFS_DIR}/mlflow")
diff --git a/notebooks/benchmarks.ipynb b/notebooks/benchmarks.ipynb
index e3f91e76..0d8a7ebf 100644
--- a/notebooks/benchmarks.ipynb
+++ b/notebooks/benchmarks.ipynb
@@ -58,7 +58,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "e2c96931-d511-4c6e-b582-87d24455a11e",
"metadata": {
"tags": []
@@ -79,7 +79,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "953a577e-3cd0-4c6b-81f9-8bc32850214d",
"metadata": {
"tags": []
@@ -101,7 +101,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "1790e2f5-6b8b-425c-8842-a2b0ea8f3f07",
"metadata": {
"tags": []
@@ -113,7 +113,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "6b9bfadb-ba49-4f5a-b216-4db14c8888ab",
"metadata": {
"tags": []
@@ -208,7 +208,7 @@
"4 A PyTorch Implementation of \"Watch Your Step: ... other "
]
},
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -222,7 +222,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "aa5b95d5-d61e-48e4-9100-d9d2fc0d53fa",
"metadata": {
"tags": []
@@ -234,7 +234,7 @@
"['computer-vision', 'other', 'natural-language-processing', 'mlops']"
]
},
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -247,7 +247,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "3c828129-8248-4e38-93a4-cabb097e7ba5",
"metadata": {
"tags": []
@@ -279,7 +279,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "8e3c3f44-2c19-4c32-9bc5-e9a7a917d19d",
"metadata": {},
"outputs": [],
@@ -295,7 +295,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "4950bdb4",
"metadata": {},
"outputs": [
@@ -337,7 +337,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"id": "b2aae14c-9870-4a27-b5ad-90f339686620",
"metadata": {
"tags": []
@@ -364,7 +364,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"id": "03ee23e5",
"metadata": {},
"outputs": [
@@ -401,7 +401,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "71c43e8c",
"metadata": {},
"outputs": [
@@ -416,7 +416,7 @@
" 'description': 'A PyTorch implementation of \"Capsule Graph Neural Network\" (ICLR 2019).'}]"
]
},
- "execution_count": 11,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -429,7 +429,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"id": "c9359a91-ac19-48a4-babb-e65d53f39b42",
"metadata": {
"tags": []
@@ -462,7 +462,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"id": "5fac795e",
"metadata": {},
"outputs": [
@@ -486,7 +486,7 @@
"['other', 'computer-vision', 'computer-vision']"
]
},
- "execution_count": 13,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -507,7 +507,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"id": "e4cb38a8-44cb-4cea-828c-590f223d4063",
"metadata": {
"tags": []
@@ -543,7 +543,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"id": "de2d0416",
"metadata": {},
"outputs": [],
@@ -576,7 +576,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"id": "ff3c37fb",
"metadata": {},
"outputs": [],
@@ -618,7 +618,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"id": "972fee2f-86e2-445e-92d0-923f5690132a",
"metadata": {},
"outputs": [],
@@ -647,7 +647,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"id": "9ee4e745-ef56-4b76-8230-fcbe56ac46aa",
"metadata": {
"tags": []
@@ -663,7 +663,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"id": "73780054-afeb-4ce6-8255-51bf91f9f820",
"metadata": {
"tags": []
@@ -709,7 +709,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": null,
"id": "24af6d04-d29e-4adb-a289-4c34c2cc7ec8",
"metadata": {
"tags": []
@@ -780,7 +780,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
"id": "e22ed1e1-b34d-43d1-ae8b-32b1fd5be53d",
"metadata": {
"tags": []
@@ -815,7 +815,7 @@
" 'tag': 'mlops'}]"
]
},
- "execution_count": 22,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -833,7 +833,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": null,
"id": "294548a5-9edf-4dea-ab8d-dc7464246810",
"metadata": {
"tags": []
@@ -864,7 +864,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": null,
"id": "29bca273-3ea8-4ce0-9fa9-fe19062b7c5b",
"metadata": {
"tags": []
@@ -917,7 +917,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": null,
"id": "3e59a3b9-69d9-4bb5-8b88-0569fcc72f0c",
"metadata": {
"tags": []
@@ -1001,7 +1001,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": null,
"id": "15ea136e",
"metadata": {},
"outputs": [],
@@ -1020,7 +1020,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": null,
"id": "ec0b498a-97c1-488c-a6b9-dc63a8a9df4d",
"metadata": {
"tags": []
@@ -1065,7 +1065,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": null,
"id": "4cc80311",
"metadata": {},
"outputs": [],
@@ -1080,7 +1080,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": null,
"id": "6771b1d2",
"metadata": {},
"outputs": [
diff --git a/notebooks/clear_cell_nums.py b/notebooks/clear_cell_nums.py
new file mode 100644
index 00000000..fc60b131
--- /dev/null
+++ b/notebooks/clear_cell_nums.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+
+import nbformat
+
+
+def clear_execution_numbers(nb_path):
+    """Reset every execution count in a notebook and rewrite it in place.
+
+    Args:
+        nb_path: path of the notebook file to read and overwrite.
+    """
+    with open(nb_path, "r", encoding="utf-8") as f:
+        nb = nbformat.read(f, as_version=4)
+    for cell in nb["cells"]:
+        if cell["cell_type"] == "code":
+            cell["execution_count"] = None
+            # Outputs may carry their own copy of the count; clear it as well.
+            for output in cell["outputs"]:
+                if "execution_count" in output:
+                    output["execution_count"] = None
+    with open(nb_path, "w", encoding="utf-8") as f:
+        nbformat.write(nb, f)
+
+
+if __name__ == "__main__":
+    # Clean every notebook next to this script, including those in subdirectories.
+    NOTEBOOK_DIR = Path(__file__).parent
+    notebook_fps = list(NOTEBOOK_DIR.glob("**/*.ipynb"))
+    for fp in notebook_fps:
+        clear_execution_numbers(fp)
diff --git a/notebooks/madewithml.ipynb b/notebooks/madewithml.ipynb
index cad9d158..f26dbcce 100644
--- a/notebooks/madewithml.ipynb
+++ b/notebooks/madewithml.ipynb
@@ -67,21 +67,34 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
- "import ray\n",
+ "import ray"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import sys; sys.path.append(\"..\")\n",
+ "import warnings; warnings.filterwarnings(\"ignore\")\n",
"from dotenv import load_dotenv; load_dotenv()\n",
- "import warnings; warnings.filterwarnings(\"ignore\")"
+ "%load_ext autoreload\n",
+ "%autoreload 2"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -90,16 +103,13 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "2023-09-17 22:40:03,729\tINFO worker.py:1471 -- Connecting to existing Ray cluster at address: 10.0.35.174:6379...\n",
- "2023-09-17 22:40:03,738\tINFO worker.py:1646 -- Connected to Ray cluster. View the dashboard at \u001b[1m\u001b[32mhttps://session-klxewghyvu1191sq8t885l6ynr.i.anyscaleuserdata.com \u001b[39m\u001b[22m\n",
- "2023-09-17 22:40:03,753\tINFO packaging.py:346 -- Pushing file package 'gcs://_ray_pkg_33f9aafa2eafc632d810a161969b543f.zip' (5.14MiB) to Ray cluster...\n",
- "2023-09-17 22:40:03,766\tINFO packaging.py:359 -- Successfully pushed file package 'gcs://_ray_pkg_33f9aafa2eafc632d810a161969b543f.zip'.\n"
+ "2023-12-07 11:26:30,445\tINFO worker.py:1633 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "a78e1d2dc86847b1a21dded4c84a12c9",
+ "model_id": "afcfdccd644b41d0b7af7f86f68dbdf3",
"version_major": 2,
"version_minor": 0
},
@@ -123,15 +133,15 @@
"
\n",
" \n",
" Python version: | \n",
- " 3.10.8 | \n",
+ " 3.10.11 | \n",
"
\n",
" \n",
" Ray version: | \n",
- " 3.0.0.dev0 | \n",
+ " 2.7.0 | \n",
"
\n",
" \n",
" Dashboard: | \n",
- " http://session-klxewghyvu1191sq8t885l6ynr.i.anyscaleuserdata.com | \n",
+ " http://127.0.0.1:8265 | \n",
"
\n",
"\n",
"
\n",
@@ -140,10 +150,10 @@
"\n"
],
"text/plain": [
- "RayContext(dashboard_url='session-klxewghyvu1191sq8t885l6ynr.i.anyscaleuserdata.com', python_version='3.10.8', ray_version='3.0.0.dev0', ray_commit='6aa4ad9fbe0241a88e580e3c1a01e96ac3cce75a', protocol_version=None)"
+ "RayContext(dashboard_url='127.0.0.1:8265', python_version='3.10.11', ray_version='2.7.0', ray_commit='b4bba4717f5ba04ee25580fe8f88eed63ef0c5dc', protocol_version=None)"
]
},
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -157,7 +167,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -165,17 +175,14 @@
{
"data": {
"text/plain": [
- "{'GPU': 2.0,\n",
- " 'memory': 137438953472.0,\n",
+ "{'memory': 30507458560.0,\n",
+ " 'CPU': 12.0,\n",
" 'node:__internal_head__': 1.0,\n",
- " 'CPU': 32.0,\n",
- " 'accelerator_type:A10G': 2.0,\n",
- " 'object_store_memory': 38456435097.0,\n",
- " 'node:10.0.35.174': 1.0,\n",
- " 'node:10.0.34.101': 1.0}"
+ " 'node:127.0.0.1': 1.0,\n",
+ " 'object_store_memory': 2147483648.0}"
]
},
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -193,7 +200,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -218,7 +225,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -255,7 +262,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -266,7 +273,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -345,8 +352,8 @@
""
],
"text/plain": [
- " id created_on title \\\n",
- "0 6 2020-02-20 06:43:18 Comparison between YOLO and RCNN on real world... \n",
+ " id created_on title \n",
+ "0 6 2020-02-20 06:43:18 Comparison between YOLO and RCNN on real world... \\\n",
"1 7 2020-02-20 06:47:21 Show, Infer & Tell: Contextual Inference for C... \n",
"2 9 2020-02-24 16:24:45 Awesome Graph Classification \n",
"3 15 2020-02-28 23:55:26 Awesome Monte Carlo Tree Search \n",
@@ -360,7 +367,7 @@
"4 A PyTorch Implementation of \"Watch Your Step: ... other "
]
},
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -383,7 +390,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -394,7 +401,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -410,7 +417,7 @@
"Name: count, dtype: int64"
]
},
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -422,7 +429,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -435,7 +442,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -451,7 +458,7 @@
"Name: count, dtype: int64"
]
},
- "execution_count": 11,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -463,7 +470,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -479,7 +486,7 @@
"Name: count, dtype: int64"
]
},
- "execution_count": 12,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -509,7 +516,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"metadata": {
"id": "tHdQmqTBNkSV",
"tags": []
@@ -525,7 +532,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -539,7 +546,7 @@
" ('mlops', 63)]"
]
},
- "execution_count": 14,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -552,7 +559,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -564,7 +571,7 @@
"outputs": [
{
"data": {
- "image/png": "",
+ "image/png": "",
"text/plain": [
"