diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c97ff1a8d..2794fd384 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -12,7 +12,7 @@ repos:
- id: prettier
exclude: |
(?x)(
- docs/changelog.md|.github/ISSUE_TEMPLATE/config.yml
+ docs/changelog.md|.github/ISSUE_TEMPLATE/config.yml|tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html
)
- repo: https://github.com/kynan/nbstripout
rev: 0.6.1
@@ -42,6 +42,10 @@ repos:
- id: mixed-line-ending
args: [--fix=lf]
- id: trailing-whitespace
+ exclude: |
+ (?x)(
+ tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html
+ )
- id: check-case-conflict
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.7.1
diff --git a/lamindb/_finish.py b/lamindb/_finish.py
index a30ccc970..31b652126 100644
--- a/lamindb/_finish.py
+++ b/lamindb/_finish.py
@@ -8,6 +8,8 @@
from lamin_utils import logger
from lamindb_setup.core.hashing import hash_file
+from lamindb.core.exceptions import NotebookNotSaved
+
if TYPE_CHECKING:
from pathlib import Path
@@ -16,6 +18,20 @@
from ._query_set import QuerySet
+def get_r_save_notebook_message() -> str:
+ return f"Please save the notebook in RStudio (shortcut `{get_shortcut()}`) within 2 sec before calling `db$finish()`"
+
+
+def get_shortcut() -> str:
+ import platform
+
+ return "CMD + s" if platform.system() == "Darwin" else "CTRL + s"
+
+
+def get_seconds_since_modified(filepath) -> float:
+ return datetime.now().timestamp() - filepath.stat().st_mtime
+
+
# this is from the get_title function in nbproject
# should be moved into lamindb sooner or later
def prepare_notebook(
@@ -82,6 +98,29 @@ def notebook_to_script(
script_path.write_text(py_content)
+# removes NotebookNotSaved error message from notebook html
+def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:
+ import re
+
+ cleaned_content = (
+ file_path.read_text()
+ ) # at this point cleaned_content is still raw
+ pattern_title = r"
(.*?)"
+ title_match = re.search(pattern_title, cleaned_content)
+ title_text = None
+ if title_match:
+ title_text = title_match.group(1)
+ pattern_h1 = f"]*>{re.escape(title_text)}
"
+ cleaned_content = re.sub(pattern_title, "", cleaned_content)
+ cleaned_content = re.sub(pattern_h1, "", cleaned_content)
+ cleaned_content = cleaned_content.replace(
+ f"NotebookNotSaved: {get_r_save_notebook_message()}", ""
+ )
+ cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}")
+ cleaned_path.write_text(cleaned_content)
+ return title_text, cleaned_path
+
+
def save_context_core(
*,
run: Run,
@@ -104,7 +143,9 @@ def save_context_core(
# for scripts, things are easy
is_consecutive = True
is_ipynb = filepath.suffix == ".ipynb"
+ is_r_notebook = filepath.suffix in {".qmd", ".Rmd"}
source_code_path = filepath
+ report_path: Path | None = None
# for notebooks, we need more work
if is_ipynb:
try:
@@ -139,12 +180,21 @@ def save_context_core(
".ipynb", ".py"
)
notebook_to_script(transform, filepath, source_code_path)
+ elif is_r_notebook:
+ if filepath.with_suffix(".nb.html").exists():
+ report_path = filepath.with_suffix(".nb.html")
+ elif filepath.with_suffix(".html").exists():
+ report_path = filepath.with_suffix(".html")
+ else:
+ logger.warning(
+ f"no {filepath.with_suffix('.nb.html')} found, save your manually rendered .html report via the CLI: lamin save {filepath}"
+ )
ln.settings.creation.artifact_silence_missing_run_warning = True
# track source code
hash, _ = hash_file(source_code_path) # ignore hash_type for now
if (
transform._source_code_artifact_id is not None
- or transform.source_code is not None # equivalent to transform.hash is not None
+ or transform.hash is not None # .hash is equivalent to .transform
):
# check if the hash of the transform source code matches
# (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
@@ -165,7 +215,7 @@ def save_context_core(
logger.warning("Please re-run `ln.track()` to make a new version")
return "rerun-the-notebook"
else:
- logger.important("source code is already saved")
+ logger.debug("source code is already saved")
else:
transform.source_code = source_code_path.read_text()
transform.hash = hash
@@ -198,10 +248,15 @@ def save_context_core(
run.finished_at = datetime.now(timezone.utc)
# track report and set is_consecutive
- if not is_ipynb:
- run.is_consecutive = True
- run.save()
- else:
+ if report_path is not None:
+ if not from_cli:
+ if get_seconds_since_modified(report_path) > 2 and not ln_setup._TESTING:
+ # this can happen when auto-knitting an html with RStudio
+ raise NotebookNotSaved(get_r_save_notebook_message())
+ if is_r_notebook:
+ title_text, report_path = clean_r_notebook_html(report_path)
+ if title_text is not None:
+ transform.name = title_text
if run.report_id is not None:
hash, _ = hash_file(report_path) # ignore hash_type for now
if hash != run.report.hash:
@@ -210,7 +265,7 @@ def save_context_core(
)
if response == "y":
run.report.replace(report_path)
- run.report.save(upload=True)
+ run.report.save(upload=True, print_progress=False)
else:
logger.important("keeping old report")
else:
@@ -224,11 +279,13 @@ def save_context_core(
)
report_file.save(upload=True, print_progress=False)
run.report = report_file
- run.is_consecutive = is_consecutive
- run.save()
logger.debug(
f"saved transform.latest_run.report: {transform.latest_run.report}"
)
+ run.is_consecutive = is_consecutive
+
+ # save both run & transform records if we arrive here
+ run.save()
transform.save()
# finalize
@@ -250,11 +307,9 @@ def save_context_core(
f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
)
if not from_cli:
- thing, name = (
- ("notebook", "notebook.ipynb") if is_ipynb else ("script", "script.py")
- )
+ thing = "notebook" if (is_ipynb or is_r_notebook) else "script"
logger.important(
- f"if you want to update your {thing} without re-running it, use `lamin save {name}`"
+ f"if you want to update your {thing} without re-running it, use `lamin save {filepath}`"
)
# because run & transform changed, update the global context
context._run = run
diff --git a/lamindb/core/_context.py b/lamindb/core/_context.py
index 465566b49..cd31b8b75 100644
--- a/lamindb/core/_context.py
+++ b/lamindb/core/_context.py
@@ -90,7 +90,7 @@ def raise_missing_context(transform_type: str, key: str) -> bool:
f"you already have a transform with key '{key}': Transform('{transform.uid[:8]}')\n"
f' (1) to make a revision, run: ln.track("{new_uid}")\n (2) to create a new transform, rename your {transform_type} file and re-run: ln.track()'
)
- if transform_type == "notebook":
+ if is_run_from_ipython:
print(f"→ {message}")
response = input("→ Ready to re-run? (y/n)")
if response == "y":
@@ -343,7 +343,7 @@ def track(
)
if run is not None: # loaded latest run
run.started_at = datetime.now(timezone.utc) # update run time
- self._logging_message_track += f", started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}"
+ self._logging_message_track += f", re-started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}"
if run is None: # create new run
run = Run(
@@ -579,15 +579,11 @@ def finish(self, ignore_non_consecutive: None | bool = None) -> None:
`lamin save script.py` or `lamin save notebook.ipynb` → `docs `__
"""
- from lamindb._finish import save_context_core
-
- def get_seconds_since_modified(filepath) -> float:
- return datetime.now().timestamp() - filepath.stat().st_mtime
-
- def get_shortcut() -> str:
- import platform
-
- return "CMD + s" if platform.system() == "Darwin" else "CTRL + s"
+ from lamindb._finish import (
+ get_seconds_since_modified,
+ get_shortcut,
+ save_context_core,
+ )
if self.run is None:
raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
@@ -609,7 +605,7 @@ def get_shortcut() -> str:
self.transform.save()
if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING:
raise NotebookNotSaved(
- f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) right before calling `ln.finish()`"
+ f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) within 2 sec before calling `ln.finish()`"
)
save_context_core(
run=self.run,
diff --git a/lamindb/core/exceptions.py b/lamindb/core/exceptions.py
index 1efe12065..1c053f512 100644
--- a/lamindb/core/exceptions.py
+++ b/lamindb/core/exceptions.py
@@ -79,7 +79,7 @@ class IntegrityError(Exception):
pass
-class NoTitleError(Exception):
+class NoTitleError(SystemExit):
"""Notebook has no title."""
pass
diff --git a/sub/lamin-cli b/sub/lamin-cli
index f4fc5511a..5b890c9d3 160000
--- a/sub/lamin-cli
+++ b/sub/lamin-cli
@@ -1 +1 @@
-Subproject commit f4fc5511a8d0a3b329e54e6ca1007e4a384a8567
+Subproject commit 5b890c9d396b61833681583dc51e23749c0ed1e3
diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html
new file mode 100644
index 000000000..506767ef8
--- /dev/null
+++ b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html
@@ -0,0 +1,42 @@
+
+
+
+
+
+
+
+
+
+
+
+ library(laminr)
+
+db <- connect()
+
+
+
+ → connected lamindb: laminlabs/lamindata
+
+
+
+ db$track("lOScuxDTDE0q0000")
+
+
+
+ → loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
+
+
+
+
+
+
+
+ db$finish()
+
+
+
+ MoreOUTPUT
+
+
+
+
diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.html b/tests/core/notebooks/basic-r-notebook.Rmd.html
new file mode 100644
index 000000000..b0a391c9a
--- /dev/null
+++ b/tests/core/notebooks/basic-r-notebook.Rmd.html
@@ -0,0 +1,42 @@
+
+
+
+ My exemplary R analysis
+ My exemplary R analysis
+
+
+
+
+
+
+ library(laminr)
+
+db <- connect()
+
+
+
+ → connected lamindb: laminlabs/lamindata
+
+
+
+ db$track("lOScuxDTDE0q0000")
+
+
+
+ → loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
+
+
+
+
+
+
+
+ db$finish()
+
+
+
+ MoreOUTPUT NotebookNotSaved: Please save the notebook in RStudio (shortcut `SHORTCUT`) within 2 sec before calling `db$finish()`
+
+
+
+
diff --git a/tests/core/test_context.py b/tests/core/test_context.py
index f4262d4fd..b9977b227 100644
--- a/tests/core/test_context.py
+++ b/tests/core/test_context.py
@@ -3,10 +3,12 @@
import lamindb as ln
import pytest
+from lamindb._finish import clean_r_notebook_html, get_shortcut
from lamindb.core._context import context, get_uid_ext
from lamindb.core.exceptions import TrackNotCalled, ValidationError
-scripts_dir = Path(__file__).parent.resolve() / "scripts"
+SCRIPTS_DIR = Path(__file__).parent.resolve() / "scripts"
+NOTEBOOKS_DIR = Path(__file__).parent.resolve() / "notebooks"
def test_track_with_multi_parents():
@@ -134,7 +136,7 @@ def test_create_or_load_transform():
def test_run_scripts_for_versioning():
# regular execution
result = subprocess.run( # noqa: S602
- f"python {scripts_dir / 'script-to-test-versioning.py'}",
+ f"python {SCRIPTS_DIR / 'script-to-test-versioning.py'}",
shell=True,
capture_output=True,
)
@@ -144,7 +146,7 @@ def test_run_scripts_for_versioning():
# updated key (filename change)
result = subprocess.run( # noqa: S602
- f"python {scripts_dir / 'script-to-test-filename-change.py'}",
+ f"python {SCRIPTS_DIR / 'script-to-test-filename-change.py'}",
shell=True,
capture_output=True,
)
@@ -154,7 +156,7 @@ def test_run_scripts_for_versioning():
# version already taken
result = subprocess.run( # noqa: S602
- f"python {scripts_dir / 'duplicate1/script-to-test-versioning.py'}",
+ f"python {SCRIPTS_DIR / 'duplicate1/script-to-test-versioning.py'}",
shell=True,
capture_output=True,
)
@@ -167,7 +169,7 @@ def test_run_scripts_for_versioning():
# regular version bump
result = subprocess.run( # noqa: S602
- f"python {scripts_dir / 'duplicate2/script-to-test-versioning.py'}",
+ f"python {SCRIPTS_DIR / 'duplicate2/script-to-test-versioning.py'}",
shell=True,
capture_output=True,
)
@@ -179,7 +181,7 @@ def test_run_scripts_for_versioning():
# inconsistent version
result = subprocess.run( # noqa: S602
- f"python {scripts_dir / 'duplicate3/script-to-test-versioning.py'}",
+ f"python {SCRIPTS_DIR / 'duplicate3/script-to-test-versioning.py'}",
shell=True,
capture_output=True,
)
@@ -232,3 +234,17 @@ def test_track_notebook_or_script_manually(type):
error.exconly()
== "ValueError: Use `ln.track()` without passing transform in a notebook or script - metadata is automatically parsed"
)
+
+
+def test_clean_r_notebook_html():
+ orig_notebook_path = NOTEBOOKS_DIR / "basic-r-notebook.Rmd.html"
+ content = orig_notebook_path.read_text()
+ orig_notebook_path.write_text(content.replace("SHORTCUT", get_shortcut()))
+ comparison_path = NOTEBOOKS_DIR / "basic-r-notebook.Rmd.cleaned.html"
+ compare = comparison_path.read_text()
+ comparison_path.unlink()
+ title_text, cleaned_path = clean_r_notebook_html(orig_notebook_path)
+ assert comparison_path == cleaned_path
+ assert title_text == "My exemplary R analysis"
+ assert compare == comparison_path.read_text()
+ orig_notebook_path.write_text(content.replace(get_shortcut(), "SHORTCUT"))