diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c97ff1a8d..2794fd384 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,7 @@ repos: - id: prettier exclude: | (?x)( - docs/changelog.md|.github/ISSUE_TEMPLATE/config.yml + docs/changelog.md|.github/ISSUE_TEMPLATE/config.yml|tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html ) - repo: https://github.com/kynan/nbstripout rev: 0.6.1 @@ -42,6 +42,10 @@ repos: - id: mixed-line-ending args: [--fix=lf] - id: trailing-whitespace + exclude: | + (?x)( + tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html + ) - id: check-case-conflict - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.7.1 diff --git a/lamindb/_finish.py b/lamindb/_finish.py index a30ccc970..31b652126 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -8,6 +8,8 @@ from lamin_utils import logger from lamindb_setup.core.hashing import hash_file +from lamindb.core.exceptions import NotebookNotSaved + if TYPE_CHECKING: from pathlib import Path @@ -16,6 +18,20 @@ from ._query_set import QuerySet +def get_r_save_notebook_message() -> str: + return f"Please save the notebook in RStudio (shortcut `{get_shortcut()}`) within 2 sec before calling `db$finish()`" + + +def get_shortcut() -> str: + import platform + + return "CMD + s" if platform.system() == "Darwin" else "CTRL + s" + + +def get_seconds_since_modified(filepath) -> float: + return datetime.now().timestamp() - filepath.stat().st_mtime + + # this is from the get_title function in nbproject # should be moved into lamindb sooner or later def prepare_notebook( @@ -82,6 +98,29 @@ def notebook_to_script( script_path.write_text(py_content) +# removes NotebookNotSaved error message from notebook html +def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]: + import re + + cleaned_content = ( + file_path.read_text() + ) # at this point cleaned_content is still raw + pattern_title = r"(.*?)" + title_match = re.search(pattern_title, cleaned_content) + title_text = None + if title_match: + title_text = title_match.group(1) + pattern_h1 = f"]*>{re.escape(title_text)}" + cleaned_content = re.sub(pattern_title, "", cleaned_content) + cleaned_content = re.sub(pattern_h1, "", cleaned_content) + cleaned_content = cleaned_content.replace( + f"NotebookNotSaved: {get_r_save_notebook_message()}", "" + ) + cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}") + cleaned_path.write_text(cleaned_content) + return title_text, cleaned_path + + def save_context_core( *, run: Run, @@ -104,7 +143,9 @@ def save_context_core( # for scripts, things are easy is_consecutive = True is_ipynb = filepath.suffix == ".ipynb" + is_r_notebook = filepath.suffix in {".qmd", ".Rmd"} source_code_path = filepath + report_path: Path | None = None # for notebooks, we need more work if is_ipynb: try: @@ -139,12 +180,21 @@ def save_context_core( ".ipynb", ".py" ) notebook_to_script(transform, filepath, source_code_path) + elif is_r_notebook: + if filepath.with_suffix(".nb.html").exists(): + report_path = filepath.with_suffix(".nb.html") + elif filepath.with_suffix(".html").exists(): + report_path = filepath.with_suffix(".html") + else: + logger.warning( + f"no {filepath.with_suffix('.nb.html')} found, save your manually rendered .html report via the CLI: lamin save {filepath}" + ) ln.settings.creation.artifact_silence_missing_run_warning = True # track source code hash, _ = hash_file(source_code_path) # ignore hash_type for now if ( transform._source_code_artifact_id is not None - or transform.source_code is not None # equivalent to transform.hash is not None + or transform.hash is not None # .hash is equivalent to .transform ): # check if the hash of the transform source code matches # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point) @@ -165,7 +215,7 @@ def save_context_core( logger.warning("Please re-run `ln.track()` to make a new version") return "rerun-the-notebook" else: - logger.important("source code is already saved") + logger.debug("source code is already saved") else: transform.source_code = source_code_path.read_text() transform.hash = hash @@ -198,10 +248,15 @@ def save_context_core( run.finished_at = datetime.now(timezone.utc) # track report and set is_consecutive - if not is_ipynb: - run.is_consecutive = True - run.save() - else: + if report_path is not None: + if not from_cli: + if get_seconds_since_modified(report_path) > 2 and not ln_setup._TESTING: + # this can happen when auto-knitting an html with RStudio + raise NotebookNotSaved(get_r_save_notebook_message()) + if is_r_notebook: + title_text, report_path = clean_r_notebook_html(report_path) + if title_text is not None: + transform.name = title_text if run.report_id is not None: hash, _ = hash_file(report_path) # ignore hash_type for now if hash != run.report.hash: @@ -210,7 +265,7 @@ def save_context_core( ) if response == "y": run.report.replace(report_path) - run.report.save(upload=True) + run.report.save(upload=True, print_progress=False) else: logger.important("keeping old report") else: @@ -224,11 +279,13 @@ def save_context_core( ) report_file.save(upload=True, print_progress=False) run.report = report_file - run.is_consecutive = is_consecutive - run.save() logger.debug( f"saved transform.latest_run.report: {transform.latest_run.report}" ) + run.is_consecutive = is_consecutive + + # save both run & transform records if we arrive here + run.save() transform.save() # finalize @@ -250,11 +307,9 @@ def save_context_core( f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}" ) if not from_cli: - thing, name = ( - ("notebook", "notebook.ipynb") if is_ipynb else ("script", "script.py") - ) + thing = "notebook" if (is_ipynb or is_r_notebook) else "script" logger.important( - f"if you want to update your {thing} without re-running it, use `lamin save {name}`" + f"if you want to update your {thing} without re-running it, use `lamin save {filepath}`" ) # because run & transform changed, update the global context context._run = run diff --git a/lamindb/core/_context.py b/lamindb/core/_context.py index 465566b49..cd31b8b75 100644 --- a/lamindb/core/_context.py +++ b/lamindb/core/_context.py @@ -90,7 +90,7 @@ def raise_missing_context(transform_type: str, key: str) -> bool: f"you already have a transform with key '{key}': Transform('{transform.uid[:8]}')\n" f' (1) to make a revision, run: ln.track("{new_uid}")\n (2) to create a new transform, rename your {transform_type} file and re-run: ln.track()' ) - if transform_type == "notebook": + if is_run_from_ipython: print(f"→ {message}") response = input("→ Ready to re-run? (y/n)") if response == "y": @@ -343,7 +343,7 @@ def track( ) if run is not None: # loaded latest run run.started_at = datetime.now(timezone.utc) # update run time - self._logging_message_track += f", started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}" + self._logging_message_track += f", re-started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}" if run is None: # create new run run = Run( @@ -579,15 +579,11 @@ def finish(self, ignore_non_consecutive: None | bool = None) -> None: `lamin save script.py` or `lamin save notebook.ipynb` → `docs `__ """ - from lamindb._finish import save_context_core - - def get_seconds_since_modified(filepath) -> float: - return datetime.now().timestamp() - filepath.stat().st_mtime - - def get_shortcut() -> str: - import platform - - return "CMD + s" if platform.system() == "Darwin" else "CTRL + s" + from lamindb._finish import ( + get_seconds_since_modified, + get_shortcut, + save_context_core, + ) if self.run is None: raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`") @@ -609,7 +605,7 @@ def get_shortcut() -> str: self.transform.save() if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING: raise NotebookNotSaved( - f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) right before calling `ln.finish()`" + f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) within 2 sec before calling `ln.finish()`" ) save_context_core( run=self.run, diff --git a/lamindb/core/exceptions.py b/lamindb/core/exceptions.py index 1efe12065..1c053f512 100644 --- a/lamindb/core/exceptions.py +++ b/lamindb/core/exceptions.py @@ -79,7 +79,7 @@ class IntegrityError(Exception): pass -class NoTitleError(Exception): +class NoTitleError(SystemExit): """Notebook has no title.""" pass diff --git a/sub/lamin-cli b/sub/lamin-cli index f4fc5511a..5b890c9d3 160000 --- a/sub/lamin-cli +++ b/sub/lamin-cli @@ -1 +1 @@ -Subproject commit f4fc5511a8d0a3b329e54e6ca1007e4a384a8567 +Subproject commit 5b890c9d396b61833681583dc51e23749c0ed1e3 diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html new file mode 100644 index 000000000..506767ef8 --- /dev/null +++ b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html @@ -0,0 +1,42 @@ + + + + + + + + + + + +
library(laminr)
+
+db <- connect()
+ + + +
→ connected lamindb: laminlabs/lamindata
+ + + +
db$track("lOScuxDTDE0q0000")
+ + + +
→ loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
+ + + + + + + +
db$finish()
+ + + +
MoreOUTPUT 
+ + + + diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.html b/tests/core/notebooks/basic-r-notebook.Rmd.html new file mode 100644 index 000000000..b0a391c9a --- /dev/null +++ b/tests/core/notebooks/basic-r-notebook.Rmd.html @@ -0,0 +1,42 @@ + + + + My exemplary R analysis +

My exemplary R analysis

+ + + + + + +
library(laminr)
+
+db <- connect()
+ + + +
→ connected lamindb: laminlabs/lamindata
+ + + +
db$track("lOScuxDTDE0q0000")
+ + + +
→ loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
+ + + + + + + +
db$finish()
+ + + +
MoreOUTPUT NotebookNotSaved: Please save the notebook in RStudio (shortcut `SHORTCUT`) within 2 sec before calling `db$finish()`
+ + + + diff --git a/tests/core/test_context.py b/tests/core/test_context.py index f4262d4fd..b9977b227 100644 --- a/tests/core/test_context.py +++ b/tests/core/test_context.py @@ -3,10 +3,12 @@ import lamindb as ln import pytest +from lamindb._finish import clean_r_notebook_html, get_shortcut from lamindb.core._context import context, get_uid_ext from lamindb.core.exceptions import TrackNotCalled, ValidationError -scripts_dir = Path(__file__).parent.resolve() / "scripts" +SCRIPTS_DIR = Path(__file__).parent.resolve() / "scripts" +NOTEBOOKS_DIR = Path(__file__).parent.resolve() / "notebooks" def test_track_with_multi_parents(): @@ -134,7 +136,7 @@ def test_create_or_load_transform(): def test_run_scripts_for_versioning(): # regular execution result = subprocess.run( # noqa: S602 - f"python {scripts_dir / 'script-to-test-versioning.py'}", + f"python {SCRIPTS_DIR / 'script-to-test-versioning.py'}", shell=True, capture_output=True, ) @@ -144,7 +146,7 @@ def test_run_scripts_for_versioning(): # updated key (filename change) result = subprocess.run( # noqa: S602 - f"python {scripts_dir / 'script-to-test-filename-change.py'}", + f"python {SCRIPTS_DIR / 'script-to-test-filename-change.py'}", shell=True, capture_output=True, ) @@ -154,7 +156,7 @@ def test_run_scripts_for_versioning(): # version already taken result = subprocess.run( # noqa: S602 - f"python {scripts_dir / 'duplicate1/script-to-test-versioning.py'}", + f"python {SCRIPTS_DIR / 'duplicate1/script-to-test-versioning.py'}", shell=True, capture_output=True, ) @@ -167,7 +169,7 @@ def test_run_scripts_for_versioning(): # regular version bump result = subprocess.run( # noqa: S602 - f"python {scripts_dir / 'duplicate2/script-to-test-versioning.py'}", + f"python {SCRIPTS_DIR / 'duplicate2/script-to-test-versioning.py'}", shell=True, capture_output=True, ) @@ -179,7 +181,7 @@ def test_run_scripts_for_versioning(): # inconsistent version result = subprocess.run( # noqa: S602 - f"python {scripts_dir / 'duplicate3/script-to-test-versioning.py'}", + f"python {SCRIPTS_DIR / 'duplicate3/script-to-test-versioning.py'}", shell=True, capture_output=True, ) @@ -232,3 +234,17 @@ def test_track_notebook_or_script_manually(type): error.exconly() == "ValueError: Use `ln.track()` without passing transform in a notebook or script - metadata is automatically parsed" ) + + +def test_clean_r_notebook_html(): + orig_notebook_path = NOTEBOOKS_DIR / "basic-r-notebook.Rmd.html" + content = orig_notebook_path.read_text() + orig_notebook_path.write_text(content.replace("SHORTCUT", get_shortcut())) + comparison_path = NOTEBOOKS_DIR / "basic-r-notebook.Rmd.cleaned.html" + compare = comparison_path.read_text() + comparison_path.unlink() + title_text, cleaned_path = clean_r_notebook_html(orig_notebook_path) + assert comparison_path == cleaned_path + assert title_text == "My exemplary R analysis" + assert compare == comparison_path.read_text() + orig_notebook_path.write_text(content.replace(get_shortcut(), "SHORTCUT"))