From 2d5dbe2a733c5c4235713a8d832f66c4f8ff58c5 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Mon, 25 Nov 2024 11:43:50 +0100 Subject: [PATCH 01/21] =?UTF-8?q?=E2=9C=A8=20Support=20auto-knitted=20html?= =?UTF-8?q?=20reports=20with=20.nb.html=20suffix=20during=20ln.finish()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lamindb/_finish.py b/lamindb/_finish.py index a30ccc970..0a426d48c 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -104,7 +104,9 @@ def save_context_core( # for scripts, things are easy is_consecutive = True is_ipynb = filepath.suffix == ".ipynb" + is_r_notebook = filepath.suffix in {".qmd", ".Rmd"} source_code_path = filepath + report_path: Path | None = None # for notebooks, we need more work if is_ipynb: try: @@ -139,6 +141,13 @@ def save_context_core( ".ipynb", ".py" ) notebook_to_script(transform, filepath, source_code_path) + elif is_r_notebook: + if not filepath.with_suffix(".nb.html").exists(): + logger.warning( + f"no auto-knitted file {filepath.with_suffix('.nb.html')} found, save your manually rendered .html report via the CLI: lamin save {filepath}" + ) + else: + report_path = filepath.with_suffix(".nb.html") ln.settings.creation.artifact_silence_missing_run_warning = True # track source code hash, _ = hash_file(source_code_path) # ignore hash_type for now @@ -198,7 +207,7 @@ def save_context_core( run.finished_at = datetime.now(timezone.utc) # track report and set is_consecutive - if not is_ipynb: + if not is_ipynb and not is_r_notebook: run.is_consecutive = True run.save() else: From 0732cecf8ad04460b13b0035684cefdc79baee09 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Mon, 25 Nov 2024 12:11:31 +0100 Subject: [PATCH 02/21] =?UTF-8?q?=F0=9F=92=9A=20Fix=20CLI=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lamindb/_finish.py b/lamindb/_finish.py index 0a426d48c..f4c9985f1 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -207,8 +207,8 @@ def save_context_core( run.finished_at = datetime.now(timezone.utc) # track report and set is_consecutive - if not is_ipynb and not is_r_notebook: - run.is_consecutive = True + if report_path is None: + run.is_consecutive = is_consecutive run.save() else: if run.report_id is not None: From 0e3118480dec6e306086bd9e08bf0e6394cafd43 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Wed, 27 Nov 2024 12:14:14 +0100 Subject: [PATCH 03/21] =?UTF-8?q?=F0=9F=9A=B8=20Fix=20logging=20message=20?= =?UTF-8?q?about=20updating=20transform=20when=20running=20in=20R?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lamindb/_finish.py b/lamindb/_finish.py index f4c9985f1..521982d09 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -259,11 +259,9 @@ def save_context_core( f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}" ) if not from_cli: - thing, name = ( - ("notebook", "notebook.ipynb") if is_ipynb else ("script", "script.py") - ) + thing = "notebook" if (is_ipynb or is_r_notebook) else "script" logger.important( - f"if you want to update your {thing} without re-running it, use `lamin save {name}`" + f"if you want to update your {thing} without re-running it, use `lamin save {filepath}`" ) # because run & transform changed, update the global context context._run = run From 74b782fb08edd0e0b5b0fc8989d5fc71ce1dd134 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Wed, 27 Nov 2024 12:20:36 +0100 Subject: [PATCH 04/21] =?UTF-8?q?=F0=9F=8E=A8=20Ensure=20that=20no=20outda?= =?UTF-8?q?ted=20autoknitted=20R=20html=20report=20gets=20saved?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 12 ++++++++++++ lamindb/core/_context.py | 5 +---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/lamindb/_finish.py b/lamindb/_finish.py index 521982d09..2411f6d9f 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -8,6 +8,8 @@ from lamin_utils import logger from lamindb_setup.core.hashing import hash_file +from lamindb.core.exceptions import NotebookNotSaved + if TYPE_CHECKING: from pathlib import Path @@ -16,6 +18,10 @@ from ._query_set import QuerySet +def get_seconds_since_modified(filepath) -> float: + return datetime.now().timestamp() - filepath.stat().st_mtime + + # this is from the get_title function in nbproject # should be moved into lamindb sooner or later def prepare_notebook( @@ -211,6 +217,12 @@ def save_context_core( run.is_consecutive = is_consecutive run.save() else: + if not from_cli: + if get_seconds_since_modified(filepath) > 2 and not ln_setup._TESTING: + # this can happen when auto-knitting an html with RStudio + raise NotebookNotSaved( + "Please save the notebook in RStudio right before calling `db$finish()`" + ) if run.report_id is not None: hash, _ = hash_file(report_path) # ignore hash_type for now if hash != run.report.hash: diff --git a/lamindb/core/_context.py b/lamindb/core/_context.py index 465566b49..aaf9b859c 100644 --- a/lamindb/core/_context.py +++ b/lamindb/core/_context.py @@ -579,10 +579,7 @@ def finish(self, ignore_non_consecutive: None | bool = None) -> None: `lamin save script.py` or `lamin save notebook.ipynb` → `docs `__ """ - from lamindb._finish import save_context_core - - def get_seconds_since_modified(filepath) -> float: - return datetime.now().timestamp() - filepath.stat().st_mtime + from lamindb._finish import get_seconds_since_modified, save_context_core def get_shortcut() -> str: import platform From 66fd2337232f3153c65a6ad1e57c9cd71f496a16 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Fri, 29 Nov 2024 11:09:06 +0100 Subject: [PATCH 05/21] =?UTF-8?q?=F0=9F=9A=B8=20Do=20not=20use=20transform?= =?UTF-8?q?=5Ftype=20to=20trigger=20interactive=20dialogue?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/core/_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lamindb/core/_context.py b/lamindb/core/_context.py index aaf9b859c..aa7b07284 100644 --- a/lamindb/core/_context.py +++ b/lamindb/core/_context.py @@ -90,7 +90,7 @@ def raise_missing_context(transform_type: str, key: str) -> bool: f"you already have a transform with key '{key}': Transform('{transform.uid[:8]}')\n" f' (1) to make a revision, run: ln.track("{new_uid}")\n (2) to create a new transform, rename your {transform_type} file and re-run: ln.track()' ) - if transform_type == "notebook": + if is_run_from_ipython: print(f"→ {message}") response = input("→ Ready to re-run? (y/n)") if response == "y": From dcec96f50fcb37658960ce6d7d3902674fd22296 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Fri, 29 Nov 2024 11:26:08 +0100 Subject: [PATCH 06/21] =?UTF-8?q?=F0=9F=90=9B=20Actually=20check=20timesta?= =?UTF-8?q?mp=20of=20report?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lamindb/_finish.py b/lamindb/_finish.py index 2411f6d9f..72b40e9e3 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -218,7 +218,7 @@ def save_context_core( run.save() else: if not from_cli: - if get_seconds_since_modified(filepath) > 2 and not ln_setup._TESTING: + if get_seconds_since_modified(report_path) > 2 and not ln_setup._TESTING: # this can happen when auto-knitting an html with RStudio raise NotebookNotSaved( "Please save the notebook in RStudio right before calling `db$finish()`" From 4580434ad057e0cf2579ad6cdec00120f3487ffe Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 18:42:10 +0100 Subject: [PATCH 07/21] =?UTF-8?q?=F0=9F=9A=B8=20Polish?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 19 ++++++++++++------- lamindb/core/_context.py | 13 ++++++------- lamindb/core/exceptions.py | 2 +- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/lamindb/_finish.py b/lamindb/_finish.py index 72b40e9e3..242fe27d7 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -18,6 +18,12 @@ from ._query_set import QuerySet +def get_shortcut() -> str: + import platform + + return "CMD + s" if platform.system() == "Darwin" else "CTRL + s" + + def get_seconds_since_modified(filepath) -> float: return datetime.now().timestamp() - filepath.stat().st_mtime @@ -213,15 +219,12 @@ def save_context_core( run.finished_at = datetime.now(timezone.utc) # track report and set is_consecutive - if report_path is None: - run.is_consecutive = is_consecutive - run.save() - else: + if report_path is not None: if not from_cli: if get_seconds_since_modified(report_path) > 2 and not ln_setup._TESTING: # this can happen when auto-knitting an html with RStudio raise NotebookNotSaved( - "Please save the notebook in RStudio right before calling `db$finish()`" + f"Please save the notebook in RStudio (shortcut `{get_shortcut()}`) within 2 sec before calling `db$finish()`" ) if run.report_id is not None: hash, _ = hash_file(report_path) # ignore hash_type for now @@ -245,11 +248,13 @@ def save_context_core( ) report_file.save(upload=True, print_progress=False) run.report = report_file - run.is_consecutive = is_consecutive - run.save() logger.debug( f"saved transform.latest_run.report: {transform.latest_run.report}" ) + run.is_consecutive = is_consecutive + + # save both run & transform records if we arrive here + run.save() transform.save() # finalize diff --git a/lamindb/core/_context.py b/lamindb/core/_context.py index aa7b07284..9a7c8c929 100644 --- a/lamindb/core/_context.py +++ b/lamindb/core/_context.py @@ -579,12 +579,11 @@ def finish(self, ignore_non_consecutive: None | bool = None) -> None: `lamin save script.py` or `lamin save notebook.ipynb` → `docs `__ """ - from lamindb._finish import get_seconds_since_modified, save_context_core - - def get_shortcut() -> str: - import platform - - return "CMD + s" if platform.system() == "Darwin" else "CTRL + s" + from lamindb._finish import ( + get_seconds_since_modified, + get_shortcut, + save_context_core, + ) if self.run is None: raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`") @@ -606,7 +605,7 @@ def get_shortcut() -> str: self.transform.save() if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING: raise NotebookNotSaved( - f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) right before calling `ln.finish()`" + f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) within 2 sec before calling `ln.finish()`" ) save_context_core( run=self.run, diff --git a/lamindb/core/exceptions.py b/lamindb/core/exceptions.py index 1efe12065..1c053f512 100644 --- a/lamindb/core/exceptions.py +++ b/lamindb/core/exceptions.py @@ -79,7 +79,7 @@ class IntegrityError(Exception): pass -class NoTitleError(Exception): +class NoTitleError(SystemExit): """Notebook has no title.""" pass From b901bd2a09affb96710ca4b9094895833a864d89 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 19:28:48 +0100 Subject: [PATCH 08/21] =?UTF-8?q?=E2=9C=85=20Add=20a=20test=20for=20stripp?= =?UTF-8?q?ing=20error=20message?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 13 ++++++ .../basic-r-notebook.Rmd.cleaned.html | 43 ++++++++++++++++++ .../core/notebooks/basic-r-notebook.Rmd.html | 45 +++++++++++++++++++ tests/core/test_context.py | 21 ++++++--- 4 files changed, 116 insertions(+), 6 deletions(-) create mode 100644 tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html create mode 100644 tests/core/notebooks/basic-r-notebook.Rmd.html diff --git a/lamindb/_finish.py b/lamindb/_finish.py index 242fe27d7..9d5a923f6 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -94,6 +94,18 @@ def notebook_to_script( script_path.write_text(py_content) +# removes NotebookNotSaved error message from notebook html +def clean_error_from_r_notebook_html(file_path: Path): + import re + + content = file_path.read_text() + # Pattern to match the specific error message that contains NotebookNotSaved + pattern = r"Error in py_call_impl.*?NotebookNotSaved.*?for details\." + cleaned_content = re.sub(pattern, "", content, flags=re.DOTALL) + cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}") + cleaned_path.write_text(cleaned_content) + + def save_context_core( *, run: Run, @@ -167,6 +179,7 @@ def save_context_core( transform._source_code_artifact_id is not None or transform.source_code is not None # equivalent to transform.hash is not None ): + print(transform.source_code) # check if the hash of the transform source code matches # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point) ref_hash = ( diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html new file mode 100644 index 000000000..468786176 --- /dev/null +++ b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html @@ -0,0 +1,43 @@ + + + + + My exemplary R analysis +

My exemplary R analysis

+ + + + + + +
library(laminr)
+
+db <- connect()
+ + + +
→ connected lamindb: laminlabs/lamindata
+ + + +
db$track("lOScuxDTDE0q0000")
+ + + +
→ loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
+ + + + + + + +
db$finish()
+ + + +
+ + + + diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.html b/tests/core/notebooks/basic-r-notebook.Rmd.html new file mode 100644 index 000000000..6b9bf6979 --- /dev/null +++ b/tests/core/notebooks/basic-r-notebook.Rmd.html @@ -0,0 +1,45 @@ + + + + + My exemplary R analysis +

My exemplary R analysis

+ + + + + + +
library(laminr)
+
+db <- connect()
+ + + +
→ connected lamindb: laminlabs/lamindata
+ + + +
db$track("lOScuxDTDE0q0000")
+ + + +
→ loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
+ + + + + + + +
db$finish()
+ + + +
Error in py_call_impl(callable, call_args$unnamed, call_args$named) :
+  lamindb.core.exceptions.NotebookNotSaved: Please save the notebook in RStudio (shortcut `CMD + s`) within 2 sec before calling `db$finish()`
+Run ]8;;rstudio:run:reticulate::py_last_error()`reticulate::py_last_error()`]8;; for details.
+ + + + diff --git a/tests/core/test_context.py b/tests/core/test_context.py index f4262d4fd..43646a196 100644 --- a/tests/core/test_context.py +++ b/tests/core/test_context.py @@ -4,9 +4,10 @@ import lamindb as ln import pytest from lamindb.core._context import context, get_uid_ext +from lamindb.core._finish import clean_error_from_r_notebook_html from lamindb.core.exceptions import TrackNotCalled, ValidationError -scripts_dir = Path(__file__).parent.resolve() / "scripts" +SCRIPTS_DIR = Path(__file__).parent.resolve() / "scripts" def test_track_with_multi_parents(): @@ -134,7 +135,7 @@ def test_create_or_load_transform(): def test_run_scripts_for_versioning(): # regular execution result = subprocess.run( # noqa: S602 - f"python {scripts_dir / 'script-to-test-versioning.py'}", + f"python {SCRIPTS_DIR / 'script-to-test-versioning.py'}", shell=True, capture_output=True, ) @@ -144,7 +145,7 @@ def test_run_scripts_for_versioning(): # updated key (filename change) result = subprocess.run( # noqa: S602 - f"python {scripts_dir / 'script-to-test-filename-change.py'}", + f"python {SCRIPTS_DIR / 'script-to-test-filename-change.py'}", shell=True, capture_output=True, ) @@ -154,7 +155,7 @@ def test_run_scripts_for_versioning(): # version already taken result = subprocess.run( # noqa: S602 - f"python {scripts_dir / 'duplicate1/script-to-test-versioning.py'}", + f"python {SCRIPTS_DIR / 'duplicate1/script-to-test-versioning.py'}", shell=True, capture_output=True, ) @@ -167,7 +168,7 @@ def test_run_scripts_for_versioning(): # regular version bump result = subprocess.run( # noqa: S602 - f"python {scripts_dir / 'duplicate2/script-to-test-versioning.py'}", + f"python {SCRIPTS_DIR / 'duplicate2/script-to-test-versioning.py'}", shell=True, capture_output=True, ) @@ -179,7 +180,7 @@ def test_run_scripts_for_versioning(): # inconsistent version result = subprocess.run( # noqa: S602 - f"python {scripts_dir / 'duplicate3/script-to-test-versioning.py'}", + f"python {SCRIPTS_DIR / 'duplicate3/script-to-test-versioning.py'}", shell=True, capture_output=True, ) @@ -232,3 +233,11 @@ def test_track_notebook_or_script_manually(type): error.exconly() == "ValueError: Use `ln.track()` without passing transform in a notebook or script - metadata is automatically parsed" ) + + +def test_clean_error_from_r_notebook_html(): + comparison_path = SCRIPTS_DIR / "basic-r-notebook.Rmd.html" + compare = comparison_path.read_text() + comparison_path.unlink() + clean_error_from_r_notebook_html(SCRIPTS_DIR / "basic-r-notebook.Rmd.html") + assert compare == comparison_path.read_text() From 25c07063fe4f5dcb96b45988d12524ea2d0870fe Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 19:32:39 +0100 Subject: [PATCH 09/21] =?UTF-8?q?=F0=9F=92=9A=20Fix=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/core/test_context.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/core/test_context.py b/tests/core/test_context.py index 43646a196..e1daf5510 100644 --- a/tests/core/test_context.py +++ b/tests/core/test_context.py @@ -3,11 +3,12 @@ import lamindb as ln import pytest +from lamindb._finish import clean_error_from_r_notebook_html from lamindb.core._context import context, get_uid_ext -from lamindb.core._finish import clean_error_from_r_notebook_html from lamindb.core.exceptions import TrackNotCalled, ValidationError SCRIPTS_DIR = Path(__file__).parent.resolve() / "scripts" +NOTEBOOKS_DIR = Path(__file__).parent.resolve() / "notebooks" def test_track_with_multi_parents(): @@ -236,8 +237,8 @@ def test_track_notebook_or_script_manually(type): def test_clean_error_from_r_notebook_html(): - comparison_path = SCRIPTS_DIR / "basic-r-notebook.Rmd.html" + comparison_path = NOTEBOOKS_DIR / "basic-r-notebook.Rmd.cleaned.html" compare = comparison_path.read_text() comparison_path.unlink() - clean_error_from_r_notebook_html(SCRIPTS_DIR / "basic-r-notebook.Rmd.html") + clean_error_from_r_notebook_html(NOTEBOOKS_DIR / "basic-r-notebook.Rmd.html") assert compare == comparison_path.read_text() From c9fc5b2bc21d2a7dd1bb6ec74b2ab922e02c8662 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 19:35:45 +0100 Subject: [PATCH 10/21] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Strip=20error=20mess?= =?UTF-8?q?age=20from=20R=20notebooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lamindb/_finish.py b/lamindb/_finish.py index 9d5a923f6..fc993f06d 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -95,7 +95,7 @@ def notebook_to_script( # removes NotebookNotSaved error message from notebook html -def clean_error_from_r_notebook_html(file_path: Path): +def clean_error_from_r_notebook_html(file_path: Path) -> Path: import re content = file_path.read_text() @@ -104,6 +104,7 @@ def clean_error_from_r_notebook_html(file_path: Path): cleaned_content = re.sub(pattern, "", content, flags=re.DOTALL) cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}") cleaned_path.write_text(cleaned_content) + return cleaned_path def save_context_core( @@ -239,6 +240,8 @@ def save_context_core( raise NotebookNotSaved( f"Please save the notebook in RStudio (shortcut `{get_shortcut()}`) within 2 sec before calling `db$finish()`" ) + if is_r_notebook: + report_path = clean_error_from_r_notebook_html(report_path) if run.report_id is not None: hash, _ = hash_file(report_path) # ignore hash_type for now if hash != run.report.hash: From 030675829da94dfee3ee5a94c7343bb56b946feb Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 20:41:26 +0100 Subject: [PATCH 11/21] =?UTF-8?q?=F0=9F=9A=B8=20Improve?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 17 ++++++++--------- .../notebooks/basic-r-notebook.Rmd.cleaned.html | 2 +- tests/core/notebooks/basic-r-notebook.Rmd.html | 4 +--- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/lamindb/_finish.py b/lamindb/_finish.py index fc993f06d..685ef992c 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -18,6 +18,10 @@ from ._query_set import QuerySet +def get_r_save_notebook_message() -> str: + return f"Please save the notebook in RStudio (shortcut `{get_shortcut()}`) within 2 sec before calling `db$finish()`" + + def get_shortcut() -> str: import platform @@ -99,9 +103,7 @@ def clean_error_from_r_notebook_html(file_path: Path) -> Path: import re content = file_path.read_text() - # Pattern to match the specific error message that contains NotebookNotSaved - pattern = r"Error in py_call_impl.*?NotebookNotSaved.*?for details\." - cleaned_content = re.sub(pattern, "", content, flags=re.DOTALL) + cleaned_content = content.replace(get_r_save_notebook_message(), "") cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}") cleaned_path.write_text(cleaned_content) return cleaned_path @@ -178,9 +180,8 @@ def save_context_core( hash, _ = hash_file(source_code_path) # ignore hash_type for now if ( transform._source_code_artifact_id is not None - or transform.source_code is not None # equivalent to transform.hash is not None + or transform.hash is not None # .hash is equivalent to .transform ): - print(transform.source_code) # check if the hash of the transform source code matches # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point) ref_hash = ( @@ -200,7 +201,7 @@ def save_context_core( logger.warning("Please re-run `ln.track()` to make a new version") return "rerun-the-notebook" else: - logger.important("source code is already saved") + logger.debug("source code is already saved") else: transform.source_code = source_code_path.read_text() transform.hash = hash @@ -237,9 +238,7 @@ def save_context_core( if not from_cli: if get_seconds_since_modified(report_path) > 2 and not ln_setup._TESTING: # this can happen when auto-knitting an html with RStudio - raise NotebookNotSaved( - f"Please save the notebook in RStudio (shortcut `{get_shortcut()}`) within 2 sec before calling `db$finish()`" - ) + raise NotebookNotSaved(get_r_save_notebook_message()) if is_r_notebook: report_path = clean_error_from_r_notebook_html(report_path) if run.report_id is not None: diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html index 468786176..d7fc39ede 100644 --- a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html +++ b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html @@ -36,7 +36,7 @@

My exemplary R analysis

-
+
MoreOUTPUT
diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.html b/tests/core/notebooks/basic-r-notebook.Rmd.html index 6b9bf6979..70dac0ae5 100644 --- a/tests/core/notebooks/basic-r-notebook.Rmd.html +++ b/tests/core/notebooks/basic-r-notebook.Rmd.html @@ -36,9 +36,7 @@

My exemplary R analysis

-
Error in py_call_impl(callable, call_args$unnamed, call_args$named) :
-  lamindb.core.exceptions.NotebookNotSaved: Please save the notebook in RStudio (shortcut `CMD + s`) within 2 sec before calling `db$finish()`
-Run ]8;;rstudio:run:reticulate::py_last_error()`reticulate::py_last_error()`]8;; for details.
+
MoreOUTPUT NotebookNotSaved: Please save the notebook in RStudio (shortcut `CMD + s`) within 2 sec before calling `db$finish()`
From 4159b9a0c4b7f8055c3d7cf2fc6c31bcf2d582fc Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 20:44:59 +0100 Subject: [PATCH 12/21] =?UTF-8?q?=F0=9F=92=9A=20Fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html index d7fc39ede..7b838d55d 100644 --- a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html +++ b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html @@ -36,7 +36,7 @@

My exemplary R analysis

-
MoreOUTPUT
+
MoreOUTPUT 
From 27868cd6a7a2dc8a1a6b7c907fd4b6dd1e3fde45 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 21:03:41 +0100 Subject: [PATCH 13/21] =?UTF-8?q?=F0=9F=9A=B8=20Polish?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 19 +++++++++++++++---- .../basic-r-notebook.Rmd.cleaned.html | 2 -- tests/core/test_context.py | 6 +++--- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/lamindb/_finish.py b/lamindb/_finish.py index 685ef992c..10f9e225e 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -99,11 +99,22 @@ def notebook_to_script( # removes NotebookNotSaved error message from notebook html -def clean_error_from_r_notebook_html(file_path: Path) -> Path: +def clean_r_notebook_html(file_path: Path) -> Path: import re - content = file_path.read_text() - cleaned_content = content.replace(get_r_save_notebook_message(), "") + cleaned_content = ( + file_path.read_text() + ) # at this point cleaned_content is still raw + pattern_title = r"(.*?)" + title_match = re.search(pattern_title, cleaned_content) + if title_match: + title_text = title_match.group(1) + pattern_h1 = f"]*>{re.escape(title_text)}" + cleaned_content = re.sub(pattern_title, "\n", cleaned_content) + cleaned_content = re.sub(pattern_h1, "\n", cleaned_content) + cleaned_content = cleaned_content.replace( + f"NotebookNotSaved: {get_r_save_notebook_message()}", "" + ) cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}") cleaned_path.write_text(cleaned_content) return cleaned_path @@ -240,7 +251,7 @@ def save_context_core( # this can happen when auto-knitting an html with RStudio raise NotebookNotSaved(get_r_save_notebook_message()) if is_r_notebook: - report_path = clean_error_from_r_notebook_html(report_path) + report_path = clean_r_notebook_html(report_path) if run.report_id is not None: hash, _ = hash_file(report_path) # ignore hash_type for now if hash != run.report.hash: diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html index 7b838d55d..0ea382806 100644 --- a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html +++ b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html @@ -2,8 +2,6 @@ - My exemplary R analysis -

My exemplary R analysis

diff --git a/tests/core/test_context.py b/tests/core/test_context.py index e1daf5510..026fd2afa 100644 --- a/tests/core/test_context.py +++ b/tests/core/test_context.py @@ -3,7 +3,7 @@ import lamindb as ln import pytest -from lamindb._finish import clean_error_from_r_notebook_html +from lamindb._finish import clean_r_notebook_html from lamindb.core._context import context, get_uid_ext from lamindb.core.exceptions import TrackNotCalled, ValidationError @@ -236,9 +236,9 @@ def test_track_notebook_or_script_manually(type): ) -def test_clean_error_from_r_notebook_html(): +def test_clean_r_notebook_html(): comparison_path = NOTEBOOKS_DIR / "basic-r-notebook.Rmd.cleaned.html" compare = comparison_path.read_text() comparison_path.unlink() - clean_error_from_r_notebook_html(NOTEBOOKS_DIR / "basic-r-notebook.Rmd.html") + clean_r_notebook_html(NOTEBOOKS_DIR / "basic-r-notebook.Rmd.html") assert compare == comparison_path.read_text() From 4a2fb03549a1802bf327735f75360386e8e5755b Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 21:09:00 +0100 Subject: [PATCH 14/21] =?UTF-8?q?=F0=9F=92=9A=20Fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 2 +- lamindb/_finish.py | 4 ++-- tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html | 3 ++- tests/core/notebooks/basic-r-notebook.Rmd.html | 1 - 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c97ff1a8d..42c769dd3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,7 @@ repos: - id: prettier exclude: | (?x)( - docs/changelog.md|.github/ISSUE_TEMPLATE/config.yml + docs/changelog.md|.github/ISSUE_TEMPLATE/config.yml|tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html ) - repo: https://github.com/kynan/nbstripout rev: 0.6.1 diff --git a/lamindb/_finish.py b/lamindb/_finish.py index 10f9e225e..ddda23c11 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -110,8 +110,8 @@ def clean_r_notebook_html(file_path: Path) -> Path: if title_match: title_text = title_match.group(1) pattern_h1 = f"]*>{re.escape(title_text)}" - cleaned_content = re.sub(pattern_title, "\n", cleaned_content) - cleaned_content = re.sub(pattern_h1, "\n", cleaned_content) + cleaned_content = re.sub(pattern_title, "", cleaned_content) + cleaned_content = re.sub(pattern_h1, "", cleaned_content) cleaned_content = cleaned_content.replace( f"NotebookNotSaved: {get_r_save_notebook_message()}", "" ) diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html index 0ea382806..2f3b61590 100644 --- a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html +++ b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html @@ -1,8 +1,9 @@ - + + diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.html b/tests/core/notebooks/basic-r-notebook.Rmd.html index 70dac0ae5..e92571a14 100644 --- a/tests/core/notebooks/basic-r-notebook.Rmd.html +++ b/tests/core/notebooks/basic-r-notebook.Rmd.html @@ -1,5 +1,4 @@ - My exemplary R analysis From 1d79bd014f191c38630f4a3046eebb5202b00241 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 21:12:47 +0100 Subject: [PATCH 15/21] =?UTF-8?q?=F0=9F=92=9A=20Fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 4 ++++ tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 42c769dd3..2794fd384 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,6 +42,10 @@ repos: - id: mixed-line-ending args: [--fix=lf] - id: trailing-whitespace + exclude: | + (?x)( + tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html + ) - id: check-case-conflict - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.7.1 diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html index 2f3b61590..506767ef8 100644 --- a/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html +++ b/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html @@ -1,8 +1,8 @@ - - + + From 59a17d91b5fe24e1840ba16be5aadb5a0452c0f3 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 21:19:23 +0100 Subject: [PATCH 16/21] =?UTF-8?q?=F0=9F=9A=B8=20Deal=20with=20notebook=20t?= =?UTF-8?q?itles?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 9 ++++++--- tests/core/test_context.py | 6 +++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lamindb/_finish.py b/lamindb/_finish.py index ddda23c11..7f27d06c8 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -99,7 +99,7 @@ def notebook_to_script( # removes NotebookNotSaved error message from notebook html -def clean_r_notebook_html(file_path: Path) -> Path: +def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]: import re cleaned_content = ( @@ -107,6 +107,7 @@ def clean_r_notebook_html(file_path: Path) -> Path: ) # at this point cleaned_content is still raw pattern_title = r"(.*?)" title_match = re.search(pattern_title, cleaned_content) + title_text = None if title_match: title_text = title_match.group(1) pattern_h1 = f"]*>{re.escape(title_text)}" @@ -117,7 +118,7 @@ def clean_r_notebook_html(file_path: Path) -> Path: ) cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}") cleaned_path.write_text(cleaned_content) - return cleaned_path + return title_text, cleaned_path def save_context_core( @@ -251,7 +252,9 @@ def save_context_core( # this can happen when auto-knitting an html with RStudio raise NotebookNotSaved(get_r_save_notebook_message()) if is_r_notebook: - report_path = clean_r_notebook_html(report_path) + title_text, report_path = clean_r_notebook_html(report_path) + if title_text is not None: + transform.name = title_text if run.report_id is not None: hash, _ = hash_file(report_path) # ignore hash_type for now if hash != run.report.hash: diff --git a/tests/core/test_context.py b/tests/core/test_context.py index 026fd2afa..8385acc40 100644 --- a/tests/core/test_context.py +++ b/tests/core/test_context.py @@ -240,5 +240,9 @@ def test_clean_r_notebook_html(): comparison_path = NOTEBOOKS_DIR / "basic-r-notebook.Rmd.cleaned.html" compare = comparison_path.read_text() comparison_path.unlink() - clean_r_notebook_html(NOTEBOOKS_DIR / "basic-r-notebook.Rmd.html") + title_text, cleaned_path = clean_r_notebook_html( + NOTEBOOKS_DIR / "basic-r-notebook.Rmd.html" + ) + assert comparison_path == cleaned_path + assert title_text == "My exemplary R analysis" assert compare == comparison_path.read_text() From aba4826cafde596d337f81d3d82b26b2e0c5f4de Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 22:47:00 +0100 Subject: [PATCH 17/21] =?UTF-8?q?=F0=9F=9A=B8=20Improve=20load=20and=20sav?= =?UTF-8?q?e?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 7 +++++-- sub/lamin-cli | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lamindb/_finish.py b/lamindb/_finish.py index 7f27d06c8..8e069755b 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -186,7 +186,10 @@ def save_context_core( f"no auto-knitted file {filepath.with_suffix('.nb.html')} found, save your manually rendered .html report via the CLI: lamin save {filepath}" ) else: - report_path = filepath.with_suffix(".nb.html") + if filepath.with_suffix(".nb.html").exists(): + report_path = filepath.with_suffix(".nb.html") + elif filepath.with_suffix(".html").exists(): + report_path = filepath.with_suffix(".html") ln.settings.creation.artifact_silence_missing_run_warning = True # track source code hash, _ = hash_file(source_code_path) # ignore hash_type for now @@ -263,7 +266,7 @@ def save_context_core( ) if response == "y": run.report.replace(report_path) - run.report.save(upload=True) + run.report.save(upload=True, print_progress=False) else: logger.important("keeping old report") else: diff --git a/sub/lamin-cli b/sub/lamin-cli index f4fc5511a..c8c3092ab 160000 --- a/sub/lamin-cli +++ b/sub/lamin-cli @@ -1 +1 @@ -Subproject commit f4fc5511a8d0a3b329e54e6ca1007e4a384a8567 +Subproject commit c8c3092ab2335124a71a67879088b8c5d9d54375 From 8c733a43de64543018ecc29f024e2fc6e4714fdb Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 23:07:07 +0100 Subject: [PATCH 18/21] =?UTF-8?q?=F0=9F=9A=B8=20More=20clarity=20in=20logg?= =?UTF-8?q?ing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/core/_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lamindb/core/_context.py b/lamindb/core/_context.py index 9a7c8c929..cd31b8b75 100644 --- a/lamindb/core/_context.py +++ b/lamindb/core/_context.py @@ -343,7 +343,7 @@ def track( ) if run is not None: # loaded latest run run.started_at = datetime.now(timezone.utc) # update run time - self._logging_message_track += f", started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}" + self._logging_message_track += f", re-started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}" if run is None: # create new run run = Run( From e8dfbf62f41272f51500484e1f592d7faa526530 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 23:15:37 +0100 Subject: [PATCH 19/21] =?UTF-8?q?=F0=9F=92=9A=20Fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/core/notebooks/basic-r-notebook.Rmd.html | 2 +- tests/core/test_context.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/core/notebooks/basic-r-notebook.Rmd.html b/tests/core/notebooks/basic-r-notebook.Rmd.html index e92571a14..b0a391c9a 100644 --- a/tests/core/notebooks/basic-r-notebook.Rmd.html +++ b/tests/core/notebooks/basic-r-notebook.Rmd.html @@ -35,7 +35,7 @@

My exemplary R analysis

-
MoreOUTPUT NotebookNotSaved: Please save the notebook in RStudio (shortcut `CMD + s`) within 2 sec before calling `db$finish()`
+
MoreOUTPUT NotebookNotSaved: Please save the notebook in RStudio (shortcut `SHORTCUT`) within 2 sec before calling `db$finish()`
diff --git a/tests/core/test_context.py b/tests/core/test_context.py index 8385acc40..b9977b227 100644 --- a/tests/core/test_context.py +++ b/tests/core/test_context.py @@ -3,7 +3,7 @@ import lamindb as ln import pytest -from lamindb._finish import clean_r_notebook_html +from lamindb._finish import clean_r_notebook_html, get_shortcut from lamindb.core._context import context, get_uid_ext from lamindb.core.exceptions import TrackNotCalled, ValidationError @@ -237,12 +237,14 @@ def test_track_notebook_or_script_manually(type): def test_clean_r_notebook_html(): + orig_notebook_path = NOTEBOOKS_DIR / "basic-r-notebook.Rmd.html" + content = orig_notebook_path.read_text() + orig_notebook_path.write_text(content.replace("SHORTCUT", get_shortcut())) comparison_path = NOTEBOOKS_DIR / "basic-r-notebook.Rmd.cleaned.html" compare = comparison_path.read_text() comparison_path.unlink() - title_text, cleaned_path = clean_r_notebook_html( - NOTEBOOKS_DIR / "basic-r-notebook.Rmd.html" - ) + title_text, cleaned_path = clean_r_notebook_html(orig_notebook_path) assert comparison_path == cleaned_path assert title_text == "My exemplary R analysis" assert compare == comparison_path.read_text() + orig_notebook_path.write_text(content.replace(get_shortcut(), "SHORTCUT")) From 3a521c37f6bb2d4dc1b3dcfa5db7d37e8a0db147 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 23:21:19 +0100 Subject: [PATCH 20/21] =?UTF-8?q?=F0=9F=92=9A=20Fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_finish.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lamindb/_finish.py b/lamindb/_finish.py index 8e069755b..31b652126 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -181,15 +181,14 @@ def save_context_core( ) notebook_to_script(transform, filepath, source_code_path) elif is_r_notebook: - if not filepath.with_suffix(".nb.html").exists(): + if filepath.with_suffix(".nb.html").exists(): + report_path = filepath.with_suffix(".nb.html") + elif filepath.with_suffix(".html").exists(): + report_path = filepath.with_suffix(".html") + else: logger.warning( - f"no auto-knitted file {filepath.with_suffix('.nb.html')} found, save your manually rendered .html report via the CLI: lamin save {filepath}" + f"no {filepath.with_suffix('.nb.html')} found, save your manually rendered .html report via the CLI: lamin save {filepath}" ) - else: - if filepath.with_suffix(".nb.html").exists(): - report_path = filepath.with_suffix(".nb.html") - elif filepath.with_suffix(".html").exists(): - report_path = filepath.with_suffix(".html") ln.settings.creation.artifact_silence_missing_run_warning = True # track source code hash, _ = hash_file(source_code_path) # ignore hash_type for now From bbeca00bd15bb018528d641df8284518d52507de Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Sun, 1 Dec 2024 23:28:37 +0100 Subject: [PATCH 21/21] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Upgrade=20lamin-cli?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sub/lamin-cli | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sub/lamin-cli b/sub/lamin-cli index c8c3092ab..5b890c9d3 160000 --- a/sub/lamin-cli +++ b/sub/lamin-cli @@ -1 +1 @@ -Subproject commit c8c3092ab2335124a71a67879088b8c5d9d54375 +Subproject commit 5b890c9d396b61833681583dc51e23749c0ed1e3