From ff4056781b085693cd6daf07ad3a5c4568d529ab Mon Sep 17 00:00:00 2001
From: LennartSchmidtKern
Date: Thu, 19 Sep 2024 11:25:39 +0200
Subject: [PATCH 1/8] cancel tokenization

---
 controller/tokenization_manager.py | 28 ++++++++++++++++++++--------
 submodules/model                   |  2 +-
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/controller/tokenization_manager.py b/controller/tokenization_manager.py
index f325f2c..3f8ec27 100644
--- a/controller/tokenization_manager.py
+++ b/controller/tokenization_manager.py
@@ -116,7 +116,12 @@ def tokenize_initial_project(
         chunks = [
             records[x : x + chunk_size] for x in range(0, len(records), chunk_size)
         ]
+        tokenization_cancelled = False
         for idx, record_chunk in enumerate(chunks):
+            record_tokenization_task = tokenization.get(project_id, task_id)
+            if record_tokenization_task.state == enums.TokenizerTask.STATE_FAILED.value:
+                tokenization_cancelled = True
+                break
             entries = []
             for record_item in record_chunk:
                 if __remove_from_priority_queue(project_id, record_item.id):
@@ -131,14 +136,21 @@
             update_tokenization_progress(
                 project_id, tokenization_task, progress_per_chunk
             )
-        finalize_task(
-            project_id,
-            user_id,
-            non_text_attributes,
-            tokenization_task,
-            include_rats,
-            only_uploaded_attributes,
-        )
+        if not tokenization_cancelled:
+            finalize_task(
+                project_id,
+                user_id,
+                non_text_attributes,
+                tokenization_task,
+                include_rats,
+                only_uploaded_attributes,
+            )
+        else:
+            send_websocket_update(
+                project_id,
+                False,
+                ["docbin", "state", str(record_tokenization_task.state)],
+            )
     except Exception:
         __handle_error(project_id, user_id, task_id)
     finally:
diff --git a/submodules/model b/submodules/model
index dfb4279..f5116da 160000
--- a/submodules/model
+++ b/submodules/model
@@ -1 +1 @@
-Subproject commit dfb42793e0a5178c9a793be69b08f696c46fd27d
+Subproject commit f5116dae0f644ed96ab33f8fd34745027f553139

From f91c0dcb8e7b28edee1b9bc467b0e912496599d5 Mon Sep 17 00:00:00 2001
From: LennartSchmidtKern
Date: Fri, 20 Sep 2024 10:03:52 +0200
Subject: [PATCH 2/8] cancel tokenization

---
 controller/tokenization_manager.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/controller/tokenization_manager.py b/controller/tokenization_manager.py
index 3f8ec27..69743ee 100644
--- a/controller/tokenization_manager.py
+++ b/controller/tokenization_manager.py
@@ -54,7 +54,12 @@ def tokenize_calculated_attribute(
             record_tokenized_entries[x : x + chunk_size]
             for x in range(0, len(record_tokenized_entries), chunk_size)
         ]
+        tokenization_cancelled = False
         for idx, chunk in enumerate(chunks):
+            record_tokenization_task = tokenization.get(project_id, task_id)
+            if record_tokenization_task.state == enums.TokenizerTask.STATE_FAILED.value:
+                tokenization_cancelled = True
+                break
             values = [
                 add_attribute_to_docbin(tokenizer, record_tokenized_item)
                 for record_tokenized_item in chunk
@@ -69,9 +74,20 @@
             update_tokenization_progress(
                 project_id, tokenization_task, progress_per_chunk
             )
-        finalize_task(
-            project_id, user_id, non_text_attributes, tokenization_task, include_rats
-        )
+        if not tokenization_cancelled:
+            finalize_task(
+                project_id,
+                user_id,
+                non_text_attributes,
+                tokenization_task,
+                include_rats,
+            )
+        else:
+            send_websocket_update(
+                project_id,
+                False,
+                ["docbin", "state", str(record_tokenization_task.state)],
+            )
     except Exception:
         __handle_error(project_id, user_id, task_id)
     finally:
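
Patches 1 and 2 add cooperative cancellation to both tokenization paths with the same pattern: the task row is re-read from the database before each chunk, a STATE_FAILED state (set by the cancelling side) breaks out of the loop, finalization is skipped, and a websocket update reports the final state instead. A minimal sketch of the pattern in isolation; fetch_task_state and process_chunk are hypothetical stand-ins for the repository and docbin calls used in the patches:

from enum import Enum
from typing import Any, Callable, List


class TaskState(Enum):
    CREATED = "CREATED"
    IN_PROGRESS = "IN_PROGRESS"
    FINISHED = "FINISHED"
    FAILED = "FAILED"  # in these patches FAILED doubles as the cancel marker


def process_in_chunks(
    items: List[Any],
    chunk_size: int,
    fetch_task_state: Callable[[], TaskState],  # re-reads state from the DB
    process_chunk: Callable[[List[Any]], None],
) -> bool:
    """Process items chunk by chunk; returns True if cancelled early."""
    chunks = [items[x : x + chunk_size] for x in range(0, len(items), chunk_size)]
    for chunk in chunks:
        # poll the persisted task state so an external cancel takes effect
        # at the next chunk boundary instead of only after the whole run
        if fetch_task_state() == TaskState.FAILED:
            return True
        process_chunk(chunk)
    return False


# example: cancellation flips the persisted state and the loop notices it
state = {"value": TaskState.IN_PROGRESS}
cancelled = process_in_chunks(
    list(range(10)),
    chunk_size=3,
    fetch_task_state=lambda: state["value"],
    process_chunk=lambda chunk: print("processed", chunk),
)

Checking only at chunk boundaries keeps the overhead to one extra query per chunk while bounding cancellation latency to the processing time of a single chunk.
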
From addc90f4c337d5fb25d482856ed10d1171d1b3b1 Mon Sep 17 00:00:00 2001
From: LennartSchmidtKern
Date: Fri, 20 Sep 2024 10:05:19 +0200
Subject: [PATCH 3/8] model

---
 submodules/model | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/submodules/model b/submodules/model
index f5116da..c0804bc 160000
--- a/submodules/model
+++ b/submodules/model
@@ -1 +1 @@
-Subproject commit f5116dae0f644ed96ab33f8fd34745027f553139
+Subproject commit c0804bcb42c509137faa0e5a9131ba5a61c103c6

From 61019f1931ff49e976413e126369f4ff3ccf7bf2 Mon Sep 17 00:00:00 2001
From: LennartSchmidtKern
Date: Fri, 20 Sep 2024 13:37:31 +0200
Subject: [PATCH 4/8] returning tok id

---
 app.py                     | 14 ++++++++++----
 controller/task_manager.py | 17 ++++++++++++-----
 submodules/model           |  2 +-
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/app.py b/app.py
index f75d246..ca377a0 100644
--- a/app.py
+++ b/app.py
@@ -40,7 +40,7 @@ def tokenize_record(request: Request) -> responses.PlainTextResponse:
 def tokenize_calculated_attribute(
     request: AttributeTokenizationRequest,
 ) -> responses.PlainTextResponse:
-    task_manager.start_tokenization_task(
+    record_tokenization_task_id = task_manager.start_tokenization_task(
         request.project_id,
         request.user_id,
         enums.TokenizationTaskTypes.ATTRIBUTE.value,
@@ -48,19 +48,25 @@ def tokenize_calculated_attribute(
         False,
         request.attribute_id,
     )
-    return responses.PlainTextResponse(status_code=status.HTTP_200_OK)
+    return responses.JSONResponse(
+        content={"tokenization_task_id": str(record_tokenization_task_id)},
+        status_code=status.HTTP_200_OK,
+    )
 
 
 @app.post("/tokenize_project")
 def tokenize_project(request: Request) -> responses.PlainTextResponse:
-    task_manager.start_tokenization_task(
+    record_tokenization_task_id = task_manager.start_tokenization_task(
         request.project_id,
         request.user_id,
         enums.TokenizationTaskTypes.PROJECT.value,
         request.include_rats,
         request.only_uploaded_attributes,
     )
-    return responses.PlainTextResponse(status_code=status.HTTP_200_OK)
+    return responses.JSONResponse(
+        content={"tokenization_task_id": str(record_tokenization_task_id)},
+        status_code=status.HTTP_200_OK,
+    )
 
 
 # rats = record_attribute_token_statistics
diff --git a/controller/task_manager.py b/controller/task_manager.py
index f191088..214bbc8 100644
--- a/controller/task_manager.py
+++ b/controller/task_manager.py
@@ -78,7 +78,10 @@ def start_tokenization_task(
         attribute_name,
         include_rats,
     )
-    return status.HTTP_200_OK
+    record_tokenization_task_id = None
+    if task:
+        record_tokenization_task_id = task.id
+    return record_tokenization_task_id
 def start_rats_task(
     project_id: str,
     user_id: str,
     only_uploaded_attributes: bool = False,
     attribute_id: Optional[str] = None,
 ) -> int:
-    if tokenization.is_doc_bin_creation_running_or_queued(project_id, only_running=True):
+    if tokenization.is_doc_bin_creation_running_or_queued(
+        project_id, only_running=True
+    ):
         # at the end of doc bin creation rats will be calculated
         return
         project_id,
         user_id,
         enums.TokenizerTask.TYPE_TOKEN_STATISTICS.value,
-        scope=enums.RecordTokenizationScope.ATTRIBUTE.value
-        if attribute_id
-        else enums.RecordTokenizationScope.PROJECT.value,
+        scope=(
+            enums.RecordTokenizationScope.ATTRIBUTE.value
+            if attribute_id
+            else enums.RecordTokenizationScope.PROJECT.value
+        ),
         attribute_name=attribute_name,
         with_commit=True,
     )
diff --git a/submodules/model b/submodules/model
index c0804bc..7892195 160000
--- a/submodules/model
+++ b/submodules/model
@@ -1 +1 @@
-Subproject commit c0804bcb42c509137faa0e5a9131ba5a61c103c6
+Subproject commit 7892195cab4f44cb984e724c457c8d0e283b1900
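
Patch 4 makes both endpoints return the id of the created tokenization task as JSON instead of an empty 200, so a caller can reference the specific task afterwards, for example to poll or cancel it. A sketch of how a client might consume the new response shape; the base URL and payload values are assumptions for illustration, not part of the patch:

import requests  # hypothetical client; the endpoint path matches app.py above

BASE_URL = "http://localhost:80"  # assumed service address

payload = {
    "project_id": "some-project-uuid",
    "user_id": "some-user-uuid",
    "include_rats": True,
    "only_uploaded_attributes": False,
}
response = requests.post(f"{BASE_URL}/tokenize_project", json=payload)
response.raise_for_status()

# the endpoint now returns the task id instead of an empty body,
# so the caller can hold on to it for later status checks or cancellation
task_id = response.json()["tokenization_task_id"]
print(f"started tokenization task {task_id}")
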
From 1447cf2e3d3508f2d73752c358241e13c0cd60b7 Mon Sep 17 00:00:00 2001
From: LennartSchmidtKern
Date: Wed, 25 Sep 2024 12:52:07 +0200
Subject: [PATCH 5/8] model

---
 submodules/model | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/submodules/model b/submodules/model
index 7892195..bc93d26 160000
--- a/submodules/model
+++ b/submodules/model
@@ -1 +1 @@
-Subproject commit 7892195cab4f44cb984e724c457c8d0e283b1900
+Subproject commit bc93d2640fffba59512dcf37c7df7cc9199c9cac

From 3fd449ad21b979cdd4d763f8a671880b979377f2 Mon Sep 17 00:00:00 2001
From: LennartSchmidtKern
Date: Wed, 25 Sep 2024 13:04:26 +0200
Subject: [PATCH 6/8] error handling

---
 controller/tokenization_manager.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/controller/tokenization_manager.py b/controller/tokenization_manager.py
index 69743ee..44f51e8 100644
--- a/controller/tokenization_manager.py
+++ b/controller/tokenization_manager.py
@@ -57,7 +57,11 @@ def tokenize_calculated_attribute(
         tokenization_cancelled = False
         for idx, chunk in enumerate(chunks):
             record_tokenization_task = tokenization.get(project_id, task_id)
-            if record_tokenization_task.state == enums.TokenizerTask.STATE_FAILED.value:
+            if (
+                not record_tokenization_task
+                or record_tokenization_task.state
+                == enums.TokenizerTask.STATE_FAILED.value
+            ):
                 tokenization_cancelled = True
                 break
             values = [
@@ -135,7 +139,11 @@ def tokenize_initial_project(
         tokenization_cancelled = False
         for idx, record_chunk in enumerate(chunks):
             record_tokenization_task = tokenization.get(project_id, task_id)
-            if record_tokenization_task.state == enums.TokenizerTask.STATE_FAILED.value:
+            if (
+                not record_tokenization_task
+                or record_tokenization_task.state
+                == enums.TokenizerTask.STATE_FAILED.value
+            ):
                 tokenization_cancelled = True
                 break
             entries = []

From 5cc30ea2a473890b2f20da6bf64112f69c24183a Mon Sep 17 00:00:00 2001
From: LennartSchmidtKern
Date: Fri, 27 Sep 2024 16:19:18 +0200
Subject: [PATCH 7/8] model merge

---
 submodules/model | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/submodules/model b/submodules/model
index bc93d26..4dcea6e 160000
--- a/submodules/model
+++ b/submodules/model
@@ -1 +1 @@
-Subproject commit bc93d2640fffba59512dcf37c7df7cc9199c9cac
+Subproject commit 4dcea6e20b5a26ecd3cc48884c3f040a0605a489

From 1b97a214baef91f96114cf23970f8d492975d4b3 Mon Sep 17 00:00:00 2001
From: LennartSchmidtKern
Date: Fri, 27 Sep 2024 16:42:42 +0200
Subject: [PATCH 8/8] model update

---
 submodules/model | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/submodules/model b/submodules/model
index 4dcea6e..454a0e8 160000
--- a/submodules/model
+++ b/submodules/model
@@ -1 +1 @@
-Subproject commit 4dcea6e20b5a26ecd3cc48884c3f040a0605a489
+Subproject commit 454a0e84f7b6f9d81ae3ddd8908e6ac36afec992
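
Patch 6 hardens the cancellation check introduced in patches 1 and 2: tokenization.get may return None, for example when the task row was deleted rather than flagged as failed, and the previously unguarded record_tokenization_task.state access would then raise an AttributeError inside the worker. A minimal sketch of the guard, using a hypothetical task object in place of the ORM row:

from typing import Optional


class Task:  # minimal stand-in for the persisted task row
    def __init__(self, state: str) -> None:
        self.state = state


STATE_FAILED = "FAILED"


def is_cancelled(task: Optional[Task]) -> bool:
    # a missing task row is treated the same as an explicit FAILED state,
    # so a deleted task stops the chunk loop instead of crashing it
    return task is None or task.state == STATE_FAILED


assert is_cancelled(None)
assert is_cancelled(Task(STATE_FAILED))
assert not is_cancelled(Task("IN_PROGRESS"))
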