Task master cancel #71

Merged on Sep 27, 2024 · 9 commits

app.py (14 changes: 10 additions & 4 deletions)

@@ -40,27 +40,33 @@ def tokenize_record(request: Request) -> responses.PlainTextResponse:
 def tokenize_calculated_attribute(
     request: AttributeTokenizationRequest,
 ) -> responses.PlainTextResponse:
-    task_manager.start_tokenization_task(
+    record_tokenization_task_id = task_manager.start_tokenization_task(
         request.project_id,
         request.user_id,
         enums.TokenizationTaskTypes.ATTRIBUTE.value,
         request.include_rats,
         False,
         request.attribute_id,
     )
-    return responses.PlainTextResponse(status_code=status.HTTP_200_OK)
+    return responses.JSONResponse(
+        content={"tokenization_task_id": str(record_tokenization_task_id)},
+        status_code=status.HTTP_200_OK,
+    )


 @app.post("/tokenize_project")
 def tokenize_project(request: Request) -> responses.PlainTextResponse:
-    task_manager.start_tokenization_task(
+    record_tokenization_task_id = task_manager.start_tokenization_task(
         request.project_id,
         request.user_id,
         enums.TokenizationTaskTypes.PROJECT.value,
         request.include_rats,
         request.only_uploaded_attributes,
     )
-    return responses.PlainTextResponse(status_code=status.HTTP_200_OK)
+    return responses.JSONResponse(
+        content={"tokenization_task_id": str(record_tokenization_task_id)},
+        status_code=status.HTTP_200_OK,
+    )


 # rats = record_attribute_token_statistics
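
Note: both endpoints now answer with the id of the tokenization task they queued instead of an empty 200, so a caller keeps a handle on the task (the PR title suggests this is what a later cancel needs to reference). A minimal sketch of how a client could pick the id up; the base URL and the exact payload fields are assumptions for illustration, not taken from this PR:

    # Hedged sketch of a client call; host, port and payload field names are assumptions.
    import requests

    response = requests.post(
        "http://localhost:80/tokenize_project",  # assumed service address
        json={
            "project_id": "00000000-0000-0000-0000-000000000000",  # placeholder ids
            "user_id": "00000000-0000-0000-0000-000000000001",
            "include_rats": True,
            "only_uploaded_attributes": False,
        },
    )
    # The endpoint now returns a JSON body instead of an empty response.
    task_id = response.json()["tokenization_task_id"]
    print("started tokenization task:", task_id)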

controller/task_manager.py (17 changes: 12 additions & 5 deletions)

@@ -78,7 +78,10 @@ def start_tokenization_task(
         attribute_name,
         include_rats,
     )
-    return status.HTTP_200_OK
+    record_tokenization_task_id = None
+    if task:
+        record_tokenization_task_id = task.id
+    return record_tokenization_task_id


 def start_rats_task(
@@ -87,7 +90,9 @@ def start_rats_task(
     only_uploaded_attributes: bool = False,
     attribute_id: Optional[str] = None,
 ) -> int:
-    if tokenization.is_doc_bin_creation_running_or_queued(project_id, only_running=True):
+    if tokenization.is_doc_bin_creation_running_or_queued(
+        project_id, only_running=True
+    ):
         # at the end of doc bin creation rats will be calculated
         return

@@ -102,9 +107,11 @@
         project_id,
         user_id,
         enums.TokenizerTask.TYPE_TOKEN_STATISTICS.value,
-        scope=enums.RecordTokenizationScope.ATTRIBUTE.value
-        if attribute_id
-        else enums.RecordTokenizationScope.PROJECT.value,
+        scope=(
+            enums.RecordTokenizationScope.ATTRIBUTE.value
+            if attribute_id
+            else enums.RecordTokenizationScope.PROJECT.value
+        ),
         attribute_name=attribute_name,
         with_commit=True,
     )
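
Note: combined with the endpoints in app.py above, if no task object is created (task is falsy), start_tokenization_task returns None and the response still carries the literal string "None", because the id is passed through str(). A small illustration of that edge case, assuming nothing beyond the code shown in this diff:

    # Illustration only: how a missing task id serialises in the JSON response above.
    record_tokenization_task_id = None
    content = {"tokenization_task_id": str(record_tokenization_task_id)}
    print(content)  # {'tokenization_task_id': 'None'} -- callers may want to check for this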

controller/tokenization_manager.py (58 changes: 47 additions & 11 deletions)

@@ -54,7 +54,16 @@ def tokenize_calculated_attribute(
             record_tokenized_entries[x : x + chunk_size]
             for x in range(0, len(record_tokenized_entries), chunk_size)
         ]
+        tokenization_cancelled = False
         for idx, chunk in enumerate(chunks):
+            record_tokenization_task = tokenization.get(project_id, task_id)
+            if (
+                not record_tokenization_task
+                or record_tokenization_task.state
+                == enums.TokenizerTask.STATE_FAILED.value
+            ):
+                tokenization_cancelled = True
+                break
             values = [
                 add_attribute_to_docbin(tokenizer, record_tokenized_item)
                 for record_tokenized_item in chunk
@@ -69,9 +78,20 @@
             update_tokenization_progress(
                 project_id, tokenization_task, progress_per_chunk
             )
-        finalize_task(
-            project_id, user_id, non_text_attributes, tokenization_task, include_rats
-        )
+        if not tokenization_cancelled:
+            finalize_task(
+                project_id,
+                user_id,
+                non_text_attributes,
+                tokenization_task,
+                include_rats,
+            )
+        else:
+            send_websocket_update(
+                project_id,
+                False,
+                ["docbin", "state", str(record_tokenization_task.state)],
+            )
     except Exception:
         __handle_error(project_id, user_id, task_id)
     finally:
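
Note: the pattern added here re-reads the tokenization task before every chunk and stops as soon as the task has disappeared or was set to the FAILED state, then reports the final state over the websocket instead of finalizing. A stripped-down, self-contained sketch of that polling loop; fetch_task, process_chunk, finalize and notify_cancelled are illustrative names, not functions from this repository:

    # Simplified illustration of the per-chunk cancellation check introduced in this PR.
    from typing import Callable, Iterable, Optional

    def run_chunks_with_cancellation(
        chunks: Iterable[list],
        fetch_task: Callable[[], Optional[object]],  # re-reads the task row each pass
        process_chunk: Callable[[list], None],
        finalize: Callable[[], None],
        notify_cancelled: Callable[[str], None],
    ) -> None:
        cancelled_state = None
        for chunk in chunks:
            task = fetch_task()
            if task is None or task.state == "FAILED":  # task deleted or cancelled
                cancelled_state = getattr(task, "state", "FAILED")
                break
            process_chunk(chunk)
        if cancelled_state is None:
            finalize()                          # normal completion path
        else:
            notify_cancelled(cancelled_state)   # e.g. a "docbin state <state>" websocket update

The sketch guards the case where the task row no longer exists, since a state can only be read from a task object that is still there.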

@@ -116,7 +136,16 @@ def tokenize_initial_project(
         chunks = [
             records[x : x + chunk_size] for x in range(0, len(records), chunk_size)
         ]
+        tokenization_cancelled = False
         for idx, record_chunk in enumerate(chunks):
+            record_tokenization_task = tokenization.get(project_id, task_id)
+            if (
+                not record_tokenization_task
+                or record_tokenization_task.state
+                == enums.TokenizerTask.STATE_FAILED.value
+            ):
+                tokenization_cancelled = True
+                break
             entries = []
             for record_item in record_chunk:
                 if __remove_from_priority_queue(project_id, record_item.id):
@@ -131,14 +160,21 @@
             update_tokenization_progress(
                 project_id, tokenization_task, progress_per_chunk
             )
-        finalize_task(
-            project_id,
-            user_id,
-            non_text_attributes,
-            tokenization_task,
-            include_rats,
-            only_uploaded_attributes,
-        )
+        if not tokenization_cancelled:
+            finalize_task(
+                project_id,
+                user_id,
+                non_text_attributes,
+                tokenization_task,
+                include_rats,
+                only_uploaded_attributes,
+            )
+        else:
+            send_websocket_update(
+                project_id,
+                False,
+                ["docbin", "state", str(record_tokenization_task.state)],
+            )
     except Exception:
         __handle_error(project_id, user_id, task_id)
     finally:
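
Note: the same check runs in tokenize_initial_project, so both tokenization paths react to a cancel. The diff only shows the consuming side; a cancel trigger would presumably just flip the task state that these loops poll. A hypothetical, self-contained sketch of such a trigger, not code from this PR (the task store and names below are stand-ins for the tokenization table and enum values seen in the diff):

    # Hypothetical sketch, NOT part of this PR: a cancel amounts to marking the task
    # FAILED, which the per-chunk checks above then observe.
    from dataclasses import dataclass

    @dataclass
    class TokenizationTask:
        id: str
        state: str = "CREATED"

    TASKS: dict[str, TokenizationTask] = {}  # stand-in for the tokenization task table

    def cancel_tokenization_task(task_id: str) -> bool:
        task = TASKS.get(task_id)  # mirrors tokenization.get(project_id, task_id)
        if task is None:
            return False
        task.state = "FAILED"      # mirrors enums.TokenizerTask.STATE_FAILED.value
        return True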