Commit

Merge pull request #977 from ImageMarkup/fix-es-indexing
Stop elasticsearch indexing from polluting the cache
danlamanna authored Oct 3, 2024
2 parents 1665f5d + 5df7070 commit 8b1f22e
Showing 2 changed files with 10 additions and 8 deletions.
16 changes: 9 additions & 7 deletions isic/core/search.py
@@ -107,7 +107,7 @@ def add_to_search_index(image: Image) -> None:
 
 
 def bulk_add_to_search_index(qs: QuerySet[Image], chunk_size: int = 2_000) -> None:
-    from opensearchpy.helpers import parallel_bulk
+    from opensearchpy.helpers import bulk
 
     # The opensearch logger is very noisy when updating records,
     # set it to warning during this operation.
@@ -119,18 +119,20 @@ def bulk_add_to_search_index(qs: QuerySet[Image], chunk_size: int = 2_000) -> None:
     # Use a generator for lazy evaluation
     image_documents = (image.to_elasticsearch_document() for image in qs.iterator())
 
-    for success, info in parallel_bulk(
+    # note we can't use parallel_bulk because the cachalot_disabled context manager
+    # is thread local.
+    success, info = bulk(
         client=get_elasticsearch_client(),
         index=settings.ISIC_ELASTICSEARCH_INDEX,
         actions=image_documents,
         # The default chunk_size is 2000, but that may be too many models to fit into memory.
         # Note the default chunk_size matches QuerySet.iterator
         chunk_size=chunk_size,
-        # the thread count should be limited to avoid exhausting the connection pool
-        thread_count=2,
-    ):
-        if not success:
-            logger.error("Failed to insert document into elasticsearch: %s", info)
+        max_retries=3,
+    )
+
+    if not success:
+        logger.error("Failed to insert document into elasticsearch: %s", info)
 
 
 def _prettify_facets(facets: dict[str, Any]) -> dict[str, Any]:
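The new comment is the heart of the change: django-cachalot's cachalot_disabled context manager only affects the thread that enters it, while parallel_bulk hands work (including consumption of the image_documents generator, and therefore the ORM queries behind it) to worker threads. A minimal sketch of that thread-local behavior, with illustrative names that are not taken from the repository:

import threading
from concurrent.futures import ThreadPoolExecutor

# Stand-in for a thread-local "caching disabled" flag, analogous to the
# state a thread-local context manager such as cachalot_disabled would set.
_state = threading.local()

def caching_disabled() -> bool:
    return getattr(_state, "disabled", False)

_state.disabled = True  # "disable caching" in the calling thread only

with ThreadPoolExecutor(max_workers=1) as pool:
    print(caching_disabled())                      # True: the flag is visible in this thread
    print(pool.submit(caching_disabled).result())  # False: the worker thread has fresh state
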
2 changes: 1 addition & 1 deletion isic/core/tasks.py
@@ -56,7 +56,7 @@ def share_collection_with_users_task(collection_pk: int, grantor_pk: int, user_p
     autoretry_for=(ConnectionError, TimeoutError),
     retry_backoff=True,
     retry_backoff_max=600,
-    retry_kwargs={"max_retries": 15},
+    retry_kwargs={"max_retries": 3},
 )
 def sync_elasticsearch_index_task():
     bulk_add_to_search_index(Image.objects.with_elasticsearch_properties())
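Read together, the two changes index serially in the calling thread, so a thread-local context manager like cachalot_disabled can actually cover the queries issued while the documents are built, and they tighten the retry budget: max_retries=3 on the bulk call and at most 3 Celery-level retries instead of 15. A rough sketch of how such a helper can be wrapped, assuming django-cachalot's cachalot_disabled is importable from cachalot.api; the wrapper itself is illustrative and not part of this diff:

from cachalot.api import cachalot_disabled
from opensearchpy.helpers import bulk

def reindex_images(client, index, qs, chunk_size=2_000):
    # Everything in this block runs in the calling thread, so the thread-local
    # "don't cache these queries" flag applies to the ORM queries triggered
    # while bulk() consumes the generator below.
    with cachalot_disabled():
        documents = (image.to_elasticsearch_document() for image in qs.iterator())
        success, info = bulk(
            client=client,
            index=index,
            actions=documents,
            chunk_size=chunk_size,
            max_retries=3,
        )
    return success, info

With parallel_bulk, the generator would instead be consumed by worker threads where that flag was never set, which is exactly what the new comment in search.py warns about.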
