Skip to content

Commit

Permalink
Log first and last docs before uploading the rest to elastic
Browse files: browse the repository at this point in the history
  • Loading branch information
IvanRublev committed Jun 21, 2024
1 parent 81a7c67 commit c1e70ea
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/llm_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def on_finish_fn():
else:
st.session_state.first_last_doc = [docs[0]]
docs.pop(0)
+ logger.info(f"First and last docs: {st.session_state.first_last_doc}")
_upload_docs_to_elastic(docs, pdf_hash, on_finish_fn)

if st.session_state.input_disabled:
Expand Down Expand Up @@ -195,7 +196,7 @@ def _docs_from_chunks(_chunks, cache_data_pdf_hash):
for chunk in chunks:
text = re.sub(r"[^\x20-\x7E]+", " ", chunk.page_content)
page = chunk.metadata["page"]
- text = f"Page {page + 1}:" + text
+ text = f"Page {page + 1}: " + text
clean_chunks.append(text)

# We have one chunk = one page of original PDF,
Expand Down

0 comments on commit c1e70ea

Please sign in to comment.