diff --git a/src/llm_pdf.py b/src/llm_pdf.py index 75e6542..b46fa65 100644 --- a/src/llm_pdf.py +++ b/src/llm_pdf.py @@ -140,6 +140,7 @@ def on_finish_fn(): else: st.session_state.first_last_doc = [docs[0]] docs.pop(0) + logger.info(f"First and last docs: {st.session_state.first_last_doc}") _upload_docs_to_elastic(docs, pdf_hash, on_finish_fn) if st.session_state.input_disabled: @@ -195,7 +196,7 @@ def _docs_from_chunks(_chunks, cache_data_pdf_hash): for chunk in chunks: text = re.sub(r"[^\x20-\x7E]+", " ", chunk.page_content) page = chunk.metadata["page"] - text = f"Page {page + 1}:" + text + text = f"Page {page + 1}: " + text clean_chunks.append(text) # We have one chunk = one page of original PDF,