Skip to content

Commit

Permalink
Added another check for arxiv from DOI (#59)
Browse files Browse the repository at this point in the history
* added another check for arxiv
  • Loading branch information
blackadad authored Apr 8, 2024
1 parent 084d4bd commit f8b6b10
Showing 1 changed file with 14 additions and 5 deletions.
19 changes: 14 additions & 5 deletions paperscraper/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,20 @@ async def pmc_to_pdf(pmc_id, path, session: ClientSession) -> None:


async def arxiv_scraper(paper, path, session: ClientSession) -> bool:
    """Download a paper's PDF from arXiv if an arXiv identifier is available.

    The identifier is looked up in ``paper["externalIds"]`` in this order:

    1. an arXiv-style DOI such as ``10.48550/arXiv.2305.10379`` (the part
       after ``arXiv.`` is the identifier);
    2. an explicit ``"ArXiv"`` entry.

    Args:
        paper: Paper metadata mapping; only ``paper["externalIds"]`` is read.
        path: Destination forwarded unchanged to ``arxiv_to_pdf``.
        session: HTTP session forwarded unchanged to ``arxiv_to_pdf``.

    Returns:
        ``True`` once ``arxiv_to_pdf`` has completed for the identifier that
        was found, ``False`` when no arXiv identifier could be determined.
        Exceptions raised by ``arxiv_to_pdf`` propagate to the caller.
    """
    # Treat a null/empty "externalIds" value like an empty mapping.
    external_ids = paper["externalIds"] or {}

    arxiv_id = None

    # check doi
    # example: 10.48550/arXiv.2305.10379
    doi = external_ids.get("DOI") or ""
    doi_tail = doi.rsplit("/", 1)[-1]
    doi_marker = "arxiv."
    # DOIs are case-insensitive, so compare the marker in lowercase. Requiring
    # the trailing dot ensures the slice below yields only the identifier and
    # never the whole DOI.
    if doi_tail.lower().startswith(doi_marker):
        arxiv_id = doi_tail[len(doi_marker):]

    # Fall back to the explicit entry when the DOI gave nothing usable.
    if not arxiv_id:
        arxiv_id = external_ids.get("ArXiv")

    if not arxiv_id:
        return False

    await arxiv_to_pdf(arxiv_id, path, session)
    return True


async def xiv_scraper(paper, path, domain: str, session: ClientSession) -> bool:
Expand Down

0 comments on commit f8b6b10

Please sign in to comment.