diff --git a/00_parser.ipynb b/00_parser.ipynb index bc0fead..d23c016 100644 --- a/00_parser.ipynb +++ b/00_parser.ipynb @@ -22,10 +22,22 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The dotenv extension is already loaded. To reload it, use:\n", + " %reload_ext dotenv\n" + ] + } + ], "source": [ "#hide\n", - "from nbdev.showdoc import *" + "from nbdev.showdoc import *\n", + "\n", + "%load_ext dotenv\n", + "%dotenv" ] }, { @@ -43,6 +55,7 @@ "source": [ "#export\n", "\n", + "import os\n", "from urllib.parse import urlparse, parse_qsl, parse_qs\n", "import requests\n", "import arrow\n", @@ -60,12 +73,13 @@ " date = '{}Z'.format(date_obj.format('YYYY-MM-DDT00:00:00'))\n", " return date\n", "\n", - "def parse_query(query):\n", + "def parse_query(query, api_version=2):\n", " '''\n", " Converts the parameters of a search using the Trove web interface into a form the API will understand.\n", " \n", " Parameters: \n", " * `query` – the url of a search in the Trove newspapers & gazettes category\n", + " * `api_version` – Trove API version (default is 2)\n", " \n", " Returns: \n", " * a dict containing the parameters (multiple values will be in a list)\n", @@ -124,10 +138,13 @@ " elif key == 'keyword.any':\n", " keywords.append('({})'.format(' OR '.join(value.split())))\n", " elif key in ['l-ArtType', 'l-advArtType', 'l-artType']:\n", - " if value == 'newspapers':\n", - " new_params['zone'] = 'newspaper'\n", - " elif value == 'gazette':\n", - " new_params['zone'] = 'gazette'\n", + " if api_version == 2:\n", + " if value == 'newspapers':\n", + " new_params['zone'] = 'newspaper'\n", + " elif value == 'gazette':\n", + " new_params['zone'] = 'gazette'\n", + " elif api_version == 3:\n", + " new_params['l-artType'] = value\n", " if keywords:\n", " if 'q' in new_params:\n", " new_params['q'] += ' AND {}'.format(' AND '.join(keywords))\n", @@ -145,12 +162,47 @@ " 
new_params['q'] = date_query\n", " if 'q' not in new_params:\n", " new_params['q'] = ' '\n", - " if 'zone' not in new_params:\n", + " if api_version == 2 and 'zone' not in new_params:\n", " new_params['zone'] = 'newspaper,gazette'\n", + " if api_version == 3 and 'category' not in new_params:\n", + " new_params['category'] = 'newspaper'\n", " # return '{}?{}'.format('https://api.trove.nla.gov.au/v2/result', urlencode(new_params, doseq=True))\n", " return new_params" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "

parse_query[source]

\n", + "\n", + "> parse_query(**`query`**, **`api_version`**=*`2`*)\n", + "\n", + "Converts the parameters of a search using the Trove web interface into a form the API will understand.\n", + "\n", + "Parameters: \n", + "* `query` – the url of a search in the Trove newspapers & gazettes category\n", + "* `api_version` – Trove API version (default is 2)\n", + "\n", + "Returns: \n", + "* a dict containing the parameters (multiple values will be in a list)" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_doc(parse_query)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -176,11 +228,12 @@ "data": { "text/plain": [ "{'q': 'wragge',\n", - " 'zone': 'newspaper',\n", + " 'l-artType': 'newspapers',\n", " 'l-state': ['Queensland'],\n", " 'l-category': ['Article'],\n", " 'l-illustrated': 'true',\n", - " 'l-illtype': ['Cartoon']}" + " 'l-illtype': ['Cartoon'],\n", + " 'category': 'newspaper'}" ] }, "execution_count": null, @@ -189,7 +242,7 @@ } ], "source": [ - "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon')\n", + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon', 3)\n", "params" ] }, @@ -197,60 +250,52 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "If you want to use this to get data back from the Trove API, you'll need to add a `key` parameter with your Trove API key. You might also want to change the `encoding` of the results to 'json'. Then you can just give the parameters as `params` to `requests`. For example:\n", + "If you want to use this to get data back from the Trove API, you'll need to provide your Trove API key, either as a query parameter (version 2), or in the request headers (version 3). 
You might also want to change the `encoding` of the results to 'json'. Then you can just give the parameters as `params` to `requests`. For example:\n", "\n", "``` python\n", - "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon')\n", - "params['key'] = 'mYApiKEY'\n", + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon', 3)\n", + "headers = {'X-API-KEY': 'mYApiKEY'}\n", "params['encoding'] = 'json'\n", - "response = requests.get('https://api.trove.nla.gov.au/v2/result', params=params)\n", + "params['n'] = 1\n", + "response = requests.get('https://api.trove.nla.gov.au/v3/result', params=params, headers=headers)\n", "data = response.json()\n", "```\n", "\n", "Assuming your API key is valid, this will return the following results:\n", "\n", "``` python\n", - "{'response': {'query': 'wragge',\n", - " 'zone': [{'name': 'newspaper',\n", - " 'records': {'s': '*',\n", - " 'n': '3',\n", - " 'total': '3',\n", - " 'article': [{'id': '76672882',\n", - " 'url': '/newspaper/76672882',\n", - " 'heading': 'THE POLITICAL STIGER YORTEX.',\n", - " 'category': 'Article',\n", - " 'title': {'id': '274',\n", - " 'value': 'The Charleville Times (Brisbane, Qld. : 1896 - 1954)'},\n", - " 'date': '1901-10-12',\n", - " 'page': 4,\n", - " 'pageSequence': 4,\n", - " 'relevance': {'score': '250.99701', 'value': 'very relevant'},\n", - " 'snippet': 'PREMIER PHILP: \"I think that\\'s a better shot than Wragge\\'s.\"',\n", - " 'troveUrl': 'https://trove.nla.gov.au/ndp/del/article/76672882?searchTerm=wragge'},\n", - " {'id': '50294024',\n", - " 'url': '/newspaper/50294024',\n", - " 'heading': 'We nearly broke the drought (. . . WE THINK)',\n", - " 'category': 'Article',\n", - " 'title': {'id': '12',\n", - " 'value': 'The Courier-Mail (Brisbane, Qld. 
: 1933 - 1954)'},\n", - " 'date': '1952-02-16',\n", - " 'page': 2,\n", - " 'pageSequence': 2,\n", - " 'relevance': {'score': '12.74085', 'value': 'very relevant'},\n", - " 'snippet': 'WE were determined to try our hand at rainmaking, and',\n", - " 'troveUrl': 'https://trove.nla.gov.au/ndp/del/article/50294024?searchTerm=wragge'},\n", - " {'id': '76372015',\n", - " 'url': '/newspaper/76372015',\n", - " 'heading': 'Digest What YOU Eat.',\n", - " 'category': 'Article',\n", - " 'title': {'id': '266',\n", - " 'value': 'The Western Champion and General Advertiser for the Central-Western Districts (Barcaldine, Qld. : 1892 - 1922)'},\n", - " 'date': '1906-01-08',\n", - " 'page': 5,\n", - " 'pageSequence': 5,\n", - " 'relevance': {'score': '5.734701', 'value': 'very relevant'},\n", - " 'snippet': \"The reason why any wholesome food is not properly digested is because the stomach lacks some important element of digestion. Some stomach' lack peptone, others are deficient in gastric juice or hydrochloric\",\n", - " 'troveUrl': 'https://trove.nla.gov.au/ndp/del/article/76372015?searchTerm=wragge'}]}}]}}\n", + "{'query': 'wragge',\n", + " 'category': [{'code': 'newspaper',\n", + " 'name': 'Newspapers & Gazettes',\n", + " 'records': {'s': '*',\n", + " 'n': 2,\n", + " 'total': 510,\n", + " 'next': 'https://api.trove.nla.gov.au/v3/result?q=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrated=true&l-illtype=Cartoon&category=newspaper&encoding=json&n=2&s=AoIIQzWFoig4MjM0NjM1NA%3D%3D',\n", + " 'nextStart': 'AoIIQzWFoig4MjM0NjM1NA==',\n", + " 'article': [{'id': '21765046',\n", + " 'url': 'https://api.trove.nla.gov.au/v3/newspaper/21765046',\n", + " 'heading': 'Mrs. Adelaide Wragge.',\n", + " 'category': 'Article',\n", + " 'title': {'id': '16',\n", + " 'title': 'The Brisbane Courier (Qld. 
: 1864 - 1933)'},\n", + " 'date': '1931-12-16',\n", + " 'page': '13',\n", + " 'pageSequence': '13',\n", + " 'relevance': {'score': 215.65185546875, 'value': 'very relevant'},\n", + " 'snippet': 'Formerly of Victoria, and in 1864 Mayoress of Melbourne, the late Mrs. Wragge, who died recently, had been',\n", + " 'troveUrl': 'https://.nla.gov.au/nla.news-article21765046?searchTerm=wragge'},\n", + " {'id': '82346354',\n", + " 'url': 'https://api.trove.nla.gov.au/v3/newspaper/82346354',\n", + " 'heading': 'MR WRAGGE ON WEATHER CANNONS.',\n", + " 'category': 'Article',\n", + " 'title': {'id': '269',\n", + " 'title': 'The North Queensland Register (Townsville, Qld. : 1892 - 1905)'},\n", + " 'date': '1901-03-11',\n", + " 'page': '10',\n", + " 'pageSequence': '10',\n", + " 'relevance': {'score': 181.52200317382812, 'value': 'very relevant'},\n", + " 'snippet': 'I have been to Styria, have seen the cannons made in the forges, have witnessed the experiments, have visited Herr Stiger, the inventor of the',\n", + " 'troveUrl': 'https://.nla.gov.au/nla.news-article82346354?searchTerm=wragge'}]}}]}\n", "```" ] }, @@ -265,7 +310,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Tests" + "## Version 2 tests" ] }, { @@ -651,25 +696,409 @@ "assert {'q': 'wragge', 'zone': 'newspaper', 'l-word': '100 - 1000 Words'} == parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advWord=100%20-%201000%20Words')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Version 3 tests" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Simple search with facets" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'q': 'rabbits date:[1900-01-31T00:00:00Z TO 1900-05-30T00:00:00Z]',\n", - " 'zone': 'newspaper,gazette'}" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" 
- } - ], + "outputs": [], + "source": [ + "def query_api(params):\n", + " api_key = os.getenv(\"TROVE_API_KEY\")\n", + " params[\"n\"] = 0\n", + " response = requests.get(\"https://api.trove.nla.gov.au/v3/result\", params=params, headers={\"X-API-KEY\": api_key})\n", + " return response.status_code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge%20weather', 3)\n", + "assert {'q': 'wragge weather', 'category': 'newspaper'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Multiple keywords with `OR` are passed along as is." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge%20OR%20weather', 3)\n", + "assert {'q': 'wragge OR weather', 'category': 'newspaper'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Phrase search passed along as is." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=%22inclement%20wragge%22', 3)\n", + "assert {'q': '\"inclement wragge\"', 'category': 'newspaper'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "More complex queries such as date ranges should be passed along as is." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge%20date%3A%5B1901%20TO%201903%5D&l-artType=newspapers', 3)\n", + "assert {'q': 'wragge date:[1901 TO 1903]', 'category': 'newspaper', 'l-artType': 'newspapers'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit to gazettes using facets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=gazette', 3)\n", + "assert {'q': 'wragge', 'category': 'newspaper', 'l-artType': 'gazette'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit state to NSW using facets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-state=New%20South%20Wales', 3)\n", + "assert {'q': 'wragge', 'l-state': ['New South Wales'], 'category': 'newspaper'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit newspaper to SMH using facets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-title=35', 3)\n", + "assert {'q': 'wragge', 'category': 'newspaper', 'l-title': ['35'], 'l-artType': 'newspapers'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit to 'Article' category using facets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-category=Article', 3)\n", + "assert {'q': 'wragge', 'category': 'newspaper', 'l-category': ['Article'], 'l-artType': 'newspapers'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit to specific decade using facets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-decade=190', 3)\n", + "assert {'q': 'wragge', 'l-artType': 'newspapers', 'l-decade': ['190'], 'category': 'newspaper'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit to specific year using facets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-decade=190&l-year=1903', 3)\n", + "assert {'q': 'wragge', 'l-artType': 'newspapers', 'l-decade': ['190'], 'l-year': ['1903'], 'category': 'newspaper'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit to articles with illustration type of 'Photo' with facets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-illustrationType=Photo', 3)\n", + "assert {'q': 'wragge', 'category': 'newspaper', 'l-illustrated': 'true', 'l-illtype': ['Photo'], 'l-artType': 'newspapers'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit to articles containing more than 1,000 words using facets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-word=1000%2B%20Words', 3)\n", + "assert {'q': 'wragge', 'category': 'newspaper', 'l-word': ['1000+ Words'], 'l-artType': 'newspapers'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Advanced search" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Multiple keywords in 'Any of these words' box." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword.any=wragge%20weather', 3)\n", + "assert {'q': '(wragge OR weather)', 'category': 'newspaper'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Multiple keywords in 'The phrase' box." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword.phrase=inclement%20wragge', 3)\n", + "assert {'q': '\"inclement wragge\"', 'category': 'newspaper'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Keywords in 'All of these words' and 'Without these words' boxes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword.not=weather&keyword=wragge', 3)\n", + "assert {'q': 'wragge AND NOT (weather)', 'category': 'newspaper'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit to a specific date range." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&date.from=1900-01-01&date.to=1900-02-04&l-advArtType=newspapers', 3)\n", + "assert {'q': 'wragge date:[1899-12-31T00:00:00Z TO 1900-02-04T00:00:00Z]', 'category': 'newspaper', 'l-artType': 'newspapers'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit to a specific state." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advstate=Queensland', 3)\n", + "assert {'q': 'wragge', 'category': 'newspaper', 'l-state': ['Queensland'], 'l-artType': 'newspapers'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit to specific newspapers." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advtitle=16&l-advtitle=1055', 3)\n", + "assert {'q': 'wragge', 'category': 'newspaper', 'l-title': ['16', '1055'], 'l-artType': 'newspapers'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit to a specific category." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advcategory=Family%20Notices', 3)\n", + "assert {'q': 'wragge', 'category': 'newspaper', 'l-category': ['Family Notices'], 'l-artType': 'newspapers'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit to a specific illustration type." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advIllustrationType=Photo', 3)\n", + "assert {'q': 'wragge', 'category': 'newspaper', 'l-illustrated': 'true', 'l-illtype': ['Photo'], 'l-artType': 'newspapers'} == params\n", + "assert query_api(params) == 200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limit to a specific number of words." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=rabbits&date.from=1900-02-01&date.to=1900-05-30')" + "params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advWord=100%20-%201000%20Words', 3)\n", + "assert {'q': 'wragge', 'category': 'newspaper', 'l-word': '100 - 1000 Words', 'l-artType': 'newspapers'} == params\n", + "assert query_api(params) == 200" ] }, { @@ -682,7 +1111,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" } diff --git a/README.md b/README.md index fba6c73..0a8a02a 100644 --- a/README.md +++ b/README.md @@ -10,23 +10,26 @@ * Construct a search in the Trove 'Newspapers and Gazettes' category. * Copy the search url. -* Feed the url to the `parse_query` function. +* Feed the url to the `parse_query` function + +The second parameter to `parse_query` is the Trove API version number. The default is `2` for backwards compatibility. 
```python from trove_query_parser.parser import parse_query -parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon') +parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon', 3) ``` {'q': 'wragge', - 'zone': 'newspaper', + 'l-artType': 'newspapers', 'l-state': ['Queensland'], 'l-category': ['Article'], 'l-illustrated': 'true', - 'l-illtype': ['Cartoon']} + 'l-illtype': ['Cartoon'], + 'category': 'newspaper'} diff --git a/docs/index.html b/docs/index.html index eae8764..db31d73 100644 --- a/docs/index.html +++ b/docs/index.html @@ -47,8 +47,9 @@

How to use
  • Construct a search in the Trove 'Newspapers and Gazettes' category.
  • Copy the search url.
  • -
  • Feed the url to the parse_query function.
  • +
  • Feed the url to the parse_query function.
  • +

    The second parameter to parse_query is the Trove API version number. The default is 2 for backwards compatibility.

    @@ -62,7 +63,7 @@

    How to use
    from trove_query_parser.parser import parse_query
     
    -parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon')
    +parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon', 3)
     
    @@ -78,11 +79,12 @@

    How to use
    {'q': 'wragge',
    - 'zone': 'newspaper',
    + 'l-artType': 'newspapers',
      'l-state': ['Queensland'],
      'l-category': ['Article'],
      'l-illustrated': 'true',
    - 'l-illtype': ['Cartoon']}
    + 'l-illtype': ['Cartoon'], + 'category': 'newspaper'} @@ -116,5 +118,5 @@

    Limitations - + diff --git a/docs/parser.html b/docs/parser.html index f1cc05e..7a52803 100644 --- a/docs/parser.html +++ b/docs/parser.html @@ -46,7 +46,7 @@

    Functions

    -

    format_date[source]

    format_date(date, start=False)

    +

    format_date[source]

    format_date(date, start=False)

    The web interface uses YYYY-MM-DD dates, but the API expects YYYY-MM-DDT00:00:00Z. Reformat dates accordingly.

    Also the start date in an API query needs to be set to the day before you want. So if this is a start date, take it back in time by a day.

    @@ -65,6 +65,13 @@

    format_date +

    + {% endraw %} + + {% raw %} + +
    +
    @@ -72,12 +79,13 @@

    format_date -

    parse_query[source]

    parse_query(query)

    +

    parse_query[source]

    parse_query(query, api_version=2)

    Converts the parameters of a search using the Trove web interface into a form the API will understand.

    Parameters:

    • query – the url of a search in the Trove newspapers & gazettes category
    • +
    • api_version – Trove API version (default is 2)

    Returns:

      @@ -91,13 +99,6 @@

      parse_query -

    {% endraw %} @@ -122,7 +123,7 @@

    Basic usage
    -
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon')
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon', 3)
     params
     
    @@ -139,11 +140,12 @@

    Basic usage
    {'q': 'wragge',
    - 'zone': 'newspaper',
    + 'l-artType': 'newspapers',
      'l-state': ['Queensland'],
      'l-category': ['Article'],
      'l-illustrated': 'true',
    - 'l-illtype': ['Cartoon']}
    + 'l-illtype': ['Cartoon'], + 'category': 'newspaper'}

    @@ -156,55 +158,47 @@

    Basic usage
    -

    If you want to use this to get data back from the Trove API, you'll need to add a key parameter with your Trove API key. You might also want to change the encoding of the results to 'json'. Then you can just give the parameters as params to requests. For example:

    -
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon')
    -params['key'] = 'mYApiKEY'
    +

    If you want to use this to get data back from the Trove API, you'll need to provide your Trove API key, either as a query parameter (version 2), or in the request headers (version 3). You might also want to change the encoding of the results to 'json'. Then you can just give the parameters as params to requests. For example:

    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon', 3)
    +headers = {'X-API-KEY': 'mYApiKEY'}
     params['encoding'] = 'json'
    -response = requests.get('https://api.trove.nla.gov.au/v2/result', params=params)
    +params['n'] = 2
    +response = requests.get('https://api.trove.nla.gov.au/v3/result', params=params, headers=headers)
     data = response.json()
     

    Assuming your API key is valid, this will return the following results:

    -
    {'response': {'query': 'wragge',
    -  'zone': [{'name': 'newspaper',
    -    'records': {'s': '*',
    -     'n': '3',
    -     'total': '3',
    -     'article': [{'id': '76672882',
    -       'url': '/newspaper/76672882',
    -       'heading': 'THE POLITICAL STIGER YORTEX.',
    -       'category': 'Article',
    -       'title': {'id': '274',
    -        'value': 'The Charleville Times (Brisbane, Qld. : 1896 - 1954)'},
    -       'date': '1901-10-12',
    -       'page': 4,
    -       'pageSequence': 4,
    -       'relevance': {'score': '250.99701', 'value': 'very relevant'},
    -       'snippet': 'PREMIER PHILP: "I think that\'s a better shot than Wragge\'s."',
    -       'troveUrl': 'https://trove.nla.gov.au/ndp/del/article/76672882?searchTerm=wragge'},
    -      {'id': '50294024',
    -       'url': '/newspaper/50294024',
    -       'heading': 'We nearly broke the drought  (. . . WE THINK)',
    -       'category': 'Article',
    -       'title': {'id': '12',
    -        'value': 'The Courier-Mail (Brisbane, Qld. : 1933 - 1954)'},
    -       'date': '1952-02-16',
    -       'page': 2,
    -       'pageSequence': 2,
    -       'relevance': {'score': '12.74085', 'value': 'very relevant'},
    -       'snippet': 'WE were determined to try our hand at rainmaking, and',
    -       'troveUrl': 'https://trove.nla.gov.au/ndp/del/article/50294024?searchTerm=wragge'},
    -      {'id': '76372015',
    -       'url': '/newspaper/76372015',
    -       'heading': 'Digest What YOU Eat.',
    -       'category': 'Article',
    -       'title': {'id': '266',
    -        'value': 'The Western Champion and General Advertiser for the Central-Western Districts (Barcaldine, Qld. : 1892 - 1922)'},
    -       'date': '1906-01-08',
    -       'page': 5,
    -       'pageSequence': 5,
    -       'relevance': {'score': '5.734701', 'value': 'very relevant'},
    -       'snippet': "The reason why any wholesome food is not properly digested is because the stomach lacks some important element of digestion. Some stomach' lack peptone, others are deficient in gastric juice or hydrochloric",
    -       'troveUrl': 'https://trove.nla.gov.au/ndp/del/article/76372015?searchTerm=wragge'}]}}]}}
    +
    {'query': 'wragge',
    + 'category': [{'code': 'newspaper',
    +   'name': 'Newspapers & Gazettes',
    +   'records': {'s': '*',
    +    'n': 2,
    +    'total': 510,
    +    'next': 'https://api.trove.nla.gov.au/v3/result?q=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrated=true&l-illtype=Cartoon&category=newspaper&encoding=json&n=2&s=AoIIQzWFoig4MjM0NjM1NA%3D%3D',
    +    'nextStart': 'AoIIQzWFoig4MjM0NjM1NA==',
    +    'article': [{'id': '21765046',
    +      'url': 'https://api.trove.nla.gov.au/v3/newspaper/21765046',
    +      'heading': 'Mrs. Adelaide Wragge.',
    +      'category': 'Article',
    +      'title': {'id': '16',
    +       'title': 'The Brisbane Courier (Qld. : 1864 - 1933)'},
    +      'date': '1931-12-16',
    +      'page': '13',
    +      'pageSequence': '13',
    +      'relevance': {'score': 215.65185546875, 'value': 'very relevant'},
    +      'snippet': 'Formerly of Victoria, and in 1864 Mayoress of Melbourne, the late Mrs. Wragge, who died recently, had been',
    +      'troveUrl': 'https://.nla.gov.au/nla.news-article21765046?searchTerm=wragge'},
    +     {'id': '82346354',
    +      'url': 'https://api.trove.nla.gov.au/v3/newspaper/82346354',
    +      'heading': 'MR WRAGGE ON WEATHER CANNONS.',
    +      'category': 'Article',
    +      'title': {'id': '269',
    +       'title': 'The North Queensland Register (Townsville, Qld. : 1892 - 1905)'},
    +      'date': '1901-03-11',
    +      'page': '10',
    +      'pageSequence': '10',
    +      'relevance': {'score': 181.52200317382812, 'value': 'very relevant'},
    +      'snippet': 'I have been to Styria, have seen the cannons made in the forges, have witnessed the experiments, have visited Herr Stiger, the inventor of the',
    +      'troveUrl': 'https://.nla.gov.au/nla.news-article82346354?searchTerm=wragge'}]}}]}
     
    @@ -219,7 +213,7 @@

    Basic usage

    -

    Tests

    +

    Version 2 tests

    @@ -787,6 +781,18 @@
    +
    +
    +

    Simple search with facets

    +
    +
    +
    {% raw %} -
    -
    +
    + {% endraw %} -
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge%20weather', 3)
    +assert {'q': 'wragge weather', 'category': 'newspaper'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    +
    + {% endraw %} +
    +
    +

    Multiple keywords with OR are passed along as is.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge%20OR%20weather', 3)
    +assert {'q': 'wragge OR weather', 'category': 'newspaper'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Phrase searches are passed along as is.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=%22inclement%20wragge%22', 3)
    +assert {'q': '"inclement wragge"', 'category': 'newspaper'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    More complex queries such as date ranges should be passed along as is.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge%20date%3A%5B1901%20TO%201903%5D&l-artType=newspapers', 3)
    +assert {'q': 'wragge date:[1901 TO 1903]', 'category': 'newspaper', 'l-artType': 'newspapers'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to gazettes using facets.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=gazette', 3)
    +assert {'q': 'wragge', 'category': 'newspaper', 'l-artType': 'gazette'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit state to NSW using facets.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-state=New%20South%20Wales', 3)
    +assert {'q': 'wragge', 'l-state': ['New South Wales'], 'category': 'newspaper'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to a specific newspaper (the Sydney Morning Herald, title id 35) using facets.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-title=35', 3)
    +assert {'q': 'wragge', 'category': 'newspaper', 'l-title': ['35'], 'l-artType': 'newspapers'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to 'Article' category using facets.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-category=Article', 3)
    +assert {'q': 'wragge', 'category': 'newspaper', 'l-category': ['Article'], 'l-artType': 'newspapers'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to specific decade using facets.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-decade=190', 3)
    +assert {'q': 'wragge', 'l-artType': 'newspapers', 'l-decade': ['190'], 'category': 'newspaper'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to specific year using facets.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-decade=190&l-year=1903', 3)
    +assert {'q': 'wragge', 'l-artType': 'newspapers', 'l-decade': ['190'], 'l-year': ['1903'], 'category': 'newspaper'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to articles with an illustration type of 'Photo' using facets.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-illustrationType=Photo', 3)
    +assert {'q': 'wragge', 'category': 'newspaper', 'l-illustrated': 'true', 'l-illtype': ['Photo'], 'l-artType': 'newspapers'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to articles containing more than 1,000 words using facets.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-word=1000%2B%20Words', 3)
    +assert {'q': 'wragge', 'category': 'newspaper', 'l-word': ['1000+ Words'], 'l-artType': 'newspapers'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +
    +

    Multiple keywords in 'Any of these words' box.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword.any=wragge%20weather', 3)
    +assert {'q': '(wragge OR weather)', 'category': 'newspaper'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    -
    -
    {'q': 'rabbits date:[1900-01-31T00:00:00Z TO 1900-05-30T00:00:00Z]',
    - 'zone': 'newspaper,gazette'}
    + {% endraw %} + +
    +
    +

    Multiple keywords in 'The phrase' box.

    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword.phrase=inclement%20wragge', 3)
    +assert {'q': '"inclement wragge"', 'category': 'newspaper'} == params
    +assert query_api(params) == 200
    +
    +
    {% endraw %} +
    +
    +

    Keywords in 'All of these words' and 'Without these words' boxes.

    + +
    - +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword.not=weather&keyword=wragge', 3)
    +assert {'q': 'wragge AND NOT (weather)', 'category': 'newspaper'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to a specific date range.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&date.from=1900-01-01&date.to=1900-02-04&l-advArtType=newspapers', 3)
    +assert {'q': 'wragge date:[1899-12-31T00:00:00Z TO 1900-02-04T00:00:00Z]', 'category': 'newspaper', 'l-artType': 'newspapers'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to a specific state.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advstate=Queensland', 3)
    +assert {'q': 'wragge', 'category': 'newspaper', 'l-state': ['Queensland'], 'l-artType': 'newspapers'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to specific newspapers.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advtitle=16&l-advtitle=1055', 3)
    +assert {'q': 'wragge', 'category': 'newspaper', 'l-title': ['16', '1055'], 'l-artType': 'newspapers'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to a specific category.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advcategory=Family%20Notices', 3)
    +assert {'q': 'wragge', 'category': 'newspaper', 'l-category': ['Family Notices'], 'l-artType': 'newspapers'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to a specific illustration type.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advIllustrationType=Photo', 3)
    +assert {'q': 'wragge', 'category': 'newspaper', 'l-illustrated': 'true', 'l-illtype': ['Photo'], 'l-artType': 'newspapers'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    +
    +

    Limit to a specific number of words.

    + +
    +
    +
    + {% raw %} + +
    +
    + +
    +
    +
    params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advWord=100%20-%201000%20Words', 3)
    +assert {'q': 'wragge', 'category': 'newspaper', 'l-word': '100 - 1000 Words', 'l-artType': 'newspapers'} == params
    +assert query_api(params) == 200
    +
    + +
    +
    +
    + +
    + {% endraw %} + +
    + + + diff --git a/index.ipynb b/index.ipynb index 709f332..85a0b8b 100644 --- a/index.ipynb +++ b/index.ipynb @@ -31,7 +31,9 @@ "source": [ "* Construct a search in the Trove 'Newspapers and Gazettes' category.\n", "* Copy the search url.\n", - "* Feed the url to the `parse_query` function." + "* Feed the url to the `parse_query` function\n", + "\n", + "The second parameter to `parse_query` is the Trove API version number. The default is `2` for backwards compatibility." ] }, { @@ -43,11 +45,12 @@ "data": { "text/plain": [ "{'q': 'wragge',\n", - " 'zone': 'newspaper',\n", + " 'l-artType': 'newspapers',\n", " 'l-state': ['Queensland'],\n", " 'l-category': ['Article'],\n", " 'l-illustrated': 'true',\n", - " 'l-illtype': ['Cartoon']}" + " 'l-illtype': ['Cartoon'],\n", + " 'category': 'newspaper'}" ] }, "execution_count": null, @@ -58,7 +61,7 @@ "source": [ "from trove_query_parser.parser import parse_query\n", "\n", - "parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon')" + "parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon', 3)" ] }, { @@ -89,7 +92,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" } diff --git a/settings.ini b/settings.ini index 5e68626..3964328 100644 --- a/settings.ini +++ b/settings.ini @@ -13,7 +13,7 @@ author = Tim Sherratt author_email = tim@timsherratt.org copyright = Tim Sherratt branch = master -version = 0.1.1 +version = 0.2.0 min_python = 3.6 audience = Developers language = English diff --git a/trove_query_parser/__init__.py b/trove_query_parser/__init__.py index 485f44a..d3ec452 100644 --- a/trove_query_parser/__init__.py +++ b/trove_query_parser/__init__.py @@ -1 +1 @@ -__version__ = "0.1.1" +__version__ = 
"0.2.0" diff --git a/trove_query_parser/parser.py b/trove_query_parser/parser.py index 7031fc8..0a2ebfc 100644 --- a/trove_query_parser/parser.py +++ b/trove_query_parser/parser.py @@ -4,6 +4,7 @@ # Cell +import os from urllib.parse import urlparse, parse_qsl, parse_qs import requests import arrow @@ -21,12 +22,13 @@ def format_date(date, start=False): date = '{}Z'.format(date_obj.format('YYYY-MM-DDT00:00:00')) return date -def parse_query(query): +def parse_query(query, api_version=2): ''' Converts the parameters of a search using the Trove web interface into a form the API will understand. Parameters: * `query` – the url of a search in the Trove newspapers & gazettes category + * `api_version` – Trove API version (default is 2) Returns: * a dict containing the parameters (multiple values will be in a list) @@ -85,10 +87,13 @@ def parse_query(query): elif key == 'keyword.any': keywords.append('({})'.format(' OR '.join(value.split()))) elif key in ['l-ArtType', 'l-advArtType', 'l-artType']: - if value == 'newspapers': - new_params['zone'] = 'newspaper' - elif value == 'gazette': - new_params['zone'] = 'gazette' + if api_version == 2: + if value == 'newspapers': + new_params['zone'] = 'newspaper' + elif value == 'gazette': + new_params['zone'] = 'gazette' + elif api_version == 3: + new_params['l-artType'] = value if keywords: if 'q' in new_params: new_params['q'] += ' AND {}'.format(' AND '.join(keywords)) @@ -106,7 +111,9 @@ def parse_query(query): new_params['q'] = date_query if 'q' not in new_params: new_params['q'] = ' ' - if 'zone' not in new_params: + if api_version == 2 and 'zone' not in new_params: new_params['zone'] = 'newspaper,gazette' + if api_version == 3 and 'category' not in new_params: + new_params['category'] = 'newspaper' # return '{}?{}'.format('https://api.trove.nla.gov.au/v2/result', urlencode(new_params, doseq=True)) return new_params \ No newline at end of file