Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parameter validation on MastMissions queries #3126

Merged
merged 5 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,13 @@ mast
- Deprecated ``enable_cloud_dataset`` and ``disable_cloud_dataset`` in classes where they
are non-operational. They will be removed in a future release. [#3113]

- Present users with an error when nonexistent query criteria are used in ``mast.MastMissions`` query functions. [#3126]

- Present users with an error when nonexistent query criteria are used in ``mast.Catalogs.query_region`` and
``mast.Catalogs.query_object``. [#3126]

- Handle HLSP data products in ``Observations.get_cloud_uris``. [#3126]

mpc
^^^

Expand Down
3 changes: 3 additions & 0 deletions astroquery/mast/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class Conf(_config.ConfigNamespace):
ssoserver = _config.ConfigItem(
'https://ssoportal.stsci.edu',
'MAST SSO Portal server.')
catalogs_server = _config.ConfigItem(
'https://catalogs.mast.stsci.edu',
'Catalogs.MAST server.')
timeout = _config.ConfigItem(
600,
'Time limit for requests from the STScI server.')
Expand Down
184 changes: 159 additions & 25 deletions astroquery/mast/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
This module contains various methods for querying MAST collections such as catalogs.
"""

import difflib
from json import JSONDecodeError
import warnings
import os
import time

from requests import HTTPError
from requests import HTTPError, RequestException

import astropy.units as u
import astropy.coordinates as coord
Expand All @@ -21,7 +23,7 @@
from ..utils.class_or_instance import class_or_instance
from ..exceptions import InvalidQueryError, MaxResultsWarning, InputWarning

from . import utils
from . import utils, conf
from .core import MastQueryWithLogin


Expand All @@ -42,11 +44,13 @@

services = {"panstarrs": {"path": "panstarrs/{data_release}/{table}.json",
"args": {"data_release": "dr2", "table": "mean"}}}
self._catalogs_mast_search_options = ['columns', 'sort_by', 'table', 'data_release']

self._service_api_connection.set_service_params(services, "catalogs", True)

self.catalog_limit = None
self._current_connection = None
self._service_columns = dict() # Info about columns for Catalogs.MAST services

def _parse_result(self, response, *, verbose=False):

Expand All @@ -58,9 +62,102 @@

return results_table

def _get_service_col_config(self, catalog, release='dr2', table='mean'):
    """
    For a given Catalogs.MAST catalog, return a table of all searchable columns and their descriptions.
    As of now, this function is exclusive to the Pan-STARRS catalog.

    The column table is fetched from the server once per (catalog, release, table)
    combination and cached in ``self._service_columns``.

    Parameters
    ----------
    catalog : str
        The catalog to be queried.
    release : str, optional
        Catalog data release to query from.
    table : str, optional
        Catalog table to query from.

    Returns
    -------
    response : `~astropy.table.Table` or None
        Table with the columns ``name``, ``data_type`` and ``description``.
        None is returned when ``catalog`` is not ``'panstarrs'``.

    Raises
    ------
    JSONDecodeError
        If the metadata response cannot be decoded as JSON.
    ConnectionError
        If the request to the server fails.
    KeyError
        If an expected key is missing from the response data.
    RuntimeError
        If any other error occurs while fetching or parsing the column list.
    """
    # Only supported for PanSTARRS currently
    if catalog != 'panstarrs':
        return None

    service_key = (catalog, release, table)
    if service_key in self._service_columns:
        return self._service_columns[service_key]

    # Shared tail of every error message raised below
    context = f' for {catalog} catalog {table}, {release}'

    try:
        # Send server request to get column list for given parameters
        request_url = f'{conf.catalogs_server}/api/v0.1/{catalog}/{release}/{table}/metadata.json'
        resp = utils._simple_request(request_url)

        # Parse JSON and extract necessary info
        results = resp.json()
        rows = [
            (result['column_name'], result['db_type'], result['description'])
            for result in results
        ]

        # Create Table with parsed data
        col_table = Table(rows=rows, names=('name', 'data_type', 'description'))
        self._service_columns[service_key] = col_table

    except JSONDecodeError as ex:
        # JSONDecodeError requires (msg, doc, pos); passing only a message would
        # raise a TypeError here instead of the intended decoding error.
        raise JSONDecodeError(f'Failed to decode JSON response while attempting to get column list'
                              f'{context}: {ex}', ex.doc, ex.pos) from ex
    except RequestException as ex:
        raise ConnectionError(f'Failed to connect to the server while attempting to get column list'
                              f'{context}: {ex}') from ex
    except KeyError as ex:
        raise KeyError(f'Expected key not found in response data while attempting to get column list'
                       f'{context}: {ex}') from ex
    except Exception as ex:
        raise RuntimeError(f'An unexpected error occurred while attempting to get column list'
                           f'{context}: {ex}') from ex

    return self._service_columns[service_key]

def _validate_service_criteria(self, catalog, **criteria):
    """
    Check that criteria keyword arguments are valid column names for the service.
    Raises InvalidQueryError if a criteria argument is invalid.

    Parameters
    ----------
    catalog : str
        The catalog to be queried.
    **criteria
        Keyword arguments representing criteria filters to apply.

    Raises
    ------
    InvalidQueryError
        If a keyword does not match any valid column names, an error is raised that suggests the closest
        matching column name, if available.
    """
    # The column list depends on the data release and table, which may themselves be criteria
    release = criteria.get('data_release', 'dr2')
    table = criteria.get('table', 'mean')
    col_config = self._get_service_col_config(catalog, release, table)

    # Without column metadata there is nothing to validate against
    if not col_config:
        return

    valid_cols = list(col_config['name']) + self._catalogs_mast_search_options
    # Keywords are matched case-insensitively; lowercase the valid names once, not once per keyword
    valid_lower = {name.lower() for name in valid_cols}

    for kwd in criteria:
        if kwd.lower() in valid_lower:
            continue

        # First invalid keyword aborts the query, with a suggestion when one is close enough
        error_msg = f"Filter '{kwd}' does not exist for {catalog} catalog {table}, {release}."
        closest_match = difflib.get_close_matches(kwd, valid_cols, n=1)
        if closest_match:
            error_msg += f" Did you mean '{closest_match[0]}'?"
        raise InvalidQueryError(error_msg)

Check warning on line 156 in astroquery/mast/collections.py

View check run for this annotation

Codecov / codecov/patch

astroquery/mast/collections.py#L156

Added line #L156 was not covered by tests

@class_or_instance
def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
version=None, pagesize=None, page=None, **kwargs):
version=None, pagesize=None, page=None, **criteria):
"""
Given a sky position and radius, returns a list of catalog entries.
See column documentation for specific catalogs `here <https://mast.stsci.edu/api/v0/pages.html>`__.
Expand Down Expand Up @@ -88,10 +185,18 @@
Default None.
Can be used to override the default behavior of all results being returned to obtain a
specific page of results.
**kwargs
**criteria
Other catalog-specific keyword args.
These can be found in the `service documentation <https://mast.stsci.edu/api/v0/_services.html>`__
for specific catalogs. For example one can specify the magtype for an HSC search.
for specific catalogs. For example, one can specify the magtype for an HSC search.
For catalogs available through Catalogs.MAST (PanSTARRS), the Column Name is the keyword, and the argument
should be either an acceptable value for that parameter, or a list consisting of values, or tuples of
decorator, value pairs (decorator, value). In addition, columns may be used to select the return columns,
consisting of a list of column names. Results may also be sorted through the query with the parameter
sort_by composed of either a single Column Name to sort ASC, or a list of Column Names to sort ASC or
tuples of Column Name and Direction (ASC, DESC) to indicate sort order (Column Name, DESC).
Detailed information of Catalogs.MAST criteria usage can
be found `here <https://catalogs.mast.stsci.edu/docs/index.html>`__.

Returns
-------
Expand All @@ -113,9 +218,20 @@
if catalog.lower() in self._service_api_connection.SERVICES:
self._current_connection = self._service_api_connection
service = catalog

# validate user criteria
self._validate_service_criteria(catalog.lower(), **criteria)

# adding additional user specified parameters
for prop, value in criteria.items():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any chance to validate these, or is there way too large a scatter in what's allowed for each of these catalogs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, the only service that runs through Catalogs.MAST (currently), and thus the service API connection, is PanSTARRS. I added Catalogs._get_service_col_config and Catalogs._validate_service_criteria to fetch the column metadata for PanSTARRS and check parameters.

params[prop] = value

else:
self._current_connection = self._portal_api_connection

# valid criteria keywords
valid_criteria = []

# Sorting out the non-standard portal service names
if catalog.lower() == "hsc":
if version == 2:
Expand All @@ -125,19 +241,20 @@
warnings.warn("Invalid HSC version number, defaulting to v3.", InputWarning)
service = "Mast.Hsc.Db.v3"

self.catalog_limit = kwargs.get('nr', 50000)

# Hsc specific parameters (can be overridden by user)
params['nr'] = 50000
params['ni'] = 1
params['magtype'] = 1
self.catalog_limit = criteria.pop('nr', 50000)
valid_criteria = ['nr', 'ni', 'magtype']
params['nr'] = self.catalog_limit
params['ni'] = criteria.pop('ni', 1)
params['magtype'] = criteria.pop('magtype', 1)

elif catalog.lower() == "galex":
service = "Mast.Galex.Catalog"
self.catalog_limit = kwargs.get('maxrecords', 50000)
self.catalog_limit = criteria.get('maxrecords', 50000)

# galex specific parameters (can be overridden by user)
params['maxrecords'] = 50000
valid_criteria = ['maxrecords']
params['maxrecords'] = criteria.pop('maxrecords', 50000)

elif catalog.lower() == "gaia":
if version == 1:
Expand All @@ -158,9 +275,16 @@
service = "Mast.Catalogs." + catalog + ".Cone"
self.catalog_limit = None

# adding additional user specified parameters
for prop, value in kwargs.items():
params[prop] = value
# additional user-specified parameters are not valid
if criteria:
key = next(iter(criteria))
closest_match = difflib.get_close_matches(key, valid_criteria, n=1)
error_msg = (

Check warning on line 282 in astroquery/mast/collections.py

View check run for this annotation

Codecov / codecov/patch

astroquery/mast/collections.py#L280-L282

Added lines #L280 - L282 were not covered by tests
f"Filter '{key}' does not exist for catalog {catalog}. Did you mean '{closest_match[0]}'?"
if closest_match
else f"Filter '{key}' does not exist for catalog {catalog}."
)
raise InvalidQueryError(error_msg)

Check warning on line 287 in astroquery/mast/collections.py

View check run for this annotation

Codecov / codecov/patch

astroquery/mast/collections.py#L287

Added line #L287 was not covered by tests

# Parameters will be passed as JSON objects only when accessing the PANSTARRS API
use_json = catalog.lower() == 'panstarrs'
Expand All @@ -170,7 +294,7 @@

@class_or_instance
def query_object_async(self, objectname, *, radius=0.2*u.deg, catalog="Hsc",
pagesize=None, page=None, version=None, **kwargs):
pagesize=None, page=None, version=None, **criteria):
"""
Given an object name, returns a list of catalog entries.
See column documentation for specific catalogs `here <https://mast.stsci.edu/api/v0/pages.html>`__.
Expand All @@ -197,10 +321,18 @@
to obtain a specific page of results.
version : int, optional
Version number for catalogs that have versions. Default is highest version.
**kwargs
**criteria
Catalog-specific keyword args.
These can be found in the `service documentation <https://mast.stsci.edu/api/v0/_services.html>`__.
for specific catalogs. For example one can specify the magtype for an HSC search.
for specific catalogs. For example, one can specify the magtype for an HSC search.
For catalogs available through Catalogs.MAST (PanSTARRS), the Column Name is the keyword, and the argument
should be either an acceptable value for that parameter, or a list consisting of values, or tuples of
decorator, value pairs (decorator, value). In addition, columns may be used to select the return columns,
consisting of a list of column names. Results may also be sorted through the query with the parameter
sort_by composed of either a single Column Name to sort ASC, or a list of Column Names to sort ASC or
tuples of Column Name and Direction (ASC, DESC) to indicate sort order (Column Name, DESC).
Detailed information of Catalogs.MAST criteria usage can
be found `here <https://catalogs.mast.stsci.edu/docs/index.html>`__.

Returns
-------
Expand All @@ -215,7 +347,7 @@
version=version,
pagesize=pagesize,
page=page,
**kwargs)
**criteria)

@class_or_instance
def query_criteria_async(self, catalog, *, pagesize=None, page=None, **criteria):
Expand Down Expand Up @@ -281,6 +413,9 @@
self._current_connection = self._service_api_connection
service = catalog

# validate user criteria
self._validate_service_criteria(catalog.lower(), **criteria)

if not self._current_connection.check_catalogs_criteria_params(criteria):
raise InvalidQueryError("At least one non-positional criterion must be supplied.")

Expand All @@ -295,26 +430,25 @@
if coordinates or objectname:
service += ".Position"
service += ".Rows" # Using the rowstore version of the query for speed
filters = self._current_connection.build_filter_set("Mast.Catalogs.Tess.Cone",
service, **criteria)
column_config_name = "Mast.Catalogs.Tess.Cone"
params["columns"] = "*"
elif catalog.lower() == "ctl":
service = "Mast.Catalogs.Filtered.Ctl"
if coordinates or objectname:
service += ".Position"
service += ".Rows" # Using the rowstore version of the query for speed
filters = self._current_connection.build_filter_set("Mast.Catalogs.Tess.Cone",
service, **criteria)
column_config_name = "Mast.Catalogs.Tess.Cone"
params["columns"] = "*"
elif catalog.lower() == "diskdetective":
service = "Mast.Catalogs.Filtered.DiskDetective"
if coordinates or objectname:
service += ".Position"
filters = self._current_connection.build_filter_set("Mast.Catalogs.Dd.Cone",
service, **criteria)
column_config_name = "Mast.Catalogs.Dd.Cone"
else:
raise InvalidQueryError("Criteria query not available for {}".format(catalog))

filters = self._current_connection.build_filter_set(column_config_name, service, **criteria)

if not filters:
raise InvalidQueryError("At least one non-positional criterion must be supplied.")
params["filters"] = filters
Expand Down
Loading