-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore(deps): update minor updates (#20)
* chore(deps): update minor updates * ruff: format * ruff: cleanup --------- Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: Jakob van Santen <[email protected]>
- Loading branch information
1 parent
0fe6b9e
commit 1b2cc3e
Showing
7 changed files
with
496 additions
and
386 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,7 +26,7 @@ | |
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
|
@@ -78,27 +78,26 @@ | |
], | ||
"source": [ | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"from healpy import ang2pix\n", | ||
"from extcats import CatalogPusher\n", | ||
"\n", | ||
"# build the pusher object and point it to the raw files.\n", | ||
"wisep = CatalogPusher.CatalogPusher(\n", | ||
" catalog_name = 'wise',\n", | ||
" data_source = '../testdata/AllWISE/',\n", | ||
" file_type = \".bz2\")\n", | ||
" catalog_name=\"wise\", data_source=\"../testdata/AllWISE/\", file_type=\".bz2\"\n", | ||
")\n", | ||
"\n", | ||
"\n", | ||
"# read column names and types from schema file\n", | ||
"schema_file = \"../testdata/AllWISE/wise-allwise-cat-schema.txt\"\n", | ||
"names, types = [], {}\n", | ||
"with open(schema_file) as schema:\n", | ||
" for l in schema:\n", | ||
" if \"#\" in l or (not l.strip()):\n", | ||
" for line in schema:\n", | ||
" if \"#\" in line or (not line.strip()):\n", | ||
" continue\n", | ||
" name, dtype = zip(\n", | ||
" [p.strip() for p in l.strip().split(\" \") if not p in [\"\"]])\n", | ||
" name, dtype = zip([p.strip() for p in line.strip().split(\" \") if p not in [\"\"]])\n", | ||
" name, dtype = name[0], dtype[0]\n", | ||
" #print (name, dtype)\n", | ||
" # print (name, dtype)\n", | ||
" names.append(name)\n", | ||
" # convert the data type\n", | ||
" if \"char\" in dtype:\n", | ||
|
@@ -114,60 +113,65 @@ | |
" elif dtype == \"int8\":\n", | ||
" types[name] = np.int8\n", | ||
" else:\n", | ||
" print(\"unknown data type: %s\"%dtype)\n", | ||
" print(\"unknown data type: %s\" % dtype)\n", | ||
"\n", | ||
"# select the columns you want to use.\n", | ||
"use_cols = []\n", | ||
"select = [\"Basic Position and Identification Information\", \n", | ||
" \"Primary Photometric Information\", \n", | ||
" \"Measurement Quality and Source Reliability Information\",\n", | ||
" \"2MASS PSC Association Information\"]\n", | ||
"select = [\n", | ||
" \"Basic Position and Identification Information\",\n", | ||
" \"Primary Photometric Information\",\n", | ||
" \"Measurement Quality and Source Reliability Information\",\n", | ||
" \"2MASS PSC Association Information\",\n", | ||
"]\n", | ||
"with open(schema_file) as schema:\n", | ||
" blocks = schema.read().split(\"#\")\n", | ||
" for block in blocks:\n", | ||
" if any([k in block for k in select]):\n", | ||
" for l in block.split(\"\\n\")[1:]:\n", | ||
" if \"#\" in l or (not l.strip()):\n", | ||
" for line in block.split(\"\\n\")[1:]:\n", | ||
" if \"#\" in line or (not line.strip()):\n", | ||
" continue\n", | ||
" name, dtype = zip(\n", | ||
" [p.strip() for p in l.strip().split(\" \") if not p in [\"\"]])\n", | ||
" [p.strip() for p in line.strip().split(\" \") if p not in [\"\"]]\n", | ||
" )\n", | ||
" use_cols.append(name[0])\n", | ||
"print(\"we will be using %d columns out of %d\"%(len(use_cols), len(names)))\n", | ||
"print(\"we will be using %d columns out of %d\" % (len(use_cols), len(names)))\n", | ||
"\n", | ||
"# now assign the reader to the catalog pusher object\n", | ||
"import pandas as pd\n", | ||
"\n", | ||
"wisep.assign_file_reader(\n", | ||
" reader_func = pd.read_csv, \n", | ||
" read_chunks = True,\n", | ||
" names = names,\n", | ||
" usecols = lambda x : x in use_cols,\n", | ||
" #dtype = types, #this mess up with NaN values\n", | ||
" chunksize=5000,\n", | ||
" header=None,\n", | ||
" engine='c',\n", | ||
" sep='|',\n", | ||
" na_values = 'nnnn')\n", | ||
" reader_func=pd.read_csv,\n", | ||
" read_chunks=True,\n", | ||
" names=names,\n", | ||
" usecols=lambda x: x in use_cols,\n", | ||
" # dtype = types, #this mess up with NaN values\n", | ||
" chunksize=5000,\n", | ||
" header=None,\n", | ||
" engine=\"c\",\n", | ||
" sep=\"|\",\n", | ||
" na_values=\"nnnn\",\n", | ||
")\n", | ||
"\n", | ||
"\n", | ||
"# define the dictionary modifier that will act on the single entries\n", | ||
"def modifier(srcdict):\n", | ||
" srcdict['hpxid_16'] = int(\n", | ||
" ang2pix(2**16, srcdict['ra'], srcdict['dec'], lonlat = True, nest = True))\n", | ||
" #srcdict['_id'] = srcdict.pop('source_id') doesn't work, seems it is not unique\n", | ||
" srcdict[\"hpxid_16\"] = int(\n", | ||
" ang2pix(2**16, srcdict[\"ra\"], srcdict[\"dec\"], lonlat=True, nest=True)\n", | ||
" )\n", | ||
" # srcdict['_id'] = srcdict.pop('source_id') doesn't work, seems it is not unique\n", | ||
" return srcdict\n", | ||
"\n", | ||
"\n", | ||
"wisep.assign_dict_modifier(modifier)\n", | ||
"\n", | ||
"\n", | ||
"# finally push it in the database\n", | |
"wisep.push_to_db(\n", | ||
" coll_name = 'srcs', \n", | ||
" index_on = \"hpxid_16\",\n", | ||
" overwrite_coll = True, \n", | ||
" append_to_coll = False)\n", | ||
" coll_name=\"srcs\", index_on=\"hpxid_16\", overwrite_coll=True, append_to_coll=False\n", | ||
")\n", | ||
"\n", | ||
"\n", | ||
"# if needed print extensive info on database\n", | ||
"#wisep.info()" | ||
"# wisep.info()" | ||
] | ||
}, | ||
{ | ||
|
@@ -210,53 +214,57 @@ | |
} | ||
], | ||
"source": [ | ||
"# now test the database for query performances. We use \n", | ||
"# now test the database for query performances. We use\n", | ||
"# a sample of randomly distributed points on a sphere\n", | ||
"# as targets. \n", | ||
"# as targets.\n", | ||
"\n", | ||
"# define the function to test coordinate based queries:\n", | |
"from healpy import ang2pix, get_all_neighbours\n", | ||
"from healpy import get_all_neighbours\n", | ||
"from astropy.table import Table\n", | ||
"from astropy.coordinates import SkyCoord\n", | ||
"\n", | ||
"return_fields = ['designation', 'ra', 'dec']\n", | ||
"return_fields = [\"designation\", \"ra\", \"dec\"]\n", | ||
"project = {}\n", | ||
"for field in return_fields: project[field] = 1\n", | ||
"print (project)\n", | ||
"for field in return_fields:\n", | ||
" project[field] = 1\n", | ||
"print(project)\n", | ||
"\n", | ||
"\n", | ||
"hp_order, rs_arcsec = 16, 30.0\n", | ||
"\n", | ||
"\n", | ||
"hp_order, rs_arcsec = 16, 30.\n", | ||
"def test_query(ra, dec, coll):\n", | ||
" \"\"\"query collection for points within rs of target ra, dec.\n", | ||
" The results are returned as an astropy Table.\"\"\"\n", | |
" \n", | ||
" # find the index of the target pixel and its neighbours \n", | ||
" target_pix = int( ang2pix(2**hp_order, ra, dec, nest = True, lonlat = True) )\n", | ||
" neighbs = get_all_neighbours(2**hp_order, ra, dec, nest = True, lonlat = True)\n", | ||
"\n", | ||
" # find the index of the target pixel and its neighbours\n", | ||
" target_pix = int(ang2pix(2**hp_order, ra, dec, nest=True, lonlat=True))\n", | ||
" neighbs = get_all_neighbours(2**hp_order, ra, dec, nest=True, lonlat=True)\n", | ||
"\n", | ||
" # remove non-existing neighbours (in case of E/W/N/S) and add center pixel\n", | |
" pix_group = [int(pix_id) for pix_id in neighbs if pix_id != -1] + [target_pix]\n", | ||
" \n", | ||
"\n", | ||
" # query the database for sources in these pixels\n", | ||
" qfilter = { 'hpxid_%d'%hp_order: { '$in': pix_group } }\n", | ||
" qfilter = {\"hpxid_%d\" % hp_order: {\"$in\": pix_group}}\n", | ||
" qresults = [o for o in coll.find(qfilter)]\n", | ||
" if len(qresults)==0:\n", | ||
" if len(qresults) == 0:\n", | ||
" return None\n", | ||
" \n", | ||
"\n", | ||
" # then use astropy to find the closest match\n", | ||
" tab = Table(qresults)\n", | ||
" target = SkyCoord(ra, dec, unit = 'deg')\n", | ||
" matches_pos = SkyCoord(tab['ra'], tab['dec'], unit = 'deg')\n", | ||
" target = SkyCoord(ra, dec, unit=\"deg\")\n", | ||
" matches_pos = SkyCoord(tab[\"ra\"], tab[\"dec\"], unit=\"deg\")\n", | ||
" d2t = target.separation(matches_pos).arcsecond\n", | ||
" match_id = np.argmin(d2t)\n", | ||
"\n", | ||
" # if it's too far away don't use it\n", | ||
" if d2t[match_id]>rs_arcsec:\n", | ||
" if d2t[match_id] > rs_arcsec:\n", | ||
" return None\n", | ||
" return tab[match_id]\n", | ||
"\n", | ||
"\n", | ||
"# run the test\n", | ||
"wisep.run_test(test_query, npoints = 10000)\n" | ||
"wisep.run_test(test_query, npoints=10000)" | ||
] | ||
}, | ||
{ | ||
|
@@ -274,7 +282,7 @@ | |
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
|
@@ -290,13 +298,14 @@ | |
} | ||
], | ||
"source": [ | ||
"mqp.healpix_meta(healpix_id_key = 'hpxid_16', order = 16, is_indexed = True, nest = True)\n", | ||
"mqp.coord_meta(ra = 'ra', dec = 'dec')\n", | ||
"mqp.science_meta(\n", | ||
" contact = 'C. Norris', \n", | ||
" email = '[email protected]', \n", | ||
" description = 'allWISE infrared catalog',\n", | ||
" reference = 'http://wise2.ipac.caltech.edu/docs/release/allwise/')" | ||
"wisep.healpix_meta(healpix_id_key=\"hpxid_16\", order=16, is_indexed=True, nest=True)\n", | ||
"wisep.coord_meta(ra=\"ra\", dec=\"dec\")\n", | ||
"wisep.science_meta(\n", | ||
" contact=\"C. Norris\",\n", | ||
" email=\"[email protected]\",\n", | ||
" description=\"allWISE infrared catalog\",\n", | ||
" reference=\"http://wise2.ipac.caltech.edu/docs/release/allwise/\",\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
|
Oops, something went wrong.