Skip to content

Commit

Permalink
corrected massive index bug
Browse files Browse the repository at this point in the history
  • Loading branch information
marina-ricci committed Nov 15, 2024
1 parent 5275786 commit 87f751c
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 15 deletions.
34 changes: 22 additions & 12 deletions txpipe/extensions/cluster_counts/make_ensemble_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import gc
import numpy as np
from ...base_stage import PipelineStage
from .sources_select_compute import CLClusterShearCatalogs
from .sources_select_compute import CLClusterShearCatalogs, CombinedClusterCatalog
from ...data_types import ShearCatalog, HDFFile, PhotozPDFFile, FiducialCosmology, TomographyCatalog, PickleFile
from ...utils.calibrators import Calibrator
from collections import defaultdict
Expand Down Expand Up @@ -70,9 +70,6 @@ def run(self):
#clusters = self.load_cluster_list(group=bins[0])

#print(bins[0].keys())




pickle.dump(cluster_stack_dict, open(self.get_output("cluster_profiles"), 'wb'))

Expand All @@ -88,17 +85,22 @@ def load_cluster_shear_catalog(self) :
with self.open_input("cluster_shear_catalogs") as f:
g = f["index/"]
cluster_index = g['cluster_index'][:]
cluster_id = g['cluster_id'][:]
tangential_comp = g['tangential_comp'][:]
cross_comp = g['cross_comp'][:]
source_index = g['source_index'][:]
weight = g['weight'][:]
distance_arcmin = g['distance_arcmin'][:]

print(len(cluster_index), len(tangential_comp), len(source_index))

return Table({"cluster_index": cluster_index, "tangential_comp_clmm": tangential_comp,
tab = Table({"cluster_index": cluster_index, "cluster_id" : cluster_id, "tangential_comp_clmm": tangential_comp,
"cross_comp_clmm": cross_comp, "source_index": source_index,
"weight_clmm": weight, "distance_arcmin": distance_arcmin})

print(tab[0:4])

return tab



Expand All @@ -110,21 +112,29 @@ def create_cluster_ensemble(self, radial_bins, clmm_cosmo, cluster_list, cluster

# Loop through clusters and calculate the profiles
ncluster = len(cluster_list)
print('Ncluster', ncluster)

for cluster_index in range(ncluster) :

# Select subset of background shear information for this particular cluster

mask = (cluster_shears_cat["cluster_index"] == cluster_index)
bg_cat = cluster_shears_cat[mask]

print('cluster_index', cluster_index)

#mask = (cluster_shears_cat["cluster_id"] == id_cl) #THERE IS A PROBLEM HERE !!!

z_cl = cluster_list[cluster_index]["redshift"]
rich_cl = cluster_list[cluster_index]["richness"]
ra_cl = cluster_list[cluster_index]["ra"]
dec_cl = cluster_list[cluster_index]["dec"]
id_cl = cluster_list[cluster_index]["id"]

print('theta_max', np.max(bg_cat["distance_arcmin"]), '=', clmm.utils.convert_units(np.max(bg_cat["distance_arcmin"]), 'arcmin', 'Mpc', z_cl, clmm_cosmo), 'Mpc')

mask = (cluster_shears_cat['cluster_id'] == id_cl)
print(mask)
bg_cat = cluster_shears_cat[mask]

print('For cluster', id_cl, 'at z=',z_cl, 'theta_max is', np.max(bg_cat["distance_arcmin"]), ' arcmin =', clmm.utils.convert_units(np.max(bg_cat["distance_arcmin"]), 'arcmin', 'Mpc', z_cl, clmm_cosmo), 'Mpc')
print(len(bg_cat), bg_cat[0:3])

# To use CLMM, need to have galaxy table in clmm.GCData type
galcat = clmm.GCData(bg_cat)
Expand Down Expand Up @@ -190,8 +200,8 @@ def load_cluster_catalog_tomography_group(self, radial_bins, clmm_cosmo, cluster

for key in k :
group = f["cluster_bin"][key]
clusters = self.load_cluster_list(group=group)
print(key, group, dict(group.attrs), len(clusters))
clusters = self.load_cluster_list(group=group) # was this meant to be self.get_cluster_indices(...)? WHERE DOES THIS FUNCTION COME FROM?
print(key, group, dict(group.attrs), len(clusters), clusters)

if len(clusters)>1:
cluster_stack = self.create_cluster_ensemble(radial_bins, clmm_cosmo, clusters, cluster_shears_cat, cluster_ensemble_id=key)
Expand Down
16 changes: 13 additions & 3 deletions txpipe/extensions/cluster_counts/sources_select_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ def run(self):
index_group = outfile.create_group("index")
index_group.create_dataset("cluster_index", shape=(total_count,), dtype=np.int64)
index_group.create_dataset("source_index", shape=(total_count,), dtype=np.int64)
index_group.create_dataset("cluster_id", shape=(total_count,), dtype=np.int64)
index_group.create_dataset("weight", shape=(total_count,), dtype=np.float64)
index_group.create_dataset("tangential_comp", shape=(total_count,), dtype=np.float64)
index_group.create_dataset("cross_comp", shape=(total_count,), dtype=np.float64)
Expand Down Expand Up @@ -250,12 +251,14 @@ def run(self):
# Now we write out the per-cluster shear catalog information
index_group["cluster_index"][start:start + n] = i
index_group["source_index"][start:start + n] = indices
index_group["cluster_id"][start:start + n] = catalog_group["cluster_id"][i]
index_group["weight"][start:start + n] = weights
index_group["tangential_comp"][start:start + n] = tangential_comps
index_group["cross_comp"][start:start + n] = cross_comps
index_group["g1"][start:start + n] = g1
index_group["g2"][start:start + n] = g2
index_group["distance_arcmin"][start:start + n] = np.degrees(distances) * 60


start += n

Expand Down Expand Up @@ -572,7 +575,12 @@ def iterate_source_catalog(self):
# Give this chunk of data to the main run function
yield s, e, data


#def get_cluster_shear_catalogs_index_from_cluster_id(cluster_file, cluster_id):
# outfile = HDFFile(cluster_file, "r").file
# cond = outfile['catalog/cluster_id'][:] == cluster_id
# return np.where(cond)[0][0]


class CombinedClusterCatalog:
"""
The CCC class collects together:
Expand Down Expand Up @@ -660,7 +668,6 @@ def get_cluster_info(self, cluster_index):
return {k: self.cluster_catalog[f'clusters/{k}'][cluster_index] for k in self.cluster_cat_cols}



def get_background_shear_catalog(self, cluster_index):
import clmm

Expand All @@ -675,9 +682,12 @@ def get_background_shear_catalog(self, cluster_index):

# Load all the information that is in the per-cluster shear catalog
cat = {}
for col in ["source_index", "weight", "tangential_comp", "cross_comp", "g1", "g2", "distance_arcmin"]:

#for col in ["cluster_index", "source_index", "cluster_id", "weight", "tangential_comp", "cross_comp", "g1", "g2", "distance_arcmin"]:
for col in index_group.keys():
cat[col] = index_group[col][start:end]


# This is the index into the shear catalog that tells
# us where the galaxies behind this cluster are in that
# catalog. It is also the index to the shear photo-z PDF
Expand Down

0 comments on commit 87f751c

Please sign in to comment.