Global only (#15)

add regression tests and support harmonization of global-only trajectories
iiasa · Nov 6, 2019 · 7fcc0f5 · 7fcc0f5
1 parent 073b425
commit 7fcc0f5
Show file tree

Hide file tree

Showing 20 changed files with 70 additions and 58 deletions.
diff --git a/aneris/_io.py b/aneris/_io.py
@@ -15,6 +15,7 @@
     default_luc_method: reduce_ratio_2150_cov
     cov_threshold: 20
     harmonize_year: 2015
+    global_harmonization_only: false
 prefix: CEDS+|9+ Sectors
 suffix: Unharmonized
 add_5regions: true

diff --git a/aneris/harmonize.py b/aneris/harmonize.py
@@ -477,23 +477,32 @@ def harmonize(self, scenario, diagnostic_config=None):
 
         unharmonized = self._model.copy()
 
+        # flag if this run will be with only global trajectories. if so, then
+        # only global totals are harmonized, rest is skipped.
+        global_harmonization_only = self.config['global_harmonization_only']
+
         # global only gases
         self._glb_model, self._glb_meta = _harmonize_global_total(
             self.config, self.prefix, self.suffix,
-            self._hist, self._model.copy(), self._overrides
-        )
-
-        # regional gases
-        self._model, self._meta = _harmonize_regions(
-            self.config, self.prefix, self.suffix, self._regions,
             self._hist, self._model.copy(), self._overrides,
-            self.config['harmonize_year'], self.add_5regions
+            default_global_gases=not global_harmonization_only,
         )
 
-        # combine special case results with harmonized results
-        if self._glb_model is not None:
-            self._model = self._glb_model.combine_first(self._model)
-            self._meta = self._glb_meta.combine_first(self._meta)
+        if global_harmonization_only:
+            self._model = self._glb_model
+            self._meta = self._glb_meta
+        else:
+            # regional gases
+            self._model, self._meta = _harmonize_regions(
+                self.config, self.prefix, self.suffix, self._regions,
+                self._hist, self._model.copy(), self._overrides,
+                self.config['harmonize_year'], self.add_5regions
+            )
+
+            # combine special case results with harmonized results
+            if self._glb_model is not None:
+                self._model = self._glb_model.combine_first(self._model)
+                self._meta = self._glb_meta.combine_first(self._meta)
 
         # perform any automated diagnostics/analysis
         self._diag = diagnostics(
@@ -551,8 +560,10 @@ def _get_global_overrides(overrides, gases, sector):
         return o.set_index(idx)['method']
 
 
-def _harmonize_global_total(config, prefix, suffix, hist, model, overrides):
-    gases = utils.harmonize_total_gases
+def _harmonize_global_total(config, prefix, suffix, hist, model, overrides,
+                            default_global_gases=True):
+    all_gases = list(model.index.get_level_values('gas').unique())
+    gases = utils.harmonize_total_gases if default_global_gases else all_gases
     sector = '|'.join([prefix, suffix])
     idx = (pd.IndexSlice['World', gases, sector],
            pd.IndexSlice[:])

diff --git a/doc/source/data.rst b/doc/source/data.rst
@@ -34,15 +34,15 @@ Unharmonized IAM Data
 Data from IAMs is expected to be in the following format with a sheetname "data".
 
 .. exceltable:: Example Model Input
-   :file: ../../tests/test_data/model.xls
+   :file: ../../tests/test_data/model_regions_sectors.xls
    :header: 1
    :selection: A1:I4
 
 If overrides are provided, they are expected to be in the following format with
 a sheetname "harmonization".
 
 .. exceltable:: Example Harmonization Overrides
-   :file: ../../tests/test_data/model.xls
+   :file: ../../tests/test_data/model_regions_sectors.xls
    :sheet: 1
 
 Additionally, configuration parameters (described in :ref:`config`) can be set
@@ -55,7 +55,7 @@ Historical Data
 Historical data is expected to be in the following format
 
 .. exceltable:: Example Historical Data
-   :file: ../../tests/test_data/history.xls
+   :file: ../../tests/test_data/history_regions_sectors.xls
    :header: 1
    :selection: A1:I4
 
@@ -66,5 +66,5 @@ Data for regional mappings (countries to IAM regions) is expected to be in the
 following format
 
 .. csv-table:: Example Regional Definitions
-   :file: ../../tests/test_data/regions.csv
+   :file: ../../tests/test_data/regions_regions_sectors.csv
    :header-rows: 1
diff --git a/tests/test_data/aneris_global_only.yaml b/tests/test_data/aneris_global_only.yaml
@@ -0,0 +1,6 @@
+config:
+    harmonize_year: 2005
+    global_harmonization_only: true
+prefix: prefix
+suffix: suffix
+add_5regions: false
diff --git a/tests/test_data/aneris_global_sectors.yaml b/tests/test_data/aneris_global_sectors.yaml
@@ -0,0 +1,6 @@
+config:
+    harmonize_year: 2005
+    global_harmonization_only: false
+prefix: prefix
+suffix: suffix
+add_5regions: false
diff --git a/tests/test_data/aneris.yaml → tests/test_data/aneris_regions_sectors.yaml b/tests/test_data/aneris.yaml → tests/test_data/aneris_regions_sectors.yaml
diff --git a/tests/test_data/history_global_only.xls b/tests/test_data/history_global_only.xls
diff --git a/tests/test_data/history_global_sectors.xls b/tests/test_data/history_global_sectors.xls
diff --git a/tests/test_data/history.xls → tests/test_data/history_regions_sectors.xls b/tests/test_data/history.xls → tests/test_data/history_regions_sectors.xls
diff --git a/tests/test_data/model_global_only.xls b/tests/test_data/model_global_only.xls
diff --git a/tests/test_data/model_global_sectors.xls b/tests/test_data/model_global_sectors.xls
diff --git a/tests/test_data/model.xls → tests/test_data/model_regions_sectors.xls b/tests/test_data/model.xls → tests/test_data/model_regions_sectors.xls
diff --git a/tests/test_data/regions_global_only.csv b/tests/test_data/regions_global_only.csv
@@ -0,0 +1,2 @@
+ISO Code,Country,Native Region Code,5_region
+World,World,World,World
diff --git a/tests/test_data/regions_global_sectors.csv b/tests/test_data/regions_global_sectors.csv
@@ -0,0 +1,2 @@
+ISO Code,Country,Native Region Code,5_region
+World,World,World,World
diff --git a/tests/test_data/regions.csv → tests/test_data/regions_regions_sectors.csv b/tests/test_data/regions.csv → tests/test_data/regions_regions_sectors.csv
diff --git a/tests/test_data/test_global_only.xlsx b/tests/test_data/test_global_only.xlsx
diff --git a/tests/test_data/test_global_sectors.xlsx b/tests/test_data/test_global_sectors.xlsx
diff --git a/tests/test_data/test_basic_run.xlsx → tests/test_data/test_regions_sectors.xlsx b/tests/test_data/test_basic_run.xlsx → tests/test_data/test_regions_sectors.xlsx
diff --git a/tests/test_io.py b/tests/test_io.py
@@ -8,6 +8,7 @@
         'default_luc_method': 'reduce_ratio_2150_cov',
         'cov_threshold': 20,
         'harmonize_year': 2015,
+        'global_harmonization_only': False,
     },
     'prefix': 'CEDS+|9+ Sectors',
     'suffix': 'Unharmonized',

diff --git a/tests/test_regression.py b/tests/test_regression.py
@@ -31,63 +31,60 @@
 
 class TestHarmonizeRegression():
 
-    def _run(self, inf, checkf, hist, reg, rc, outf, prefix):
+    def _run(self, inf, checkf, hist, reg, rc, prefix, name):
         # path setup
         prefix = join(here, prefix)
         hist = join(prefix, hist)
         reg = join(prefix, reg)
         rc = join(prefix, rc)
         inf = join(prefix, inf)
-        outf = join(prefix, outf)
+        outf = join(prefix, '{}_harmonized.xlsx'.format(name))
+        outf_meta = join(prefix, '{}_metadata.xlsx'.format(name))
+        outf_diag = join(prefix, '{}_diagnostics.xlsx'.format(name))
+        clean = [outf, outf_meta, outf_diag]
 
-        if os.path.exists(outf):
-            os.remove(outf)
+        # make sure we're fresh
+        for f in clean:
+            if os.path.exists(f):
+                os.remove(f)
 
         # run
-        print(inf, hist, reg, rc, 'test')
-        cli.harmonize(inf, hist, reg, rc, prefix, 'test')
+        print(inf, hist, reg, rc, name)
+        cli.harmonize(inf, hist, reg, rc, prefix, name)
 
         # test
         xfile = join(prefix, checkf)
         x = pd.read_excel(xfile, sheet_name='data')
         y = pd.read_excel(outf, sheet_name='data')
         assert_frame_equal(x, y)
 
-        clean = [
-            outf,
-            join(prefix, 'test_metadata.xlsx'),
-            join(prefix, 'test_diagnostics.xlsx'),
-        ]
+        # tidy up after
         for f in clean:
             if os.path.exists(f):
                 os.remove(f)
 
-    def test_basic_run(self):
+    @pytest.mark.parametrize("file_suffix", ['global_only', 'global_sectors', 'regions_sectors'])
+    def test_basic_run(self, file_suffix):
         # this is run no matter what
         prefix = 'test_data'
-        checkf = 'test_basic_run.xlsx'
-        hist = 'history.xls'
-        reg = 'regions.csv'
-        rc = 'aneris.yaml'
-        inf = 'model.xls'
-        outf = 'test_harmonized.xlsx'
+        checkf = 'test_{}.xlsx'.format(file_suffix)
+        hist = 'history_{}.xls'.format(file_suffix)
+        reg = 'regions_{}.csv'.format(file_suffix)
+        inf = 'model_{}.xls'.format(file_suffix)
+        rc = 'aneris_{}.yaml'.format(file_suffix)
 
         # get all arguments
-        self._run(inf, checkf, hist, reg, rc, outf, prefix)
+        self._run(inf, checkf, hist, reg, rc, prefix, file_suffix)
 
-    #
-    # the following are run only on CI, this should be parameterized in the
-    # future
-    #
-
-    def _run_ci(self, name):
+    @pytest.mark.skipif(not ON_CI, reason=ON_CI_REASON)
+    @pytest.mark.parametrize("name", ['msg', 'gcam'])
+    def test_regression_ci(self, name):
         prefix = join(ci_path, 'test-{}'.format(name))
         checkf = '{}_harmonized.xlsx'.format(name)
         hist = 'history.csv'
         reg = 'regiondef.xlsx'
         rc = 'rc.yaml'
         inf = 'inputfile.xlsx'
-        outf = 'test_harmonized.xlsx'
 
         # copy needed files
         for fname in [hist, rc, checkf]:
@@ -96,18 +93,4 @@ def _run_ci(self, name):
             shutil.copyfile(src, dst)
 
         # get all arguments
-        self._run(inf, checkf, hist, reg, rc, outf, prefix)
-
-    # only runs if access to regression data is available
-    @pytest.mark.skipif(not ON_CI, reason=ON_CI_REASON)
-    def test_msg(self):
-        # file setup
-        name = 'msg'
-        self._run_ci(name)
-
-    # only runs if access to regression data is available
-    @pytest.mark.skipif(not ON_CI, reason=ON_CI_REASON)
-    def test_gcam(self):
-        # file setup
-        name = 'gcam'
-        self._run_ci(name)
+        self._run(inf, checkf, hist, reg, rc, prefix, name)