Skip to content

Commit

Permalink
Defer 'expensive' imports until they are needed. Also, do a little cleanup based on flake8 findings.
Browse files Browse the repository at this point in the history
  • Loading branch information
Raymond Wiker authored and rwiker committed Nov 1, 2024
1 parent 710b055 commit 9b95331
Show file tree
Hide file tree
Showing 3 changed files with 279 additions and 14 deletions.
262 changes: 262 additions & 0 deletions examples/table-aggregation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3f2d46a1-b5f0-4c75-bc55-4e768b9de112",
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"class Timer:\n",
" def __init__(self):\n",
" return\n",
" def __enter__(self):\n",
" self._t0 = time.perf_counter()\n",
" return\n",
" def __exit__(self, type, value, traceback):\n",
" t1 = time.perf_counter()\n",
" print(f\"Elapsed: {t1-self._t0:0.3f} seconds.\")\n",
" return\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "fb6c5b9c-314b-47ec-8157-6b75433f58b5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/RAYW/py-envs/explorer/lib/python3.9/site-packages/urllib3/__init__.py:35: NotOpenSSLWarning: urllib3 v2 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. See: https://github.com/urllib3/urllib3/issues/3020\n",
" warnings.warn(\n"
]
}
],
"source": [
"from fmu.sumo.explorer import Explorer\n",
"exp=Explorer(env=\"preview\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "501ca50e-272a-4231-8edd-a4dfa4905e68",
"metadata": {},
"outputs": [],
"source": [
"def total_blob_size(sc):\n",
" tbs = sc.metrics.sum(\"file.size_bytes\")\n",
" if tbs == 0:\n",
" tbs = sc.metrics.sum(\"_sumo.blob_size\")\n",
" return tbs\n",
"\n",
"def do_aggregate(tagname, rels, columns):\n",
" print(f\"{tagname}: {len(rels)} objects, {len(rels.columns)} columns.\")\n",
" tot_size_bytes = total_blob_size(rels)\n",
" print(f\"Total size of input: {tot_size_bytes / (1024*1024*1024):.3f} GiB\")\n",
" with Timer():\n",
" agg=rels.filter(column=columns).aggregate(columns=columns)\n",
" print(agg.to_pandas().sort_values(by=[\"REAL\", \"DATE\"]))\n",
"\n",
"def run_exp(caseuuid, itername, tagname, columns):\n",
" case = exp.get_case_by_uuid(caseuuid)\n",
" print(f\"{case.asset}: {case.name}: {caseuuid}\")\n",
" rels=case.tables.filter(iteration=itername, realization=True, tagname=tagname, \n",
" complex={\"bool\": {\"must_not\": [{\"term\": {\"_sumo.hidden\": True}}]}})\n",
" do_aggregate(tagname, rels, columns)\n",
" rels=case.tables.filter(iteration=itername, realization=True, tagname=tagname,\n",
" complex={\"term\": {\"_sumo.hidden\": True}})\n",
" do_aggregate(tagname, rels, columns)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5f3d4e12-2b23-4585-a935-eb0e48951dd6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Troll: 24.0.0-20240828_ix_network_test5: 359e7c72-a4ca-43ee-9203-f09cd0f149a9\n",
"summary: 27 objects, 64996 columns.\n",
"Total size of input: 1.248 GiB\n",
"Elapsed: 15.166 seconds.\n",
" DATE FOPT REAL\n",
"137 2024-07-02 282442208.0 6\n",
"138 2024-07-03 282451072.0 6\n",
"139 2024-08-01 282677120.0 6\n",
"140 2024-09-01 282889760.0 6\n",
"141 2024-10-01 283077440.0 6\n",
".. ... ... ...\n",
"47 2025-02-15 286229120.0 249\n",
"48 2025-04-01 286425696.0 249\n",
"49 2025-09-01 287060416.0 249\n",
"50 2025-10-01 287176832.0 249\n",
"51 2026-01-01 287523552.0 249\n",
"\n",
"[265 rows x 3 columns]\n",
"summary: 3537 objects, 64996 columns.\n",
"Total size of input: 1.087 GiB\n",
"Elapsed: 1.692 seconds.\n",
" DATE FOPT REAL\n",
"52 2024-07-02 282442208.0 6\n",
"53 2024-07-03 282451072.0 6\n",
"54 2024-08-01 282677120.0 6\n",
"55 2024-09-01 282889760.0 6\n",
"56 2024-10-01 283077440.0 6\n",
".. ... ... ...\n",
"173 2025-02-15 286229120.0 249\n",
"174 2025-04-01 286425696.0 249\n",
"175 2025-09-01 287060416.0 249\n",
"176 2025-10-01 287176832.0 249\n",
"177 2026-01-01 287523552.0 249\n",
"\n",
"[265 rows x 3 columns]\n"
]
}
],
"source": [
"run_exp(\"359e7c72-a4ca-43ee-9203-f09cd0f149a9\", \"pred-0\", \"summary\", [\"FOPT\"])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4ba6a7a8-4c32-4015-8767-41b2f7c777e0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Troll: 24.0.0-20240820: fc6cc7d3-6162-46a3-9d69-48ad1eaecdfb\n",
"summary: 196 objects, 24568 columns.\n",
"Total size of input: 30.013 GiB\n",
"Elapsed: 32.124 seconds.\n",
" DATE FOPT REAL\n",
"708796 1990-02-01 0.000000e+00 1\n",
"708797 1990-03-01 1.445590e+05 1\n",
"708798 1990-04-01 2.741935e+05 1\n",
"708799 1990-05-01 4.145006e+05 1\n",
"708800 1990-06-01 5.512956e+05 1\n",
"... ... ... ...\n",
"841571 2024-06-27 2.980280e+08 249\n",
"841572 2024-06-28 2.980311e+08 249\n",
"841573 2024-06-29 2.980342e+08 249\n",
"841574 2024-06-30 2.980384e+08 249\n",
"841575 2024-07-01 2.980405e+08 249\n",
"\n",
"[952560 rows x 3 columns]\n",
"summary: 9800 objects, 24568 columns.\n",
"Total size of input: 29.907 GiB\n",
"Elapsed: 4.722 seconds.\n",
" DATE FOPT REAL\n",
"34020 1990-02-01 0.000000e+00 1\n",
"34021 1990-03-01 1.445590e+05 1\n",
"34022 1990-04-01 2.741935e+05 1\n",
"34023 1990-05-01 4.145006e+05 1\n",
"34024 1990-06-01 5.512956e+05 1\n",
"... ... ... ...\n",
"316447 2024-06-27 2.980280e+08 249\n",
"316448 2024-06-28 2.980311e+08 249\n",
"316449 2024-06-29 2.980342e+08 249\n",
"316450 2024-06-30 2.980384e+08 249\n",
"316451 2024-07-01 2.980405e+08 249\n",
"\n",
"[952560 rows x 3 columns]\n"
]
}
],
"source": [
"run_exp(\"fc6cc7d3-6162-46a3-9d69-48ad1eaecdfb\", \"iter-0\", \"summary\", [\"FOPT\"])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d07581ef-8fdb-4621-b81c-8aaf20b0c204",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Drogon: ruaj_testcase: 5b558daf-61c5-400a-9aa2-c602bb471a16\n",
"summary: 160 objects, 974 columns.\n",
"Total size of input: 0.175 GiB\n",
"Elapsed: 2.485 seconds.\n",
" DATE FOPT REAL\n",
"4910 2018-01-01 0.000000e+00 0\n",
"4911 2018-01-02 0.000000e+00 0\n",
"4912 2018-01-05 0.000000e+00 0\n",
"4913 2018-01-06 3.991868e+03 0\n",
"4914 2018-01-09 1.596676e+04 0\n",
"... ... ... ...\n",
"36831 2020-06-14 7.278816e+06 159\n",
"36832 2020-06-27 7.349246e+06 159\n",
"36833 2020-06-28 7.354664e+06 159\n",
"36834 2020-06-30 7.365482e+06 159\n",
"36835 2020-07-01 7.370888e+06 159\n",
"\n",
"[39280 rows x 3 columns]\n",
"summary: 320 objects, 974 columns.\n",
"Total size of input: 0.163 GiB\n",
"Elapsed: 2.528 seconds.\n",
" DATE FOPT REAL\n",
"19394 2018-01-01 0.000000e+00 0\n",
"19395 2018-01-02 0.000000e+00 0\n",
"19396 2018-01-05 0.000000e+00 0\n",
"19397 2018-01-06 3.991868e+03 0\n",
"19398 2018-01-09 1.596676e+04 0\n",
"... ... ... ...\n",
"10795 2020-06-14 7.278816e+06 159\n",
"10796 2020-06-27 7.349246e+06 159\n",
"10797 2020-06-28 7.354664e+06 159\n",
"10798 2020-06-30 7.365482e+06 159\n",
"10799 2020-07-01 7.370888e+06 159\n",
"\n",
"[39280 rows x 3 columns]\n"
]
}
],
"source": [
"run_exp(\"5b558daf-61c5-400a-9aa2-c602bb471a16\", \"iter-0\", \"summary\", [\"FOPT\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "429d688e-34d1-4e19-b433-348d965dd436",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
8 changes: 4 additions & 4 deletions src/fmu/sumo/explorer/objects/polygons.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
"""Module containing class for polygons object"""

from typing import Dict
import pandas as pd
from sumo.wrapper import SumoClient
from fmu.sumo.explorer.objects._child import Child
from warnings import warn


class Polygons(Child):
Expand All @@ -18,25 +16,27 @@ def __init__(self, sumo: SumoClient, metadata: Dict, blob=None) -> None:
"""
super().__init__(sumo, metadata, blob)

def to_pandas(self) -> pd.DataFrame:
def to_pandas(self):
"""Get polygons object as a DataFrame
Returns:
DataFrame: A DataFrame object
"""

import pandas as pd
try:
return pd.read_csv(self.blob)
except TypeError as type_err:
raise TypeError(f"Unknown format: {self.format}") from type_err

async def to_pandas_async(self) -> pd.DataFrame:
async def to_pandas_async(self):
"""Get polygons object as a DataFrame
Returns:
DataFrame: A DataFrame object
"""

import pandas as pd
try:
return pd.read_csv(await self.blob_async)
except TypeError as type_err:
Expand Down
23 changes: 13 additions & 10 deletions src/fmu/sumo/explorer/objects/table.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
"""module containing class for table"""

import logging
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.feather as pf
from sumo.wrapper import SumoClient
from fmu.sumo.explorer.objects._child import Child
from warnings import warn
from typing import Dict


Expand Down Expand Up @@ -42,6 +37,9 @@ async def _read_table_async(self):
return self._construct_table_from_blob(await self._get_blob_async())

def _construct_table_from_blob(self, blob):
import pandas as pd
import pyarrow.feather as pf

try:
if self.dataformat == "csv":
dataframe = pd.read_csv(blob)
Expand Down Expand Up @@ -71,7 +69,7 @@ def _construct_table_from_blob(self, blob):
pass
return dataframe

def to_pandas(self) -> pd.DataFrame:
def to_pandas(self):
"""Return object as a pandas DataFrame
Returns:
Expand All @@ -81,7 +79,7 @@ def to_pandas(self) -> pd.DataFrame:
self._dataframe = self._read_table()
return self._dataframe

async def to_pandas_async(self) -> pd.DataFrame:
async def to_pandas_async(self):
"""Return object as a pandas DataFrame
Returns:
Expand All @@ -98,6 +96,11 @@ async def _read_arrow_async(self):
return self._construct_arrow_from_blob(await self._get_blob_async())

def _construct_arrow_from_blob(self, blob):
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.feather as pf

try:
if self.dataformat == "csv":
arrowtable = pa.Table.from_pandas(pd.read_csv(blob))
Expand All @@ -117,7 +120,7 @@ def _construct_arrow_from_blob(self, blob):
arrowtable = pq.read_table(blob)
except Exception as ex:
try:
arrowtable = pf.read_table(selfblob)
arrowtable = pf.read_table(blob)
except Exception as ex:
raise TypeError(
f"Unable to convert a blob of format {self.dataformat} to arrow; tried csv, parquet and feather."
Expand All @@ -127,7 +130,7 @@ def _construct_arrow_from_blob(self, blob):
pass
return arrowtable

def to_arrow(self) -> pa.Table:
def to_arrow(self):
"""Return object as an arrow Table
Returns:
Expand All @@ -137,7 +140,7 @@ def to_arrow(self) -> pa.Table:
self._arrowtable = self._read_arrow()
return self._arrowtable

async def to_arrow_async(self) -> pa.Table:
async def to_arrow_async(self):
"""Return object as an arrow Table
Returns:
Expand Down

0 comments on commit 9b95331

Please sign in to comment.