Defer 'expensive' imports until they are needed. #352

Merged: 1 commit, Nov 1, 2024
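This PR moves the heavyweight pandas and pyarrow imports out of module scope and into the methods that actually need them, so importing fmu.sumo.explorer no longer pays their startup cost; the cost is deferred to the first call that builds a DataFrame or Arrow table. A minimal sketch of the pattern, using a hypothetical LazyCsvReader class rather than the real Polygons/Table classes changed below:

import io

class LazyCsvReader:
    # Reads a CSV blob, importing pandas only when a DataFrame is requested.
    def __init__(self, blob):
        self._blob = blob

    def to_pandas(self):
        # Deferred import: the cost is paid on the first call, not at module
        # import time. Later calls are cheap because Python caches the module
        # in sys.modules after the first import.
        import pandas as pd
        return pd.read_csv(self._blob)

# Illustrative usage with an in-memory CSV blob:
reader = LazyCsvReader(io.StringIO("DATE,FOPT\n2024-07-02,282442208.0\n"))
df = reader.to_pandas()

The actual changes in polygons.py and table.py below follow the same shape: the module-level imports are deleted, re-added inside to_pandas, to_pandas_async, _construct_table_from_blob and _construct_arrow_from_blob, and the pd.DataFrame/pa.Table return annotations are dropped since those names are no longer available at module scope.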
262 changes: 262 additions & 0 deletions examples/table-aggregation.ipynb
@@ -0,0 +1,262 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3f2d46a1-b5f0-4c75-bc55-4e768b9de112",
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"class Timer:\n",
" def __init__(self):\n",
" return\n",
" def __enter__(self):\n",
" self._t0 = time.perf_counter()\n",
" return\n",
" def __exit__(self, type, value, traceback):\n",
" t1 = time.perf_counter()\n",
" print(f\"Elapsed: {t1-self._t0:0.3f} seconds.\")\n",
" return\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "fb6c5b9c-314b-47ec-8157-6b75433f58b5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/RAYW/py-envs/explorer/lib/python3.9/site-packages/urllib3/__init__.py:35: NotOpenSSLWarning: urllib3 v2 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. See: https://github.com/urllib3/urllib3/issues/3020\n",
" warnings.warn(\n"
]
}
],
"source": [
"from fmu.sumo.explorer import Explorer\n",
"exp=Explorer(env=\"preview\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "501ca50e-272a-4231-8edd-a4dfa4905e68",
"metadata": {},
"outputs": [],
"source": [
"def total_blob_size(sc):\n",
" tbs = sc.metrics.sum(\"file.size_bytes\")\n",
" if tbs == 0:\n",
" tbs = sc.metrics.sum(\"_sumo.blob_size\")\n",
" return tbs\n",
"\n",
"def do_aggregate(tagname, rels, columns):\n",
" print(f\"{tagname}: {len(rels)} objects, {len(rels.columns)} columns.\")\n",
" tot_size_bytes = total_blob_size(rels)\n",
" print(f\"Total size of input: {tot_size_bytes / (1024*1024*1024):.3f} GiB\")\n",
" with Timer():\n",
" agg=rels.filter(column=columns).aggregate(columns=columns)\n",
" print(agg.to_pandas().sort_values(by=[\"REAL\", \"DATE\"]))\n",
"\n",
"def run_exp(caseuuid, itername, tagname, columns):\n",
" case = exp.get_case_by_uuid(caseuuid)\n",
" print(f\"{case.asset}: {case.name}: {caseuuid}\")\n",
" rels=case.tables.filter(iteration=itername, realization=True, tagname=tagname, \n",
" complex={\"bool\": {\"must_not\": [{\"term\": {\"_sumo.hidden\": True}}]}})\n",
" do_aggregate(tagname, rels, columns)\n",
" rels=case.tables.filter(iteration=itername, realization=True, tagname=tagname,\n",
" complex={\"term\": {\"_sumo.hidden\": True}})\n",
" do_aggregate(tagname, rels, columns)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5f3d4e12-2b23-4585-a935-eb0e48951dd6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Troll: 24.0.0-20240828_ix_network_test5: 359e7c72-a4ca-43ee-9203-f09cd0f149a9\n",
"summary: 27 objects, 64996 columns.\n",
"Total size of input: 1.248 GiB\n",
"Elapsed: 15.166 seconds.\n",
" DATE FOPT REAL\n",
"137 2024-07-02 282442208.0 6\n",
"138 2024-07-03 282451072.0 6\n",
"139 2024-08-01 282677120.0 6\n",
"140 2024-09-01 282889760.0 6\n",
"141 2024-10-01 283077440.0 6\n",
".. ... ... ...\n",
"47 2025-02-15 286229120.0 249\n",
"48 2025-04-01 286425696.0 249\n",
"49 2025-09-01 287060416.0 249\n",
"50 2025-10-01 287176832.0 249\n",
"51 2026-01-01 287523552.0 249\n",
"\n",
"[265 rows x 3 columns]\n",
"summary: 3537 objects, 64996 columns.\n",
"Total size of input: 1.087 GiB\n",
"Elapsed: 1.692 seconds.\n",
" DATE FOPT REAL\n",
"52 2024-07-02 282442208.0 6\n",
"53 2024-07-03 282451072.0 6\n",
"54 2024-08-01 282677120.0 6\n",
"55 2024-09-01 282889760.0 6\n",
"56 2024-10-01 283077440.0 6\n",
".. ... ... ...\n",
"173 2025-02-15 286229120.0 249\n",
"174 2025-04-01 286425696.0 249\n",
"175 2025-09-01 287060416.0 249\n",
"176 2025-10-01 287176832.0 249\n",
"177 2026-01-01 287523552.0 249\n",
"\n",
"[265 rows x 3 columns]\n"
]
}
],
"source": [
"run_exp(\"359e7c72-a4ca-43ee-9203-f09cd0f149a9\", \"pred-0\", \"summary\", [\"FOPT\"])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4ba6a7a8-4c32-4015-8767-41b2f7c777e0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Troll: 24.0.0-20240820: fc6cc7d3-6162-46a3-9d69-48ad1eaecdfb\n",
"summary: 196 objects, 24568 columns.\n",
"Total size of input: 30.013 GiB\n",
"Elapsed: 32.124 seconds.\n",
" DATE FOPT REAL\n",
"708796 1990-02-01 0.000000e+00 1\n",
"708797 1990-03-01 1.445590e+05 1\n",
"708798 1990-04-01 2.741935e+05 1\n",
"708799 1990-05-01 4.145006e+05 1\n",
"708800 1990-06-01 5.512956e+05 1\n",
"... ... ... ...\n",
"841571 2024-06-27 2.980280e+08 249\n",
"841572 2024-06-28 2.980311e+08 249\n",
"841573 2024-06-29 2.980342e+08 249\n",
"841574 2024-06-30 2.980384e+08 249\n",
"841575 2024-07-01 2.980405e+08 249\n",
"\n",
"[952560 rows x 3 columns]\n",
"summary: 9800 objects, 24568 columns.\n",
"Total size of input: 29.907 GiB\n",
"Elapsed: 4.722 seconds.\n",
" DATE FOPT REAL\n",
"34020 1990-02-01 0.000000e+00 1\n",
"34021 1990-03-01 1.445590e+05 1\n",
"34022 1990-04-01 2.741935e+05 1\n",
"34023 1990-05-01 4.145006e+05 1\n",
"34024 1990-06-01 5.512956e+05 1\n",
"... ... ... ...\n",
"316447 2024-06-27 2.980280e+08 249\n",
"316448 2024-06-28 2.980311e+08 249\n",
"316449 2024-06-29 2.980342e+08 249\n",
"316450 2024-06-30 2.980384e+08 249\n",
"316451 2024-07-01 2.980405e+08 249\n",
"\n",
"[952560 rows x 3 columns]\n"
]
}
],
"source": [
"run_exp(\"fc6cc7d3-6162-46a3-9d69-48ad1eaecdfb\", \"iter-0\", \"summary\", [\"FOPT\"])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d07581ef-8fdb-4621-b81c-8aaf20b0c204",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Drogon: ruaj_testcase: 5b558daf-61c5-400a-9aa2-c602bb471a16\n",
"summary: 160 objects, 974 columns.\n",
"Total size of input: 0.175 GiB\n",
"Elapsed: 2.485 seconds.\n",
" DATE FOPT REAL\n",
"4910 2018-01-01 0.000000e+00 0\n",
"4911 2018-01-02 0.000000e+00 0\n",
"4912 2018-01-05 0.000000e+00 0\n",
"4913 2018-01-06 3.991868e+03 0\n",
"4914 2018-01-09 1.596676e+04 0\n",
"... ... ... ...\n",
"36831 2020-06-14 7.278816e+06 159\n",
"36832 2020-06-27 7.349246e+06 159\n",
"36833 2020-06-28 7.354664e+06 159\n",
"36834 2020-06-30 7.365482e+06 159\n",
"36835 2020-07-01 7.370888e+06 159\n",
"\n",
"[39280 rows x 3 columns]\n",
"summary: 320 objects, 974 columns.\n",
"Total size of input: 0.163 GiB\n",
"Elapsed: 2.528 seconds.\n",
" DATE FOPT REAL\n",
"19394 2018-01-01 0.000000e+00 0\n",
"19395 2018-01-02 0.000000e+00 0\n",
"19396 2018-01-05 0.000000e+00 0\n",
"19397 2018-01-06 3.991868e+03 0\n",
"19398 2018-01-09 1.596676e+04 0\n",
"... ... ... ...\n",
"10795 2020-06-14 7.278816e+06 159\n",
"10796 2020-06-27 7.349246e+06 159\n",
"10797 2020-06-28 7.354664e+06 159\n",
"10798 2020-06-30 7.365482e+06 159\n",
"10799 2020-07-01 7.370888e+06 159\n",
"\n",
"[39280 rows x 3 columns]\n"
]
}
],
"source": [
"run_exp(\"5b558daf-61c5-400a-9aa2-c602bb471a16\", \"iter-0\", \"summary\", [\"FOPT\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "429d688e-34d1-4e19-b433-348d965dd436",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
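The notebook above times end-to-end table aggregation against Sumo and exercises the changed to_pandas code path, but it does not measure the import-time gain this PR is after. A hypothetical spot check of that gain, assuming fmu.sumo.explorer is installed in the current environment, could look like:

import time

t0 = time.perf_counter()
import fmu.sumo.explorer  # pandas/pyarrow should no longer be pulled in here
t1 = time.perf_counter()
print(f"import fmu.sumo.explorer: {t1 - t0:0.3f} seconds")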
8 changes: 4 additions & 4 deletions src/fmu/sumo/explorer/objects/polygons.py
@@ -1,10 +1,8 @@
"""Module containing class for polygons object"""

from typing import Dict
import pandas as pd
from sumo.wrapper import SumoClient
from fmu.sumo.explorer.objects._child import Child
from warnings import warn


class Polygons(Child):
@@ -18,25 +16,27 @@ def __init__(self, sumo: SumoClient, metadata: Dict, blob=None) -> None:
"""
super().__init__(sumo, metadata, blob)

def to_pandas(self) -> pd.DataFrame:
def to_pandas(self):
"""Get polygons object as a DataFrame

Returns:
DataFrame: A DataFrame object
"""

import pandas as pd
try:
return pd.read_csv(self.blob)
except TypeError as type_err:
raise TypeError(f"Unknown format: {self.format}") from type_err

async def to_pandas_async(self) -> pd.DataFrame:
async def to_pandas_async(self):
"""Get polygons object as a DataFrame

Returns:
DataFrame: A DataFrame object
"""

import pandas as pd
try:
return pd.read_csv(await self.blob_async)
except TypeError as type_err:
23 changes: 13 additions & 10 deletions src/fmu/sumo/explorer/objects/table.py
@@ -1,13 +1,8 @@
"""module containing class for table"""

import logging
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.feather as pf
from sumo.wrapper import SumoClient
from fmu.sumo.explorer.objects._child import Child
from warnings import warn
from typing import Dict


@@ -42,6 +37,9 @@ async def _read_table_async(self):
return self._construct_table_from_blob(await self._get_blob_async())

def _construct_table_from_blob(self, blob):
import pandas as pd
import pyarrow.feather as pf

try:
if self.dataformat == "csv":
dataframe = pd.read_csv(blob)
@@ -71,7 +69,7 @@ def _construct_table_from_blob(self, blob):
pass
return dataframe

def to_pandas(self) -> pd.DataFrame:
def to_pandas(self):
"""Return object as a pandas DataFrame

Returns:
@@ -81,7 +79,7 @@ def to_pandas(self):
self._dataframe = self._read_table()
return self._dataframe

async def to_pandas_async(self) -> pd.DataFrame:
async def to_pandas_async(self):
"""Return object as a pandas DataFrame

Returns:
@@ -98,6 +96,11 @@ async def _read_arrow_async(self):
return self._construct_arrow_from_blob(await self._get_blob_async())

def _construct_arrow_from_blob(self, blob):
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.feather as pf

try:
if self.dataformat == "csv":
arrowtable = pa.Table.from_pandas(pd.read_csv(blob))
@@ -117,7 +120,7 @@ def _construct_arrow_from_blob(self, blob):
arrowtable = pq.read_table(blob)
except Exception as ex:
try:
arrowtable = pf.read_table(selfblob)
arrowtable = pf.read_table(blob)
except Exception as ex:
raise TypeError(
f"Unable to convert a blob of format {self.dataformat} to arrow; tried csv, parquet and feather."
@@ -127,7 +130,7 @@ def _construct_arrow_from_blob(self, blob):
pass
return arrowtable

def to_arrow(self) -> pa.Table:
def to_arrow(self):
"""Return object as an arrow Table

Returns:
@@ -137,7 +140,7 @@ def to_arrow(self):
self._arrowtable = self._read_arrow()
return self._arrowtable

async def to_arrow_async(self) -> pa.Table:
async def to_arrow_async(self):
"""Return object as an arrow Table

Returns: