Skip to content

Commit

Permalink
add 5 and 30 min period
Browse files Browse the repository at this point in the history
  • Loading branch information
peterdudfield committed Jun 4, 2024
1 parent e2d860f commit a23de3f
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 23 deletions.
12 changes: 4 additions & 8 deletions cloud_archives/pv/passiv/filenames.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
from datetime import datetime


def get_daily_hf_file_name(date: datetime):
return f"data/{date.strftime('%Y/%m/%d')}/5min.parquet"
def get_monthly_hf_file_name(date: datetime, period: int = 5):
    """Build the Hugging Face path of a monthly parquet file.

    Args:
        date: Datetime whose year and month select the folder.
        period: Sampling period in minutes; it becomes the file-name prefix.

    Returns:
        A relative path of the form ``data/YYYY/MM/<period>min.parquet``.
    """
    month_folder = date.strftime("%Y/%m")
    return f"data/{month_folder}/{period}min.parquet"


def get_monthly_hf_file_name(date: datetime):
return f"data/{date.strftime('%Y/%m')}/5min.parquet"


def get_yearly_hf_file_name(date: datetime):
return f"data/{date.strftime('%Y')}/5min.parquet"
def get_yearly_hf_file_name(date: datetime, period: int = 5):
    """Build the Hugging Face path of a yearly parquet file.

    Args:
        date: Datetime whose year selects the folder.
        period: Sampling period in minutes; it becomes the file-name prefix.

    Returns:
        A relative path of the form ``data/YYYY/<period>min.parquet``.
    """
    year_folder = date.strftime("%Y")
    return f"data/{year_folder}/{period}min.parquet"
32 changes: 26 additions & 6 deletions cloud_archives/pv/passiv/passiv_monthly.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
logger = logging.getLogger(__name__)


def get_monthly_passiv_data(start_date: datetime, upload_to_hf: bool = True, overwrite: bool = False):
def get_monthly_passiv_data(start_date: datetime, upload_to_hf: bool = True, overwrite: bool = False, period:int=5):
""" Get dail passiv data and save to Hugging Face"""

logger.info(f"Getting data for {start_date}")

# check if we have data for that month already
huggingface_file = get_monthly_hf_file_name(date=start_date)
huggingface_file = get_monthly_hf_file_name(date=start_date, period=period)
if not overwrite:
fs = HfFileSystem()
if fs.exists(f'datasets/openclimatefix/uk_pv/{huggingface_file}'):
Expand All @@ -45,7 +45,7 @@ def get_monthly_passiv_data(start_date: datetime, upload_to_hf: bool = True, ove
generation_data = ss_rawdata_api.download(
start=start_date,
end=end_date,
period=5,
period=period,
)

# filter out only passiv systems
Expand Down Expand Up @@ -76,21 +76,41 @@ def get_monthly_passiv_data(start_date: datetime, upload_to_hf: bool = True, ove


@dg.asset(
key=["pv", "passiv", "monthly"],
key=["pv", "passiv", "monthly_30min"],
partitions_def=dg.TimeWindowPartitionsDefinition(
fmt="%Y-%m",
start="2023-01",
cron_schedule="0 12 1 * *", # 1st day of the month, at 12 oclock
),
)
def pv_passiv_monthly(context: dg.AssetExecutionContext):
def pv_passiv_monthly_30min(context: dg.AssetExecutionContext):
"""PV Passiv archive monthlyasset."""

partition_date_str = context.partition_key
start_date = datetime.datetime.strptime(partition_date_str, "%Y-%m")
start_date = pytz.utc.localize(start_date)

get_monthly_passiv_data(start_date)
get_monthly_passiv_data(start_date, period=30)




@dg.asset(
    key=["pv", "passiv", "monthly_5min"],
    partitions_def=dg.TimeWindowPartitionsDefinition(
        fmt="%Y-%m",
        start="2023-01",
        cron_schedule="0 12 1 * *",  # 12:00 on the 1st day of every month
    ),
)
def pv_passiv_monthly_5min(context: dg.AssetExecutionContext):
    """PV Passiv monthly archive asset at 5-minute resolution.

    Parses the partition key (formatted ``%Y-%m``) into a UTC-localized
    datetime and hands it to ``get_monthly_passiv_data`` with ``period=5``.
    """
    # The partition key is naive once parsed; localize it to UTC before use.
    month_start = pytz.utc.localize(
        datetime.datetime.strptime(context.partition_key, "%Y-%m")
    )

    get_monthly_passiv_data(month_start, period=5)



Expand Down
32 changes: 25 additions & 7 deletions cloud_archives/pv/passiv/passiv_year.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
from .filenames import get_monthly_hf_file_name, get_yearly_hf_file_name


def get_yearly_passiv_data(start_date: datetime, upload_to_hf: bool = True, overwrite: bool = False):
def get_yearly_passiv_data(start_date: datetime, upload_to_hf: bool = True, overwrite: bool = False, period:int=5):
""" Get yearly passiv data and save to Hugging Face"""

# set up HF and check if we have data for that year already
huggingface_file = get_yearly_hf_file_name(date=start_date)
huggingface_file = get_yearly_hf_file_name(date=start_date, period=period)
fs = HfFileSystem()
if not overwrite:
if fs.exists(f'datasets/openclimatefix/uk_pv/{huggingface_file}'):
Expand All @@ -30,7 +30,7 @@ def get_yearly_passiv_data(start_date: datetime, upload_to_hf: bool = True, over
while date < end_date:

# load file from hugging face
huggingface_load_file = get_monthly_hf_file_name(date=date)
huggingface_load_file = get_monthly_hf_file_name(date=date, period=period)

# load data
print(f"Loading data from {huggingface_load_file}")
Expand All @@ -48,7 +48,7 @@ def get_yearly_passiv_data(start_date: datetime, upload_to_hf: bool = True, over
generation_data = pd.concat(data_df)

# save to parquet file
local_file = f"passiv_5min_yearly_{start_date.date()}.parquet"
local_file = f"passiv_{period}min_yearly_{start_date.date()}.parquet"
generation_data.to_parquet(local_file)

# upload to hugging face
Expand All @@ -65,21 +65,39 @@ def get_yearly_passiv_data(start_date: datetime, upload_to_hf: bool = True, over


@dg.asset(
key=["pv", "passiv", "yearly"],
key=["pv", "passiv", "yearly_5min"],
partitions_def=dg.TimeWindowPartitionsDefinition(
fmt="%Y",
start="2023",
cron_schedule="0 12 2 1 *", # 2nd day of January, at 12 oclock,
),
)
def pv_passiv_yearly(context: dg.AssetExecutionContext):
def pv_passiv_yearly_5min(context: dg.AssetExecutionContext):
"""PV Passiv archive yearly data."""

partition_date_str = context.partition_key
start_date = datetime.datetime.strptime(partition_date_str, "%Y")
start_date = pytz.utc.localize(start_date)

get_yearly_passiv_data(start_date)
get_yearly_passiv_data(start_date, period=5)


@dg.asset(
    key=["pv", "passiv", "yearly_30min"],
    partitions_def=dg.TimeWindowPartitionsDefinition(
        fmt="%Y",
        start="2023",
        cron_schedule="0 12 2 1 *",  # 12:00 on the 2nd of January
    ),
)
def pv_passiv_yearly_30min(context: dg.AssetExecutionContext):
    """PV Passiv yearly archive asset at 30-minute resolution.

    Parses the partition key (formatted ``%Y``) into a UTC-localized
    datetime and hands it to ``get_yearly_passiv_data`` with ``period=30``.
    """
    # The partition key is naive once parsed; localize it to UTC before use.
    year_start = pytz.utc.localize(
        datetime.datetime.strptime(context.partition_key, "%Y")
    )

    get_yearly_passiv_data(year_start, period=30)



Expand Down
4 changes: 2 additions & 2 deletions tests/cloud_archives/pv/test_passiv.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from cloud_archives.pv.passiv.passiv_daily import get_daily_passiv_data
from cloud_archives.pv.passiv.passiv_monthly import get_monthly_passiv_data

from datetime import datetime, timezone


def test_get_daily_passiv_data():
start_date = datetime(2022, 1, 1, tzinfo=timezone.utc)
get_daily_passiv_data(start_date, upload_to_hf=False, overwrite=True)
get_monthly_passiv_data(start_date, upload_to_hf=False, overwrite=True)

0 comments on commit a23de3f

Please sign in to comment.