-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
docs(ingestion): Emitter api examples + Documentation (#3599)
- Loading branch information
Showing
6 changed files
with
223 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
39 changes: 39 additions & 0 deletions
39
metadata-ingestion/examples/library/lineage_chart_dashboard.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
"""Example: emit chart -> dashboard lineage by writing a DashboardInfo aspect.

Declares which charts belong to a dashboard, then upserts that aspect to a
local DataHub GMS instance over REST.
"""
from typing import List

import datahub.emitter.mce_builder as builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.com.linkedin.pegasus2avro.dashboard import DashboardInfoClass
from datahub.metadata.schema_classes import ChangeAuditStampsClass, ChangeTypeClass

# Construct the DashboardInfo aspect with the charts -> dashboard lineage.
charts_in_dashboard: List[str] = [
    builder.make_chart_urn(platform="looker", name="chart_1"),
    builder.make_chart_urn(platform="looker", name="chart_2"),
]

# Default-constructed audit stamps; real ingestion would set actual actors/times.
last_modified = ChangeAuditStampsClass()

dashboard_info = DashboardInfoClass(
    title="My Dashboard 1",
    description="Sample dashboard",
    lastModified=last_modified,
    charts=charts_in_dashboard,
)

# Construct a MetadataChangeProposalWrapper object with the DashboardInfo aspect.
# NOTE: This will overwrite all of the existing dashboard aspect information
# associated with this dashboard.
# (Renamed from `chart_info_mcp`: this proposal targets a dashboard, not a chart.)
dashboard_info_mcp = MetadataChangeProposalWrapper(
    entityType="dashboard",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=builder.make_dashboard_urn(platform="looker", name="my_dashboard_1"),
    aspectName="dashboardInfo",
    aspect=dashboard_info,
)

# Create an emitter to the GMS REST API.
emitter = DatahubRestEmitter("http://localhost:8080")

# Emit metadata!
emitter.emit_mcp(dashboard_info_mcp)
38 changes: 38 additions & 0 deletions
38
metadata-ingestion/examples/library/lineage_dataset_chart.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
"""Example: emit dataset -> chart lineage by writing a ChartInfo aspect.

Records which upstream datasets a chart is built from, then upserts that
aspect to a local DataHub GMS instance over REST.
"""
from typing import List

import datahub.emitter.mce_builder as builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.com.linkedin.pegasus2avro.chart import ChartInfoClass
from datahub.metadata.schema_classes import ChangeAuditStampsClass, ChangeTypeClass

# Upstream datasets that feed this chart (the input_datasets lineage).
input_datasets: List[str] = [
    builder.make_dataset_urn(platform="hdfs", name=dataset_name, env="PROD")
    for dataset_name in ("dataset1", "dataset2")
]

# Default-constructed audit stamps for the aspect.
last_modified = ChangeAuditStampsClass()

chart_info = ChartInfoClass(
    title="Baz Chart 1",
    description="Sample Baz chart",
    lastModified=last_modified,
    inputs=input_datasets,
)

# Wrap the aspect in a MetadataChangeProposalWrapper targeting the chart.
# NOTE: This will overwrite all of the existing chartInfo aspect information
# associated with this chart.
chart_info_mcp = MetadataChangeProposalWrapper(
    entityType="chart",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=builder.make_chart_urn(platform="looker", name="my_chart_1"),
    aspectName="chartInfo",
    aspect=chart_info,
)

# Emit the proposal to the GMS REST API.
emitter = DatahubRestEmitter("http://localhost:8080")
emitter.emit_mcp(chart_info_mcp)
49 changes: 49 additions & 0 deletions
49
metadata-ingestion/examples/library/lineage_dataset_job_dataset.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
"""Example: emit dataset -> job -> dataset lineage via DataJobInputOutput.

Declares the input datasets, output datasets, and upstream jobs of a data
job, then upserts that aspect to a local DataHub GMS instance over REST.
"""
from typing import List

import datahub.emitter.mce_builder as builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.com.linkedin.pegasus2avro.datajob import DataJobInputOutputClass
from datahub.metadata.schema_classes import ChangeTypeClass

# Datasets the job reads from.
input_datasets: List[str] = [
    builder.make_dataset_urn(platform="mysql", name=table, env="PROD")
    for table in ("librarydb.member", "librarydb.checkout")
]

# Dataset the job writes to.
output_datasets: List[str] = [
    builder.make_dataset_urn(
        platform="kafka", name="debezium.topics.librarydb.member_checkout", env="PROD"
    )
]

# Jobs that must run before this one (job -> job lineage).
input_data_jobs: List[str] = [
    builder.make_data_job_urn(
        orchestrator="airflow", flow_id="flow1", job_id="job0", cluster="PROD"
    )
]

datajob_input_output = DataJobInputOutputClass(
    inputDatasets=input_datasets,
    outputDatasets=output_datasets,
    inputDatajobs=input_data_jobs,
)

# Wrap the aspect in a MetadataChangeProposalWrapper targeting the job.
# NOTE: This will overwrite all of the existing lineage information
# associated with this job.
datajob_input_output_mcp = MetadataChangeProposalWrapper(
    entityType="datajob",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=builder.make_data_job_urn(
        orchestrator="airflow", flow_id="flow1", job_id="job1", cluster="PROD"
    ),
    aspectName="dataJobInputOutput",
    aspect=datajob_input_output,
)

# Emit the proposal to the GMS REST API.
emitter = DatahubRestEmitter("http://localhost:8080")
emitter.emit_mcp(datajob_input_output_mcp)
42 changes: 42 additions & 0 deletions
42
metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
"""Example: emit dataset -> dataset lineage via an UpstreamLineage aspect.

Builds an upstream-lineage aspect for a downstream BigQuery table and
upserts it to a local DataHub GMS instance over REST.
"""
from typing import List

import datahub.emitter.mce_builder as builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
    DatasetLineageTypeClass,
    UpstreamClass,
    UpstreamLineage,
)
from datahub.metadata.schema_classes import ChangeTypeClass

# Construct the upstream tables, each marked as a TRANSFORMED dependency.
upstream_tables: List[UpstreamClass] = [
    UpstreamClass(
        dataset=builder.make_dataset_urn("bigquery", table_name, "PROD"),
        type=DatasetLineageTypeClass.TRANSFORMED,
    )
    for table_name in ("upstream_table_1", "upstream_table_2")
]

# Bundle the upstreams into a single lineage aspect.
upstream_lineage = UpstreamLineage(upstreams=upstream_tables)

# Wrap the aspect in a MetadataChangeProposalWrapper targeting the
# downstream dataset.
lineage_mcp = MetadataChangeProposalWrapper(
    entityType="dataset",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=builder.make_dataset_urn("bigquery", "downstream"),
    aspectName="upstreamLineage",
    aspect=upstream_lineage,
)

# Emit the proposal to the GMS REST API.
emitter = DatahubRestEmitter("http://localhost:8080")
emitter.emit_mcp(lineage_mcp)
30 changes: 30 additions & 0 deletions
30
metadata-ingestion/examples/library/lineage_job_dataflow.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
"""Example: emit job -> flow lineage by writing a DataJobInfo aspect.

Associates a data job with its parent data flow, then upserts that aspect
to a local DataHub GMS instance over REST.
"""
import datahub.emitter.mce_builder as builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.com.linkedin.pegasus2avro.datajob import DataJobInfoClass
from datahub.metadata.schema_classes import ChangeTypeClass

# Construct the DataJobInfo aspect with the job -> flow lineage.
dataflow_urn = builder.make_data_flow_urn(
    orchestrator="airflow", flow_id="flow1", cluster="prod"
)

datajob_info = DataJobInfoClass(name="My Job 1", type="AIRFLOW", flowUrn=dataflow_urn)

# Construct a MetadataChangeProposalWrapper object with the DataJobInfo aspect.
# NOTE: This will overwrite all of the existing dataJobInfo aspect information
# associated with this job.
# (Renamed from `chart_info_mcp`: this proposal targets a data job, not a chart.)
datajob_info_mcp = MetadataChangeProposalWrapper(
    entityType="dataJob",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=builder.make_data_job_urn(
        orchestrator="airflow", flow_id="flow1", job_id="job1", cluster="prod"
    ),
    aspectName="dataJobInfo",
    aspect=datajob_info,
)

# Create an emitter to the GMS REST API.
emitter = DatahubRestEmitter("http://localhost:8080")

# Emit metadata!
emitter.emit_mcp(datajob_info_mcp)