# Source code for tests.system.amazon.aws.example_glue_catalog
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
from datetime import datetime
from airflow.providers.amazon.aws.operators.glue_catalog import (
GlueCatalogCreateDatabaseOperator,
GlueCatalogCreateTableOperator,
GlueCatalogDeleteDatabaseOperator,
GlueCatalogDeleteTableOperator,
)
from airflow.providers.common.compat.sdk import DAG, chain
from system.amazon.aws.utils import ENV_ID_KEY, SystemTestContextBuilder
from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS
# TriggerRule is imported from ``airflow.sdk`` when running on Airflow 3.0+,
# and from its legacy ``airflow.utils.trigger_rule`` location otherwise.
if AIRFLOW_V_3_0_PLUS:
    from airflow.sdk import TriggerRule
else:
    from airflow.utils.trigger_rule import TriggerRule  # type: ignore[no-redef,attr-defined]
# Identifier passed as ``dag_id`` to the DAG defined in this module.
DAG_ID = "example_glue_catalog"
# Callable built by SystemTestContextBuilder; it is invoked inside the DAG to
# obtain the test context from which ENV_ID_KEY is read.
sys_test_context_task = SystemTestContextBuilder().build()
with DAG(
    dag_id=DAG_ID,
    schedule=None,
    start_date=datetime(2024, 1, 1),
    catchup=False,
) as dag:
    # The database, table and S3 location names below are all derived from
    # the environment ID read from the system-test context.
    test_context = sys_test_context_task()
    env_id = test_context[ENV_ID_KEY]

    db_name = f"{env_id}_test_db"

    # [START howto_operator_glue_catalog_create_database]
    create_database = GlueCatalogCreateDatabaseOperator(
        task_id="create_database",
        database_name=db_name,
        description="Test database for Glue Catalog",
    )
    # [END howto_operator_glue_catalog_create_database]

    # Cleanup task: ALL_DONE lets it run once its upstream tasks have finished,
    # whether they succeeded or failed.
    # [START howto_operator_glue_catalog_delete_database]
    delete_database = GlueCatalogDeleteDatabaseOperator(
        task_id="delete_database",
        database_name=db_name,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # [END howto_operator_glue_catalog_delete_database]

    table_name = f"{env_id}_tbl"
    # Table definition handed to the create-table operator as ``table_input``:
    # an external text-format table with a single ``id`` column of type int.
    table_input = {
        "StorageDescriptor": {
            "Columns": [{"Name": "id", "Type": "int"}],
            "Location": f"s3://{env_id}-glue/data/",
            "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
            "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
            "SerdeInfo": {"SerializationLibrary": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},
        },
        "TableType": "EXTERNAL_TABLE",
    }

    # [START howto_operator_glue_catalog_create_table]
    create_table = GlueCatalogCreateTableOperator(
        task_id="create_table",
        database_name=db_name,
        table_name=table_name,
        table_input=table_input,
    )
    # [END howto_operator_glue_catalog_create_table]

    # Cleanup task: ALL_DONE lets it run once its upstream tasks have finished,
    # whether they succeeded or failed.
    # [START howto_operator_glue_catalog_delete_table]
    delete_table = GlueCatalogDeleteTableOperator(
        task_id="delete_table",
        database_name=db_name,
        table_name=table_name,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # [END howto_operator_glue_catalog_delete_table]

    chain(
        # TEST SETUP
        test_context,
        # TEST BODY
        create_database,
        create_table,
        # TEST BODY / TEARDOWN (ALL_DONE, so cleanup is attempted even if a
        # create step failed)
        delete_table,
        delete_database,
    )

    from tests_common.test_utils.watcher import watcher

    # The delete tasks use a non-default trigger rule, so every task is wired
    # into the watcher task; the watcher is what lets the run be marked
    # success/failure correctly when such teardown tasks are part of the DAG.
    list(dag.tasks) >> watcher()
from tests_common.test_utils.system_tests import get_test_run  # noqa: E402

# Module-level ``test_run`` object built from the DAG; by Airflow system-test
# convention this is what allows the example DAG to be run with pytest.
test_run = get_test_run(dag)