Source code for airflow.providers.google.cloud.utils.bigquery_get_data
# Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License.from__future__importannotationsimportitertoolsfromtypingimportTYPE_CHECKINGfromgoogle.cloud.bigquery.tableimportRow,RowIteratorifTYPE_CHECKING:fromcollections.abcimportIteratorfromloggingimportLoggerfromairflow.providers.google.cloud.hooks.bigqueryimportBigQueryHook
def bigquery_get_data(
    logger: Logger,
    dataset_id: str | None,
    table_id: str | None,
    big_query_hook: BigQueryHook,
    batch_size: int,
    selected_fields: list[str] | str | None,
) -> Iterator:
    """
    Yield the contents of a BigQuery table one batch at a time.

    The hook's ``list_rows`` method is called repeatedly with a ``start_index``
    that grows by ``batch_size`` on every request. Iteration stops at the first
    request that comes back empty.

    :param logger: Logger that receives the progress messages.
    :param dataset_id: Forwarded to ``list_rows`` as ``dataset_id``.
    :param table_id: Forwarded to ``list_rows`` as ``table_id``.
    :param big_query_hook: Hook whose ``list_rows`` method performs each fetch.
    :param batch_size: Forwarded as ``max_results`` and also used as the step
        between consecutive ``start_index`` values.
    :param selected_fields: Forwarded to ``list_rows`` as ``selected_fields``.
    :return: A generator producing one list per non-empty batch; each element
        of that list is the result of calling ``values()`` on a fetched row.
    :raises TypeError: If ``list_rows`` hands back a ``RowIterator`` rather
        than a list of rows.
    """
    logger.info("Fetching Data from:")
    logger.info("Dataset: %s ; Table: %s", dataset_id, table_id)

    # Offsets run 0, batch_size, 2 * batch_size, ... until an empty page ends the loop.
    for offset in itertools.count(step=batch_size):
        page: list[Row] | RowIterator = big_query_hook.list_rows(
            dataset_id=dataset_id,
            table_id=table_id,
            max_results=batch_size,
            selected_fields=selected_fields,
            start_index=offset,
        )

        # len() below requires a materialised list, so an iterator is rejected outright.
        if isinstance(page, RowIterator):
            raise TypeError("BigQueryHook.list_rows() returns iterator when return_iterator=False (default)")

        fetched = len(page)
        if fetched == 0:
            logger.info("Job Finished")
            return

        # Running total: rows skipped so far plus the rows in this page.
        logger.info("Total Extracted rows: %s", fetched + offset)

        batch_values = [row.values() for row in page]
        yield batch_values