Example No. 1
    def create_table(self, dataset, table, schema):
        """Create a new table in the dataset.

        Args:
            dataset: the dataset to create the table in.
            table: the name of the table to create.
            schema: table schema dict.

        Returns:
            bool indicating if the table was successfully created or not.
        """

        body = {
            'schema': {'fields': schema},
            'tableReference': {
                'tableId': table,
                'projectId': self.project_id,
                'datasetId': dataset
            }
        }

        try:
            self.bigquery.tables().insert(
                projectId=self.project_id,
                datasetId=dataset,
                body=body
            ).execute()
            return True

        except Exception as e:
            logger.error('Cannot create table %s.%s: %s' % (dataset, table, e))
            return False
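
A minimal usage sketch for create_table, assuming client is an already constructed instance of the class these methods belong to; the dataset, table, and field names below are hypothetical placeholders:

# Hypothetical schema: a list of BigQuery field definitions.
schema = [
    {'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED'},
    {'name': 'payload', 'type': 'STRING', 'mode': 'NULLABLE'},
]

# create_table returns True on success and False on failure.
if client.create_table('my_dataset', 'my_table', schema):
    print('Table my_dataset.my_table created')
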
Example No. 2
    def create_dataset(self, dataset_id, friendly_name=None, description=None,
                       access=None):
        """Create a new BigQuery dataset.

        Args:
            dataset_id: required unique string identifying the dataset within
                        the project (the referenceId of the dataset, not the
                        integer id of the dataset)
            friendly_name: optional string providing a human readable name
            description: optional longer string providing a description
            access: optional object indicating access permissions (see
                    https://developers.google.com/bigquery/docs/reference/v2/
                    datasets#resource)

        Returns:
            bool indicating if dataset was created or not
        """
        try:
            datasets = self.bigquery.datasets()
            dataset_data = self.dataset_resource(dataset_id,
                                                 friendly_name=friendly_name,
                                                 description=description,
                                                 access=access)

            datasets.insert(projectId=self.project_id,
                            body=dataset_data).execute()
            return True
        except Exception as e:
            logger.error('Cannot create dataset %s, %s' % (dataset_id, e))
            return False
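
A usage sketch for create_dataset under the same assumption that client is an existing instance of this class; the dataset id and descriptive strings are placeholders:

# Create a dataset and check the boolean result.
created = client.create_dataset(
    'my_dataset',
    friendly_name='My dataset',
    description='Dataset created for illustration')
if not created:
    print('Dataset creation failed; check the error log')
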
Example No. 3
    def create_dataset(self,
                       dataset_id,
                       friendly_name=None,
                       description=None,
                       access=None):
        """Create a new BigQuery dataset.

        Args:
            dataset_id: required unique string identifying the dataset within
                        the project (the referenceId of the dataset, not the
                        integer id of the dataset)
            friendly_name: optional string providing a human readable name
            description: optional longer string providing a description
            access: optional object indicating access permissions (see
                    https://developers.google.com/bigquery/docs/reference/v2/
                    datasets#resource)

        Returns:
            bool indicating if dataset was created or not
        """
        try:
            datasets = self.bigquery.datasets()
            dataset_data = self.dataset_resource(dataset_id,
                                                 friendly_name=friendly_name,
                                                 description=description,
                                                 access=access)

            datasets.insert(projectId=self.project_id,
                            body=dataset_data).execute()
            return True
        except Exception as e:
            logger.error('Cannot create dataset %s, %s' % (dataset_id, e))
            return False
Example No. 4
    def patch_dataset(self, dataset_id, friendly_name=None, description=None,
                      access=None):
        """Updates information in an existing dataset. The update method
        replaces the entire dataset resource, whereas the patch method only
        replaces fields that are provided in the submitted dataset resource.

        Args:
            dataset_id: required unique string identifying the dataset within
                        the project (the referenceId of the dataset).
            friendly_name: an optional descriptive name for the dataset.
            description: an optional description of the dataset.
            access: an optional object indicating access permissions.
        Returns:
            bool indicating if the patch was successful or not.
        """
        try:
            datasets = self.bigquery.datasets()
            body = self.dataset_resource(dataset_id, friendly_name,
                                         description, access)
            request = datasets.patch(projectId=self.project_id,
                                     datasetId=dataset_id, body=body)
            request.execute()
            return True
        except Exception as e:
            logger.error('Cannot patch dataset %s: %s' % (dataset_id, e))
            return False
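
A sketch of patching a single field of an existing dataset, again assuming a pre-built client instance named client; per the docstring, only the fields submitted in the patch body are replaced:

# Update only the description of an existing dataset.
patched = client.patch_dataset('my_dataset',
                               description='Updated description')
if patched:
    print('Dataset my_dataset patched')
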
Example No. 5
    def create_table(self, dataset, table, schema):
        """Create a new table in the dataset.

        Args:
            dataset: the dataset to create the table in.
            table: the name of the table to create.
            schema: table schema dict.

        Returns:
            bool indicating if the table was successfully created or not.
        """

        body = {
            'schema': {
                'fields': schema
            },
            'tableReference': {
                'tableId': table,
                'projectId': self.project_id,
                'datasetId': dataset
            }
        }

        try:
            self.bigquery.tables().insert(projectId=self.project_id,
                                          datasetId=dataset,
                                          body=body).execute()
            return True

        except Exception as e:
            logger.error('Cannot create table %s.%s: %s' % (dataset, table, e))
            return False
Example No. 6
    def patch_dataset(self,
                      dataset_id,
                      friendly_name=None,
                      description=None,
                      access=None):
        """Updates information in an existing dataset. The update method
        replaces the entire dataset resource, whereas the patch method only
        replaces fields that are provided in the submitted dataset resource.

        Args:
            dataset_id: required unique string identifying the dataset within
                        the project (the referenceId of the dataset).
            friendly_name: an optional descriptive name for the dataset.
            description: an optional description of the dataset.
            access: an optional object indicating access permissions.
        Returns:
            bool indicating if the patch was successful or not.
        """
        try:
            datasets = self.bigquery.datasets()
            body = self.dataset_resource(dataset_id, friendly_name,
                                         description, access)
            request = datasets.patch(projectId=self.project_id,
                                     datasetId=dataset_id,
                                     body=body)
            request.execute()
            return True
        except Exception as e:
            logger.error('Cannot patch dataset %s: %s' % (dataset_id, e))
            return False
Example No. 7
    def get_datasets(self):
        """List all datasets in the project.

        Returns:
            a list of dataset resources, or None if the request fails
        """
        try:
            datasets = self.bigquery.datasets()
            request = datasets.list(projectId=self.project_id)
            result = request.execute()
            return result.get('datasets', [])
        except Exception as e:
            logger.error("Cannot list datasets: %s" % e)
            return None
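
A usage sketch for get_datasets, assuming the same client instance; note that it returns None rather than an empty list when the underlying request fails, so the two cases should be distinguished:

datasets = client.get_datasets()
if datasets is None:
    print('Could not list datasets')  # the request itself failed
else:
    for dataset in datasets:
        # Each entry is a dataset resource dict from the datasets.list response.
        print(dataset['datasetReference']['datasetId'])
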
Example No. 8
    def get_datasets(self):
        """List all datasets in the project.

        Returns:
            a list of dataset resources, or None if the request fails
        """
        try:
            datasets = self.bigquery.datasets()
            request = datasets.list(projectId=self.project_id)
            result = request.execute()
            return result.get('datasets', [])
        except Exception as e:
            logger.error("Cannot list datasets: %s" % e)
            return None
Example No. 9
    def push_rows(self, dataset, table, rows, insert_id_key=None):
        """Upload rows to BigQuery table.

        Args:
            dataset: the dataset to upload to.
            table: the name of the table to insert rows into.
            rows: list of row dicts to add to the table.
            insert_id_key: optional key in each row whose value is used as the
                           row's insertId.

        Returns:
            bool indicating if insert succeeded or not.
        """

        table_data = self.bigquery.tabledata()

        rows_data = []
        for row in rows:
            each_row = {}
            each_row["json"] = row
            if insert_id_key is not None and insert_id_key in row:
                each_row["insertId"] = row[insert_id_key]
            rows_data.append(each_row)

        data = {
            "kind": "bigquery#tableDataInsertAllRequest",
            "rows": rows_data
        }

        try:
            response = table_data.insertAll(
                projectId=self.project_id,
                datasetId=dataset,
                tableId=table,
                body=data
            ).execute()

            if response.get('insertErrors'):
                logger.error('BigQuery insert errors: %s' % response)
                return False

            return True

        except Exception as e:
            logger.error('Problem with BigQuery insertAll: %s' % e)
            return False
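
A streaming-insert sketch for push_rows with hypothetical row dicts; the optional insert_id_key names a field in each row whose value becomes the insertId, which BigQuery uses for best-effort deduplication of retried inserts:

rows = [
    {'event_id': 'a1', 'user': 'alice', 'amount': 3},
    {'event_id': 'b2', 'user': 'bob', 'amount': 5},
]

# Use the event_id field as the insertId so retried pushes are deduplicated.
if not client.push_rows('my_dataset', 'events', rows, insert_id_key='event_id'):
    print('Streaming insert failed; see the error log')
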
Example No. 10
    def push_rows(self, dataset, table, rows, insert_id_key=None):
        """Upload rows to BigQuery table.

        Args:
            dataset: the dataset to upload to.
            table: the name of the table to insert rows into.
            rows: list of row dicts to add to the table.
            insert_id_key: optional key in each row whose value is used as the
                           row's insertId.

        Returns:
            bool indicating if insert succeeded or not.
        """

        table_data = self.bigquery.tabledata()

        rows_data = []
        for row in rows:
            each_row = {}
            each_row["json"] = row
            if insert_id_key is not None and insert_id_key in row:
                each_row["insertId"] = row[insert_id_key]
            rows_data.append(each_row)

        data = {
            "kind": "bigquery#tableDataInsertAllRequest",
            "rows": rows_data
        }

        try:
            response = table_data.insertAll(projectId=self.project_id,
                                            datasetId=dataset,
                                            tableId=table,
                                            body=data).execute()

            if response.get('insertErrors'):
                logger.error('BigQuery insert errors: %s' % response)
                return False

            return True

        except Exception as e:
            logger.error('Problem with BigQuery insertAll: %s' % e)
            return False
Example No. 11
    def delete_table(self, dataset, table):
        """Delete a table from the dataset.

        Args:
            dataset: the dataset to delete the table from.
            table: the name of the table to delete.

        Returns:
            bool indicating if the table was successfully deleted or not.
        """

        try:
            self.bigquery.tables().delete(projectId=self.project_id,
                                          datasetId=dataset,
                                          tableId=table).execute()
            return True

        except Exception as e:
            logger.error('Cannot delete table %s.%s: %s' % (dataset, table, e))
            return False
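
A usage sketch for delete_table with the same client assumption; the method returns False (and logs the error) if the table does not exist or the request fails:

if client.delete_table('my_dataset', 'my_table'):
    print('Table my_dataset.my_table deleted')
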
Example No. 12
    def delete_dataset(self, dataset_id):
        """Delete a BigQuery dataset.

        Args:
            dataset_id: required unique string identifying the dataset within
                        the project (the referenceId of the dataset)
        Returns:
            bool indicating if the delete was successful or not

        Raises:
            HttpError 404 when dataset with dataset_id does not exist
        """
        try:
            datasets = self.bigquery.datasets()
            request = datasets.delete(projectId=self.project_id,
                                      datasetId=dataset_id)
            request.execute()
            return True
        except Exception as e:
            logger.error('Cannot delete dataset %s: %s' % (dataset_id, e))
            return False
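
A usage sketch for this variant of delete_dataset, which exposes no deleteContents flag, so deleting a dataset that still contains tables will fail and be reported through the boolean result:

if client.delete_dataset('my_dataset'):
    print('Dataset my_dataset deleted')
else:
    print('Delete failed (the dataset may not exist or may still contain tables)')
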
Example No. 13
    def delete_table(self, dataset, table):
        """Delete a table from the dataset.

        Args:
            dataset: the dataset to delete the table from.
            table: the name of the table to delete.

        Returns:
            bool indicating if the table was successfully deleted or not.
        """

        try:
            self.bigquery.tables().delete(
                projectId=self.project_id,
                datasetId=dataset,
                tableId=table
            ).execute()
            return True

        except Exception as e:
            logger.error('Cannot delete table %s.%s: %s' % (dataset, table, e))
            return False
Example No. 14
    def delete_dataset(self, dataset_id, delete_contents=False):
        """Delete a BigQuery dataset.

        Args:
            dataset_id: required unique string identifying the dataset within
                        the project (the referenceId of the dataset)
            delete_contents: if True, forces deletion of the dataset even when
                        it still contains data
        Returns:
            bool indicating if the delete was successful or not

        Raises:
            HttpError 404 when dataset with dataset_id does not exist
        """
        try:
            datasets = self.bigquery.datasets()
            request = datasets.delete(projectId=self.project_id,
                                      datasetId=dataset_id,
                                      deleteContents=delete_contents)
            request.execute()
            return True
        except Exception as e:
            logger.error('Cannot delete dataset %s: %s' % (dataset_id, e))
            return False
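
This variant exposes the API's deleteContents flag; a short sketch that removes a dataset together with any tables it still contains (same hypothetical client instance as in the earlier sketches):

# delete_contents=True maps to the deleteContents request parameter and
# allows the dataset to be removed even if it is not empty.
if client.delete_dataset('my_dataset', delete_contents=True):
    print('Dataset my_dataset and its contents deleted')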