Example #1
0
    def insert_data(self,
                    rows,
                    row_ids=None,
                    skip_invalid_rows=None,
                    ignore_unknown_values=None,
                    template_suffix=None,
                    client=None):
        """API call:  insert table data via a POST request

        Streams rows into the table using the ``tabledata.insertAll``
        endpoint:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll

        :type rows: list of tuples
        :param rows: Row data to be inserted. Each tuple should contain data
                     for each schema field on the current table and in the
                     same order as the schema fields.

        :type row_ids: list of string
        :param row_ids: Unique ids, one per row being inserted.  If not
                        passed, no de-duplication occurs.

        :type skip_invalid_rows: bool
        :param skip_invalid_rows: (Optional) skip rows w/ invalid data?

        :type ignore_unknown_values: bool
        :param ignore_unknown_values: (Optional) ignore columns beyond schema?

        :type template_suffix: str
        :param template_suffix:
            (Optional) treat ``name`` as a template table and provide a suffix.
            BigQuery will create the table ``<name> + <template_suffix>`` based
            on the schema of the template table. See
            https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables

        :type client: :class:`~google.cloud.bigquery.client.Client` or
                      ``NoneType``
        :param client: the client to use.  If not passed, falls back to the
                       ``client`` stored on the current dataset.

        :rtype: list of mappings
        :returns: One mapping per row with insert errors:  the "index" key
                  identifies the row, and the "errors" key contains a list
                  of the mappings describing one or more problems with the
                  row.
        :raises: ValueError if table's schema is not set
        """
        # Cannot serialize row values without a schema to map field types.
        if not self._schema:
            raise ValueError(_TABLE_HAS_NO_SCHEMA)

        client = self._require_client(client)

        # Encode each row as {'json': {field_name: value, ...}}, applying the
        # per-type JSON converter where one is registered.
        encoded_rows = []
        for position, row in enumerate(rows):
            json_fields = {}
            for field, cell in zip(self._schema, row):
                convert = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type)
                if convert is not None:  # STRING doesn't need converting
                    cell = convert(cell)
                json_fields[field.name] = cell

            entry = {'json': json_fields}
            if row_ids is not None:
                # insertId enables best-effort de-duplication server side.
                entry['insertId'] = row_ids[position]
            encoded_rows.append(entry)

        payload = {'rows': encoded_rows}

        # Optional request-level flags: include only those explicitly set.
        for value, key in ((skip_invalid_rows, 'skipInvalidRows'),
                           (ignore_unknown_values, 'ignoreUnknownValues'),
                           (template_suffix, 'templateSuffix')):
            if value is not None:
                payload[key] = value

        response = client._connection.api_request(
            method='POST',
            path='%s/insertAll' % self.path,
            data=payload)

        # Normalize the server's error report into simple dicts.
        return [{'index': int(entry['index']), 'errors': entry['errors']}
                for entry in response.get('insertErrors', ())]
Example #2
0
    async def insert_rows(self, table, rows, selected_fields=None, **kwargs):
        """__Asynchronous__ insertion of rows into a table via the streaming API

        Credit:
        http://google-cloud-python.readthedocs.io/en/latest/_modules/google/cloud
        /bigquery/client.html#Client.insert_rows
        https://cloud.google.com/bigquery/docs/reference/rest/v2/
        tabledata/insertAll

        :type table: One of:
                     :class:`~google.cloud.bigquery.table.Table`
                     :class:`~google.cloud.bigquery.table.TableReference`
        :param table: the destination table for the row data, or a reference
                      to it.

        :type rows: One of:
                    list of tuples
                    list of dictionaries
        :param rows: Row data to be inserted. If a list of tuples is given,
                     each tuple should contain data for each schema field on
                     the current table and in the same order as the schema
                     fields.  If a list of dictionaries is given, the keys must
                     include all required fields in the schema.  Keys which do
                     not correspond to a field in the schema are ignored.

        :type selected_fields:
            list of :class:`~google.cloud.bigquery.schema.SchemaField`
        :param selected_fields:
            The fields to return. Required if ``table`` is a
            :class:`~google.cloud.bigquery.table.TableReference`.

        :type kwargs: dict
        :param kwargs:
            Keyword arguments to
            :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`

        :rtype: list of mappings
        :returns: One mapping per row with insert errors:  the "index" key
                  identifies the row, and the "errors" key contains a list
                  of the mappings describing one or more problems with the
                  row.
        :raises: ValueError if the table's schema is not set, or if ``table``
                 is a ``TableReference`` and ``selected_fields`` is missing;
                 TypeError if ``table`` is neither a ``Table`` nor a
                 ``TableReference``.
        """
        # Resolve the schema used to serialize row values: an explicit
        # ``selected_fields`` wins; otherwise it must come from the Table.
        if selected_fields is not None:
            schema = selected_fields
        elif isinstance(table, TableReference):
            raise ValueError('need selected_fields with TableReference')
        elif isinstance(table, Table):
            if not table.schema:
                raise ValueError(_TABLE_HAS_NO_SCHEMA)
            schema = table.schema
        else:
            raise TypeError('table should be Table or TableReference')

        # Convert every row (tuple or mapping) into a JSON-compatible dict
        # keyed by field name, applying per-type converters where registered.
        json_rows = []
        for index, row in enumerate(rows):
            if isinstance(row, dict):
                row = _row_from_mapping(row, schema)
            json_row = {}

            for field, value in zip(schema, row):
                converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type)
                if converter is not None:  # STRING doesn't need converting
                    value = converter(value)
                json_row[field.name] = value

            json_rows.append(json_row)

        # Delegate the actual streaming-insert request.
        return await self.insert_rows_json(table, json_rows, **kwargs)