def UpdateTable(self,
                    reference,
                    schema=None,
                    description=None,
                    friendly_name=None,
                    expiration=None):
        """Updates a table.

    Arguments:
      reference: the TableReference to update.
      schema: an optional schema.
      description: an optional table description.
      friendly_name: an optional friendly name for the table.
      expiration: optional expiration time in milliseconds since the epoch.

    Raises:
      bigquery_client.BigqueryAccessDeniedError: if the configured master key
        does not hash to the value stored with the table.
    """
        if schema:
            self._CheckKeyfileFlag()

        if description:
            hashed_table_key, table_version, table_schema = (
                self._GetEBQTableInfo(str(reference)))
            if schema:
                master_key = load_lib.ReadMasterKeyFile(
                    self.master_key_filename)
                # pylint: disable=too-many-function-args
                hashed_key = base64.b64encode(
                    hashlib.sha1(master_key).digest())
                if hashed_key != hashed_table_key:
                    raise bigquery_client.BigqueryAccessDeniedError(
                        'Invalid master key for this table.', None, None, None)
                cipher = ecrypto.ProbabilisticCipher(master_key)
                real_schema = json.dumps(load_lib.RewriteSchema(schema))
                # BUG FIX: was `str.encode('utf-8')`, which encodes the
                # literal string 'utf-8' and throws away the serialized
                # schema; encode the schema JSON itself.
                real_schema = real_schema.encode('utf-8')
                table_schema = base64.b64encode(
                    cipher.Encrypt(zlib.compress(real_schema)))
            # NOTE(review): the encrypted schema is only re-embedded in the
            # description when a description is supplied; a schema-only update
            # leaves the stored cipher schema stale -- confirm this is
            # intended.
            description = util.ConstructTableDescription(
                description, hashed_table_key, table_version, table_schema)

        # Rewrite the schema if the schema is to be updated.
        if schema:
            schema = load_lib.RewriteSchema(schema)

        super(EncryptedBigqueryClient,
              self).UpdateTable(reference, schema, description, friendly_name,
                                expiration)
# Exemplo n.º 2 (0) -- web-scraper artifact separating pasted examples;
# commented out so the file remains valid Python.
 def testRewriteSchemaWhenString(self):
     """Regression test on RewriteSchema() sloppy behavior on inputs."""
     schema = test_util.GetJobsSchemaString()
     # Python 2 idiom: types.StringTypes covers both str and unicode.
     self.assertTrue(isinstance(schema, types.StringTypes))
     # set up the for loop to wrongfully iterate only once, a single char,
     # instead of supplying the entire string and needing to break the
     # flow after _RewriteField()
     schema = schema[0]
     self.mox.StubOutWithMock(load_lib, '_RewriteField')
     # Recorded mox expectation: _RewriteField is called exactly once with the
     # single character and an empty accumulator list, and returns None.
     load_lib._RewriteField(schema, []).AndReturn(None)
     self.mox.ReplayAll()
     new_schema = load_lib.RewriteSchema(schema)
     self.mox.VerifyAll()
     # The accumulator list handed to _RewriteField is what RewriteSchema
     # ultimately returns, so it should still be empty here.
     self.assertEqual(new_schema, [])
    def CreateTable(self,
                    reference,
                    ignore_existing=False,
                    schema=None,
                    description=None,
                    friendly_name=None,
                    expiration=None):
        """Create a table corresponding to TableReference.

    Arguments:
      reference: the TableReference to create.
      ignore_existing: (boolean, default False) If False, raise an exception if
        the dataset already exists.
      schema: A required schema (also requires a master key).
      description: an optional table description.
      friendly_name: an optional friendly name for the table.
      expiration: optional expiration time in milliseconds since the epoch.

    Raises:
      TypeError: if reference is not a TableReference.
      BigqueryDuplicateError: if reference exists and ignore_existing
        is False.
    """
        # A schema is mandatory here: it gets encrypted and embedded in the
        # table description below.
        if schema is None:
            raise bigquery_client.BigqueryNotFoundError(
                'A schema must be specified when making a table.', None, None,
                None)
        self._CheckKeyfileFlag()
        schema = load_lib.ReadSchemaFile(schema)
        key = load_lib.ReadMasterKeyFile(self.master_key_filename, True)
        # pylint: disable=too-many-function-args
        key_hash = base64.b64encode(hashlib.sha1(key).digest())
        # Serialize, compress, then encrypt the schema so it can travel
        # (base64-encoded) inside the table description.
        packed_schema = zlib.compress(json.dumps(schema).encode('utf-8'))
        ciphered_schema = base64.b64encode(
            ecrypto.ProbabilisticCipher(key).Encrypt(packed_schema))
        description = '' if description is None else description
        annotated_description = util.ConstructTableDescription(
            description, key_hash, util.EBQ_TABLE_VERSION, ciphered_schema)
        rewritten_schema = load_lib.RewriteSchema(schema)
        super(EncryptedBigqueryClient,
              self).CreateTable(reference, ignore_existing, rewritten_schema,
                                annotated_description, friendly_name,
                                expiration)
# Exemplo n.º 4 (0) -- web-scraper artifact separating pasted examples;
# commented out so the file remains valid Python.
 def testRewriteSchemaTypeCheck(self):
     """RewriteSchema() should return the same container type it was given."""
     original = json.loads(test_util.GetJobsSchemaString())
     # Python 2: types.ListType is an alias for the built-in list type.
     self.assertTrue(isinstance(original, types.ListType))
     rewritten = load_lib.RewriteSchema(original)
     self.assertTrue(isinstance(rewritten, original.__class__))
# Exemplo n.º 5 (0) -- web-scraper artifact separating pasted examples;
# commented out so the file remains valid Python.
 def testRewriteMultipleNestedSchemaAsJsonFile(self):
     """RewriteSchema() rewrites the nested jobs schema to the expected form."""
     schema = json.loads(test_util.GetJobsSchemaString())
     new_schema = load_lib.RewriteSchema(schema)
     expected_schema = json.loads(_JOBS_REWRITTEN_SCHEMA)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected_schema, new_schema)
    def Load(self, destination_table, source, schema=None, **kwds):
        """Encrypt the given data and then load it into BigQuery.

    The job will execute synchronously if sync=True is provided as an
    argument.

    Args:
      destination_table: TableReference to load data into.
      source: String specifying source data to load.
      schema: The schema that defines fields to be loaded.
      **kwds: Passed on to self.ExecuteJob.

    Returns:
      The resulting job info.

    Raises:
      bigquery_client.BigqueryAccessDeniedError: if the master key or the
        schema does not match what is stored with the table.
      bigquery_client.BigqueryNotFoundError: if the table version is invalid.
      app.UsageError: if source_format is neither CSV nor
        NEWLINE_DELIMITED_JSON.
      OSError: if the temporary directory was removed before cleanup.
    """
        self._CheckKeyfileFlag()
        self._CheckSchemaFile(schema)

        # To make encrypting more secure, we use different keys for each table
        # and cipher. To generate a different key for each table, we need a distinct
        # table identifier for each table. A table name is not secure since a table
        # can be deleted and created with the same name and, thus the same key. The
        # only distinct identifier happens to be creation time. Therefore, we must
        # construct a table if it does not exist so we can use the creation time
        # to encrypt values.
        try:
            self.CreateTable(destination_table, schema=schema)
        except bigquery_client.BigqueryDuplicateError:
            pass  # Table already exists.

        temp_dir = tempfile.mkdtemp()
        # BUG FIX: everything after mkdtemp now runs under try/finally so the
        # temporary directory is removed even when one of the validation
        # checks below raises; previously it leaked on every failure path.
        try:
            orig_schema = load_lib.ReadSchemaFile(schema)
            new_schema = load_lib.RewriteSchema(orig_schema)
            new_schema_file = '%s/schema.enc_schema' % temp_dir
            # write the new schema as a json file
            with open(new_schema_file, 'wt') as f:
                json.dump(new_schema, f, indent=2)
            new_source_file = '%s/data.enc_data' % temp_dir
            # TODO(user): Put the filepath to the master key in .bigqueryrc file.
            master_key = load_lib.ReadMasterKeyFile(self.master_key_filename,
                                                    True)
            table_name = str(destination_table).split(':')[-1]
            # Key derivation uses table name + creation time as the distinct
            # table identifier (see the comment above CreateTable).
            table_id = '%s_%s' % (
                table_name, self._GetTableCreationTime(str(destination_table)))
            hashed_table_key, table_version, table_schema = (
                self._GetEBQTableInfo(str(destination_table)))
            hashed_master_key = hashlib.sha1(master_key)
            # pylint: disable=too-many-function-args
            hashed_master_key = base64.b64encode(hashed_master_key.digest())
            if hashed_master_key != hashed_table_key:
                raise bigquery_client.BigqueryAccessDeniedError(
                    'Invalid master key for this table.', None, None, None)
            if table_version != util.EBQ_TABLE_VERSION:
                raise bigquery_client.BigqueryNotFoundError(
                    'Invalid table version.', None, None, None)
            # TODO(user): Generate a different key.
            cipher = ecrypto.ProbabilisticCipher(master_key)
            table_schema = cipher.Decrypt(base64.b64decode(table_schema),
                                          raw=True)
            table_schema = zlib.decompress(table_schema)
            table_schema = table_schema.decode('utf-8')
            table_schema = json.loads(table_schema)
            if table_schema != orig_schema:
                raise bigquery_client.BigqueryAccessDeniedError(
                    'Invalid schema for this table.', None, None, None)
            # BUG FIX: use kwds.get() so a caller that omits source_format
            # falls through to the CSV default instead of raising KeyError
            # (the CSV branch already anticipated a falsy value here).
            source_format = kwds.get('source_format')
            if source_format == 'NEWLINE_DELIMITED_JSON':
                load_lib.ConvertJsonDataFile(orig_schema, master_key, table_id,
                                             source, new_source_file)
            elif source_format == 'CSV' or not source_format:
                load_lib.ConvertCsvDataFile(orig_schema, master_key, table_id,
                                            source, new_source_file)
            else:
                raise app.UsageError(
                    'Currently, we do not allow loading from file types other than\n'
                    'NEWLINE_DELIMITED_JSON and CSV.')
            job = super(EncryptedBigqueryClient,
                        self).Load(destination_table, new_source_file,
                                   schema=new_schema_file, **kwds)
        finally:
            try:
                shutil.rmtree(temp_dir)
            except OSError:
                raise OSError('Temp file deleted by user before termination.')
        return job