def test_mutate_row(self):
        """Queue four mutation-free rows at once, flush, and expect the
        table stub to have recorded exactly one mutation call."""
        fake_table = _Table(self.TABLE_NAME)
        batcher = MutationsBatcher(table=fake_table)

        row_keys = (b'row_key', b'row_key_2', b'row_key_3', b'row_key_4')
        batcher.mutate_rows([DirectRow(row_key=key) for key in row_keys])
        batcher.flush()

        self.assertEqual(fake_table.mutation_calls, 1)
Exemplo n.º 2
0
    def test_mutate_row_with_max_mutations(self):
        """Add one row carrying three cell mutations, flush, and expect the
        table stub to have recorded exactly one mutation call."""
        fake_table = _Table(self.TABLE_NAME)
        batcher = MutationsBatcher(table=fake_table)

        direct_row = DirectRow(row_key=b"row_key")
        # Same three cells, in the same order, all in column family "cf1".
        for qualifier, value in ((b"c1", 1), (b"c2", 2), (b"c3", 3)):
            direct_row.set_cell("cf1", qualifier, value)

        batcher.mutate(direct_row)
        batcher.flush()

        self.assertEqual(fake_table.mutation_calls, 1)
    def test_mutate_row_with_max_mutations(self):
        """A single row with three cells, mutated then flushed, must show up
        as one mutation call on the table stub."""
        stub_table = _Table(self.TABLE_NAME)
        batcher = MutationsBatcher(table=stub_table)

        single_row = DirectRow(row_key=b'row_key')
        cells = [(b'c1', 1), (b'c2', 2), (b'c3', 3)]
        for column, value in cells:
            single_row.set_cell('cf1', column, value)

        batcher.mutate(single_row)
        batcher.flush()

        self.assertEqual(stub_table.mutation_calls, 1)
Exemplo n.º 4
0
    def test_mutate_row_with_max_mutations(self):
        """Mutate one three-cell row through the batcher and flush; the table
        stub is expected to count a single mutation call."""
        recording_table = _Table(self.TABLE_NAME)
        batcher = MutationsBatcher(table=recording_table)

        target_row = DirectRow(row_key=b'row_key')
        # enumerate(..., 1) yields the values 1, 2, 3 for c1, c2, c3.
        for value, column in enumerate((b'c1', b'c2', b'c3'), 1):
            target_row.set_cell('cf1', column, value)

        batcher.mutate(target_row)
        batcher.flush()

        self.assertEqual(recording_table.mutation_calls, 1)
    def test_mutate_row_with_max_mutations(self):
        """One row holding three cell writes goes through mutate() and
        flush(); the table stub must report one mutation call."""
        table_stub = _Table(self.TABLE_NAME)
        batcher = MutationsBatcher(table=table_stub)

        row_under_test = DirectRow(row_key=b"row_key")
        family = "cf1"
        row_under_test.set_cell(family, b"c1", 1)
        row_under_test.set_cell(family, b"c2", 2)
        row_under_test.set_cell(family, b"c3", 3)

        batcher.mutate(row_under_test)
        batcher.flush()

        self.assertEqual(table_stub.mutation_calls, 1)
Exemplo n.º 6
0
    def test_mutate_row(self):
        """Pass four mutation-free rows to mutate_rows(), flush, and expect
        the table stub to have counted exactly one mutation call."""
        table_stub = _Table(self.TABLE_NAME)
        batcher = MutationsBatcher(table=table_stub)

        pending_rows = [
            DirectRow(row_key=key)
            for key in (b"row_key", b"row_key_2", b"row_key_3", b"row_key_4")
        ]
        batcher.mutate_rows(pending_rows)
        batcher.flush()

        self.assertEqual(table_stub.mutation_calls, 1)
    def test_flush_with_no_rows(self):
        """Flushing a batcher that was given no rows must leave the table
        stub's mutation-call counter at zero."""
        fake_table = _Table(self.TABLE_NAME)
        MutationsBatcher(table=fake_table).flush()

        self.assertEqual(fake_table.mutation_calls, 0)
Exemplo n.º 8
0
    def test_flush_with_no_rows(self):
        """An immediate flush() on a fresh batcher records no mutation calls
        on the table stub."""
        table_stub = _Table(self.TABLE_NAME)
        empty_batcher = MutationsBatcher(table=table_stub)

        # Nothing has been queued, so this flush has no rows to send.
        empty_batcher.flush()

        self.assertEqual(table_stub.mutation_calls, 0)
Exemplo n.º 9
0
class WriteToBigtable(beam.DoFn):
    """Write every row that reaches this DoFn to a Bigtable table.

    ``start_bundle`` builds the Bigtable client, instance, table and a
    ``MutationsBatcher``; ``process`` hands each incoming row to the batcher;
    ``finish_bundle`` flushes whatever the batcher still holds.

    :type beam_options: class:`~bigtable_configuration.BigtableConfiguration`
    :param beam_options: Connection settings. The attributes read from it
        are ``project_id``, ``instance_id``, ``table_id`` and
        ``credentials``.

    :type flush_count: int
    :param flush_count: (Optional) Max number of rows to batch before the
        batcher flushes. When omitted, the argument is not forwarded and
        the batcher's own default applies (FLUSH_COUNT, 1000 rows).

    :type max_row_bytes: int
    :param max_row_bytes: (Optional) Max accumulated size of row mutations
        before the batcher flushes. When omitted, the argument is not
        forwarded and the batcher's own default applies (MAX_ROW_BYTES,
        5 MB).

    :type app_profile_id: str
    :param app_profile_id: (Optional) The unique name of the AppProfile.
    """

    def __init__(self, beam_options, flush_count=None, max_row_bytes=None,
                 app_profile_id=None):
        super(WriteToBigtable, self).__init__(beam_options)
        self.beam_options = beam_options
        # Populated by start_bundle().
        self.client = None
        self.instance = None
        self.table = None
        self.batcher = None
        self._app_profile_id = app_profile_id
        self.flush_count = flush_count
        self.max_row_bytes = max_row_bytes

    def start_bundle(self):
        """Create the client, instance, table and batcher used for writing."""
        client_kwargs = {
            'project': self.beam_options.project_id,
            'admin': True,
        }
        # Pass credentials only when they were configured explicitly.
        if self.beam_options.credentials is not None:
            client_kwargs['credentials'] = self.beam_options.credentials
        self.client = bigtable.Client(**client_kwargs)

        self.instance = self.client.instance(self.beam_options.instance_id)
        self.table = self.instance.table(self.beam_options.table_id,
                                         self._app_profile_id)

        # Forward the limits only when the caller set them: passing None
        # explicitly would replace the batcher's defaults with None instead
        # of the FLUSH_COUNT / MAX_ROW_BYTES values the class documents.
        batcher_kwargs = {}
        if self.flush_count is not None:
            batcher_kwargs['flush_count'] = self.flush_count
        if self.max_row_bytes is not None:
            batcher_kwargs['max_row_bytes'] = self.max_row_bytes
        self.batcher = MutationsBatcher(self.table, **batcher_kwargs)

    def process(self, row):
        """Queue ``row`` on the batcher created in ``start_bundle``."""
        self.batcher.mutate(row)

    def finish_bundle(self):
        """Flush the batcher and return whatever ``flush()`` returns."""
        return self.batcher.flush()