def insert(self, event_or_events=None):
        # event_or_events = [
        #   event: {
        #       row: {'id': 3, 'a': 3}
        #   },
        #   event: {
        #       row: {'id': 3, 'a': 3}
        #   },
        # ]

        events = self.listify(event_or_events)
        if len(events) < 1:
            logging.warning('No events to insert. class: %s', __class__)
            return

        # assume we have at least one Event

        logging.debug('class:%s insert %d rows', __class__, len(events))

        for event in events:
            schema = self.dst_schema if self.dst_schema else event.schema
            table = None
            if self.dst_distribute:
                table = TableProcessor.create_distributed_table_name(
                    db=event.schema, table=event.table)
            else:
                table = self.dst_table if self.dst_table else event.table
                if self.dst_schema:
                    table = TableProcessor.create_migrated_table_name(
                        prefix=self.dst_table_prefix, table=table)

            sql = 'INSERT INTO `{0}`.`{1}` ({2}) FORMAT CSV'.format(
                schema,
                table,
                ', '.join(map(lambda column: '`%s`' % column,
                              event.fieldnames)),
            )

            choptions = ""
            if self.host:
                choptions += " --host=" + shlex.quote(self.host)
            if self.port:
                choptions += " --port=" + str(self.port)
            if self.user:
                choptions += " --user=" + shlex.quote(self.user)
            if self.password:
                choptions += " --password=" + shlex.quote(self.password)
            bash = "tail -n +2 '{0}' | clickhouse-client {1} --query='{2}'".format(
                event.filename,
                choptions,
                sql,
            )

            logging.info('starting clickhouse-client process')
            logging.debug('starting %s', bash)
            os.system(bash)

        pass
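
To make the command construction above concrete, here is a minimal standalone sketch that builds the same pipeline for made-up values (schema, table, file path, host and columns are hypothetical). The `tail -n +2` skips the CSV header row so that only data rows reach clickhouse-client:

import shlex

# Hypothetical sample values, for illustration only
schema, table = 'db', 'mytable'
fieldnames = ['id', 'a']
filename = '/tmp/mytable.csv'
host, port = '127.0.0.1', 9000

sql = 'INSERT INTO `{0}`.`{1}` ({2}) FORMAT CSV'.format(
    schema, table, ', '.join('`%s`' % column for column in fieldnames))
choptions = " --host=" + shlex.quote(host) + " --port=" + str(port)
bash = "tail -n +2 '{0}' | clickhouse-client {1} --query='{2}'".format(
    filename, choptions, sql)
print(bash)
# prints the shell pipeline, e.g.
# tail -n +2 '/tmp/mytable.csv' | clickhouse-client --host=127.0.0.1 --port=9000 --query='INSERT INTO `db`.`mytable` (`id`, `a`) FORMAT CSV'
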
Example #2
    def insert(self, event_or_events=None):
        # event_or_events = [
        #   event: {
        #       row: {'id': 3, 'a': 3}
        #   },
        #   event: {
        #       row: {'id': 3, 'a': 3}
        #   },
        # ]

        events = self.listify(event_or_events)
        if len(events) < 1:
            logging.warning('No events to insert. class: %s', __class__)
            return

        # assume we have at least one Event

        logging.debug('class:%s insert %d event(s)', __class__, len(events))

        # verify and convert events, then consolidate the converted rows from all events into one batch

        rows = []
        event_converted = None
        for event in events:
            if not event.verify:
                logging.warning(
                    'Event verification failed. Skip one event. Event: %s Class: %s',
                    event.meta(), __class__)
                continue  # for event

            event_converted = self.convert(event)
            for row in event_converted:
                for key in row.keys():
                    # convert Decimal, timedelta and bytes values to a str representation suitable for the table structure
                    if type(row[key]) == Decimal:
                        row[key] = str(row[key])
                    elif type(row[key]) == datetime.timedelta:
                        row[key] = str(row[key])
                    elif type(row[key]) == bytes:
                        row[key] = '0x' + str.upper(row[key].hex())

                rows.append(row)

        logging.debug('class:%s insert %d row(s)', __class__, len(rows))

        # determine target schema.table

        schema = self.dst_schema if self.dst_schema else event_converted.schema
        table = None
        if self.dst_distribute:
            table = TableProcessor.create_distributed_table_name(
                db=event_converted.schema, table=event_converted.table)
        else:
            table = self.dst_table if self.dst_table else event_converted.table
            if self.dst_schema:
                table = TableProcessor.create_migrated_table_name(
                    prefix=self.dst_table_prefix, table=table)

        logging.debug(
            "schema={} table={} self.dst_schema={} self.dst_table={}".format(
                schema, table, self.dst_schema, self.dst_table))

        # and INSERT converted rows

        sql = ''
        try:
            sql = 'INSERT INTO `{0}`.`{1}` ({2}) VALUES'.format(
                schema, table,
                ', '.join(map(lambda column: '`%s`' % column, rows[0].keys())))
            self.client.execute(sql, rows)
        except Exception as ex:
            logging.critical('QUERY FAILED')
            logging.critical('ex={}'.format(ex))
            logging.critical('sql={}'.format(sql))
            sys.exit(0)
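
The batch INSERT above relies on the driver accepting the rows as a list of dicts keyed by column name. A minimal standalone sketch of the same pattern, assuming the clickhouse-driver package (which self.client appears to wrap) and a reachable server; table name and values are hypothetical:

from decimal import Decimal
from clickhouse_driver import Client

client = Client(host='127.0.0.1')  # hypothetical connection settings

# rows mirror the structure built above: one dict per row, Decimal values already stringified
rows = [
    {'id': 1, 'a': str(Decimal('3.14'))},
    {'id': 2, 'a': '2.71'},
]
sql = 'INSERT INTO `db`.`mytable` ({}) VALUES'.format(
    ', '.join('`%s`' % column for column in rows[0].keys()))
client.execute(sql, rows)
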
    def __init__(
        self,
        host=None,
        port=None,
        user=None,
        password=None,
        dbs=None,
        schema=None,
        distribute=None,
        cluster=None,
        tables=None,
        tables_prefixes=None,
        tables_where_clauses=None,
        column_skip=[],
    ):
        super().__init__(host=host,
                         port=port,
                         user=user,
                         password=password,
                         dbs=dbs,
                         schema=schema,
                         distribute=distribute,
                         cluster=cluster,
                         tables=tables,
                         tables_prefixes=tables_prefixes,
                         column_skip=column_skip)
        self.client.cursorclass = SSDictCursor

        # parse tables where clauses
        if not tables_where_clauses:
            return

        # tables_where_clauses contains:
        # [
        #   'db1.t1=where_filename_1',
        #   'db2.t2=where_filename_2'
        # ]

        # debug info
        logging.info("tables_where_clauses={}".format(tables_where_clauses))
        for table_where in tables_where_clauses:
            logging.info("table_where={}".format(table_where))

        # process WHERE migration clauses
        for table_where_clause in tables_where_clauses:
            # table_where_clause contains 'db1.t1=where_filename_1'
            full_table_name, equals, where_file_name = table_where_clause.partition(
                '=')

            # sanity check
            if not full_table_name or not equals or not where_file_name:
                continue
            if not TableProcessor.is_full_table_name(full_table_name):
                continue

            # prepare the following data structure:
            # {
            #   'db1': {
            #       'table1': "a = 1 and b = 2"
            #       'table2': "c = 1 and d = 2"
            #   },
            #   'db2': {
            #       'table1': "e = 2 and f = 3"
            #       'table2': "g = 1 and h = 2"
            #   }
            # }
            db, table = TableProcessor.parse_full_table_name(full_table_name)
            if db not in self.where_clauses:
                self.where_clauses[db] = {}

            if os.path.isfile(where_file_name):
                # WHERE clause is stored in a file - read it
                with open(where_file_name, 'r') as where_file:
                    self.where_clauses[db][table] = where_file.read().strip("\n")
            else:
                # WHERE clause is specified inline
                self.where_clauses[db][table] = where_file_name

        # debug info
        logging.info("migration where clauses")
        for db, tables in self.where_clauses.items():
            for table, where in tables.items():
                logging.info("{}.{}.where={}".format(db, table, where))
Example #4
    def __init__(
        self,
        connection_settings,
        server_id,
        log_file=None,
        log_pos=None,
        schemas=None,
        tables=None,
        tables_prefixes=None,
        blocking=None,
        resume_stream=None,
        nice_pause=None,
        binlog_position_file=None,
        callbacks={},
    ):
        super().__init__(callbacks=callbacks)

        self.connection_settings = connection_settings
        self.server_id = server_id
        self.log_file = log_file
        self.log_pos = log_pos
        # normalise the schemas list; an empty result is stored as None
        self.schemas = TableProcessor.extract_dbs(
            schemas, Util.join_lists(tables, tables_prefixes)) or None
        self.tables = None if tables is None else TableProcessor.extract_tables(
            tables)
        self.tables_prefixes = None if tables_prefixes is None else TableProcessor.extract_tables(
            tables_prefixes)
        self.blocking = blocking
        self.resume_stream = resume_stream
        self.nice_pause = nice_pause
        self.binlog_position_file = binlog_position_file
        self.cache_pool = CachePool()
        self.cache_pool.reader = self

        logging.info("raw dbs list. len()=%d",
                     0 if schemas is None else len(schemas))
        if schemas is not None:
            for schema in schemas:
                logging.info(schema)
        logging.info("normalised dbs list. len()=%d",
                     0 if self.schemas is None else len(self.schemas))
        if self.schemas is not None:
            for schema in self.schemas:
                logging.info(schema)

        logging.info("raw tables list. len()=%d",
                     0 if tables is None else len(tables))
        if tables is not None:
            for table in tables:
                logging.info(table)
        logging.info("normalised tables list. len()=%d",
                     0 if self.tables is None else len(self.tables))
        if self.tables is not None:
            for table in self.tables:
                logging.info(table)

        logging.info("raw tables-prefixes list. len()=%d",
                     0 if tables_prefixes is None else len(tables_prefixes))
        if tables_prefixes is not None:
            for table in tables_prefixes:
                logging.info(table)
        logging.info(
            "normalised tables-prefixes list. len()=%d",
            0 if self.tables_prefixes is None else len(self.tables_prefixes))
        if self.tables_prefixes is not None:
            for table in self.tables_prefixes:
                logging.info(table)

        if not isinstance(self.server_id, int):
            raise Exception(
                "Please specify server_id of src server as int. Ex.: --src-server-id=1"
            )

        self.binlog_stream = BinLogStreamReader(
            # MySQL server - data source
            connection_settings=self.connection_settings,
            server_id=self.server_id,
            # we are interested in reading CH-repeatable events only
            only_events=[
                # Possible events
                #BeginLoadQueryEvent,
                DeleteRowsEvent,
                #ExecuteLoadQueryEvent,
                #FormatDescriptionEvent,
                #GtidEvent,
                #HeartbeatLogEvent,
                #IntvarEvent
                #NotImplementedEvent,
                #QueryEvent,
                #RotateEvent,
                #StopEvent,
                #TableMapEvent,
                UpdateRowsEvent,
                WriteRowsEvent,
                #XidEvent,
            ],
            only_schemas=self.schemas,
            # in case we have any prefixes - this means we need to listen to all tables within specified schemas
            only_tables=self.tables if not self.tables_prefixes else None,
            log_file=self.log_file,
            log_pos=self.log_pos,
            # If True, do not support ALTER TABLE. It's faster.
            freeze_schema=True,
            blocking=False,
            resume_stream=self.resume_stream,
        )
        logging.debug("mysql connection settings:{}".format(
            self.connection_settings))
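
The reader above only configures the stream; a minimal sketch of how a BinLogStreamReader built this way is typically consumed, assuming the pymysqlreplication package and made-up connection settings:

from pymysqlreplication import BinLogStreamReader
from pymysqlreplication.row_event import (
    DeleteRowsEvent, UpdateRowsEvent, WriteRowsEvent)

# Hypothetical MySQL connection settings
connection_settings = {'host': '127.0.0.1', 'port': 3306, 'user': 'reader', 'passwd': ''}

stream = BinLogStreamReader(
    connection_settings=connection_settings,
    server_id=1,
    only_events=[DeleteRowsEvent, UpdateRowsEvent, WriteRowsEvent],
    blocking=False,
    resume_stream=True,
)

for binlog_event in stream:
    for row in binlog_event.rows:
        if isinstance(binlog_event, WriteRowsEvent):
            print('INSERT into %s.%s: %s' % (binlog_event.schema, binlog_event.table, row['values']))
        elif isinstance(binlog_event, UpdateRowsEvent):
            print('UPDATE: %s -> %s' % (row['before_values'], row['after_values']))
        elif isinstance(binlog_event, DeleteRowsEvent):
            print('DELETE: %s' % row['values'])

stream.close()
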