Example #1
    def run(self) -> List[SchemaContent]:
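        # Collect updated tables from the inspected schemas and build a SchemaContent for each one.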
        db_list = self.inspection.get_schema_names()
        update_tables = self.get_update_tables(db_list)

        schemas = []

        for db, tb in update_tables:
            schema = SchemaContent(name=tb, database=db, comment=self.get_table_comment(tb, db), type=self.db_type)
            columns = self.inspection.get_columns(table_name=self._warp(tb), schema=self._warp(db))
            self.set_primary_info(schema, columns, db, tb)

            fields = []
            for x in columns:
                field = SchemaField(name=x['name'], type=self.convert_flink_type(x['type']),
                                    nullable=x['nullable'], autoincrement=x.get('autoincrement'))
                if field.type is None:
                    # Skip columns whose type cannot be mapped to a Flink type.
                    logger.error(
                        "Column {} in {}.{} not added: unsupported type {}".format(
                            field.name, schema.database, schema.name, str(x['type'])))
                else:
                    fields.append(field)

            schema.fields.extend(fields)

            schemas.append(schema)
        return schemas
Example #2
    def run(self) -> List[SchemaContent]:
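        # Map every watched Elasticsearch index mapping to a SchemaContent with nullable fields.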
        assert 'elasticsearch' == self.db_type
        from elasticsearch import Elasticsearch
        es = Elasticsearch(hosts=self.connection_url)
        schemas = []
        for index in es.indices.get_alias('*'):
            if index not in self.need_tables:
                continue

            schema = SchemaContent(name=index, type=self.db_type)
            mappings = es.indices.get_mapping(index)

            fields = []
            for k, v in mappings[index]['mappings']['properties'].items():
                field = SchemaField(name=k, type=self._es2flink(v['type']),
                                    nullable=True)
            if field.type is None:
                # Skip fields whose Elasticsearch type has no Flink mapping.
                logger.error(
                    "Column {} in {}.{} not added: unsupported type {}".format(
                        field.name, schema.database, schema.name, str(v['type'])))
                else:
                    fields.append(field)

            fields.sort(key=lambda x: x.name)

            schema.fields.extend(fields)

            logger.debug(schema)
            schemas.append(schema)

        return schemas
Example #3
 def to_schema_content(self) -> SchemaContent:
     origin_dict = self.as_dict()
     use = ['name', 'database', 'comment', 'partitionable']
     origin = {k: v for k, v in origin_dict.items() if k in use}
     origin['fields'] = [SchemaField(
         **x) for x in load_yaml(self.fields)] if self.fields else []
     origin['type'] = self.connection.type.code
     return SchemaContent(**origin)
Example #4
 def rowtime_field(self) -> Optional[SchemaField]:
     if self.get_config('rowtime_enable', typ=bool):
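         # Flink rowtime descriptor: event time is read from a source field, with a periodic-bounded watermark.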
         row_field = {
             "timestamps": {
                 "type": "from-field",
                 "from": self.get_config('rowtime_from', typ=str)
             },
             "watermarks": {
                 "type": "periodic-bounded",
                 "delay": self.get_config("rowtime_watermarks", typ=int)
             }
         }
         return SchemaField(name=self.get_config('rowtime_name', typ=str),
                            type=BlinkSQLType.TIMESTAMP,
                            rowtime=row_field)
Example #5
    def generate_table_cache(self, config: VersionConfig,
                             connection: Connection, connection_type: str,
                             resource_name: ResourceName, schema: SchemaEvent,
                             template: ResourceTemplate, template_type: str,
                             version: 'ResourceVersion') -> dict:
        res = dict()
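        # Assemble the Flink table descriptor: update-mode, format, connector and the schema field list.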
        if config.update_mode:
            res['update-mode'] = config.update_mode
        if config.format:
            res['format'] = config.format
        elif connection_type not in FlinkConnectorType.schema_less():
            if resource_name.save_format == FlinkSaveFormat.json:
                res['format'] = {"type": 'json'}
            elif resource_name.save_format == FlinkSaveFormat.csv:
                res['format'] = {"type": 'csv'}
            else:
                msg = "Not Support Save format: {} in {}".format(
                    resource_name.save_format, resource_name.full_name)
                raise NotImplementedError(msg)

        connector = self.generate_table_connector(connection, connection_type,
                                                  resource_name, schema,
                                                  template, version)
        res['connector'] = connector if connector else None
        fields = [SchemaField(**x)
                  for x in load_yaml(schema.fields)] if schema.fields else []
        need_fields = [
            x for x in fields
            if x.name in NameFilter(config.include, config.exclude)
        ]
        field_names = [x.name for x in need_fields]
        schemas = []
        if config.schema:
            for x in config.schema:
                assert x['name'] not in field_names, \
                    "{} is already contained in the origin fields".format(x['name'])
            schemas.extend(config.schema)
        for x in need_fields:
            if self.is_filter_field(x, connection_type, template_type,
                                    resource_name):
                continue
            schemas.append(self.field2schema(x))
        res['schema'] = schemas
        return res
Example #6
    def _generate_update_fields(self, schema_event: SchemaEvent,
                                config: VersionConfig) -> List[SchemaField]:
        cnt = self._connector
        fields = []
        config.exclude = '.*'
        schema_fields = [
            SchemaField(**x) for x in load_yaml(schema_event.fields)
        ] if schema_event else []

        for schema in schema_fields:
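            # Emit three columns per source field: before- and after-image values keep the original type, the update flag is BOOLEAN.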
            for suffix, tp in zip([
                    cnt.before_column_suffix, cnt.after_column_suffix,
                    cnt.update_suffix
            ], [schema.type, schema.type, BlinkSQLType.BOOLEAN]):
                n_schema = deepcopy(schema)
                n_schema.type = tp
                n_schema.name = n_schema.name + suffix
                fields.append(n_schema)
        return fields
Example #7
 def run(self) -> List[SchemaContent]:
     assert 'hbase' == self.db_type
     import happybase
     host, port = self.connection_url.split(':')
     connection = happybase.Connection(host, int(port), autoconnect=True)
     schemas = []
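     # Each HBase column family becomes a single BYTES field, since HBase stores untyped byte values.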
     for x in connection.tables():
         tab = x.decode()
         table = connection.table(tab)
         schema = SchemaContent(name=tab, type=self.db_type)
         fields = []
         for fm in table.families():
             fields.append(
                 SchemaField(name=fm.decode(),
                             type=BlinkHiveSQLType.BYTES,
                             nullable=True))
         schema.fields.extend(fields)
         schemas.append(schema)
     return schemas
Example #8
 def db_execute_time_field(self) -> Optional[SchemaField]:
     if not self.get_config('rowtime_enable', typ=bool):
         return SchemaField(name=self.get_config('rowtime_from', typ=str),
                            type=BlinkSQLType.TIMESTAMP)
Example #9
 def binlog_type_name_field(self) -> SchemaField:
     return SchemaField(name=self.get_config('binlog_type_name', typ=str),
                        type=BlinkSQLType.INTEGER)
Example #10
 def process_time_field(self) -> Optional[SchemaField]:
     if self.get_config('process_time_enable', typ=bool):
         return SchemaField(name=self.get_config('process_time_name',
                                                 typ=str),
                            type=BlinkSQLType.TIMESTAMP,
                            proctime=True)