def setUp(self):
    self._columns = {
        'f__o_o': ColumnDefinition('f__o_o', type_name='text'),
        'bar': ColumnDefinition('bar', type_name='int'),
        'baz': ColumnDefinition('baz', type_name='text[]'),
        'quux': ColumnDefinition('quux', type_name='int')
    }
    self._fdw = ESForeignDataWrapper(
        {'doc_type': 'foo_doc', 'index': 'our_index'}, self._columns)
    self._quals = [Qual('f__o_o', '=', 'value'), Qual('bar', '>', 5)]
def import_schema(self, schema, srv_options, options, restriction_type,
                  restricts):
    tables = []
    for provider in Faker().providers:
        columns = []
        schema_name, provider_name = self._destructure_class_name(provider)
        for field in filter(lambda x: not x.startswith('_'), dir(provider)):
            # Skip Faker's generic helpers and internals.
            if (field.startswith('random') or field.endswith('ify')
                    or field == 'generator'):
                continue
            columns.append(ColumnDefinition(
                column_name=field, type_name="varchar"))
        tables.append(TableDefinition(
            table_name=provider_name,
            schema=schema,
            columns=columns,
            options=options))
    return tables
def import_schema(self, schema, srv_options, options, restriction_type,
                  restricts):
    log_to_postgres(
        "IMPORT %s FROM srv %s OPTIONS %s RESTRICTION: %s %s" %
        (schema, srv_options, options, restriction_type, restricts))
    tables = set([
        unicode_("imported_table_1"),
        unicode_("imported_table_2"),
        unicode_("imported_table_3")
    ])
    if restriction_type == 'limit':
        tables = tables.intersection(set(restricts))
    elif restriction_type == 'except':
        tables = tables - set(restricts)
    rv = []
    for tname in sorted(list(tables)):
        table = TableDefinition(tname)
        nb_col = options.get('nb_col', 3)
        for col in range(nb_col):
            table.columns.append(
                ColumnDefinition("col%s" % col,
                                 type_name="text",
                                 options={"option1": "value1"}))
        rv.append(table)
    return rv
def _get_table_definition(response, fdw_options, table_name, table_options):
    # Allow overriding introspection options with per-table params
    # (e.g. encoding, delimiter...)
    fdw_options = copy(fdw_options)
    fdw_options.update(table_options)

    csv_options, reader = make_csv_reader(
        response, CSVOptions.from_fdw_options(fdw_options))
    sample = list(islice(reader, csv_options.schema_inference_rows))

    if not csv_options.header:
        sample = [[""] * len(sample[0])] + sample

    sg_schema = infer_sg_schema(sample, None, None)

    # For nonexistent column names: replace with autogenerated ones
    # (can't have empty column names).
    sg_schema = generate_column_names(sg_schema)

    # Merge the autodetected table options with the ones passed to us
    # originally (e.g. S3 object etc).
    new_table_options = copy(table_options)
    new_table_options.update(csv_options.to_table_options())

    # Build a Multicorn TableDefinition. ColumnDefinition takes in type OIDs,
    # typmods and other internal PG stuff, but other FDWs seem to get by with
    # just the textual type name.
    return TableDefinition(
        table_name=table_name,
        schema=None,
        columns=[
            ColumnDefinition(column_name=c.name, type_name=c.pg_type)
            for c in sg_schema
        ],
        options=new_table_options,
    )
def _import_table(cls, db, table, options):
    columns = []
    sql = ("SELECT name, type FROM system.columns "
           "WHERE database='%s' AND table='%s'" % (db.db_name, table))
    for row in db.select(sql):
        try:
            columns.append(
                ColumnDefinition(row.name,
                                 type_name=_convert_column_type(row.type)))
        except KeyError:
            cls._warn('Unsupported column type %s in table %s was skipped'
                      % (row.type, table))
    merged_options = dict(options, table_name=table)
    return TableDefinition(table, columns=columns, options=merged_options)
def import_schema(self, schema, srv_options, options, restriction_type,
                  restricts):
    Bag = import_bag(srv_options)
    pcid_str = options.pop('pcid', srv_options.pop('pcid', 0))
    pcid = int(pcid_str)
    patch_column = options.pop('patch_column',
                               srv_options.pop('patch_column', 'points'))
    patch_columns = options.pop('patch_columns', '*').strip()
    patch_columns = [
        col.strip() for col in patch_columns.split(',') if col.strip()
    ]
    filename = srv_options.pop('rosbag_path', "") + options.pop(
        'rosbag_path', "") + schema
    bag = Bag(filename, 'r')
    tablecols = []
    topics = bag.get_type_and_topic_info().topics
    pcid_for_topic = {k: pcid + 1 + i for i, k in enumerate(topics.keys())}
    pointcloud_formats = True
    # Compare with `==`, not `is`: `is` tests object identity and is not
    # reliable for strings.
    if restriction_type == 'limit':
        topics = {k: v for k, v in topics.items() if k in restricts}
        pointcloud_formats = 'pointcloud_formats' in restricts
    elif restriction_type == 'except':
        topics = {k: v for k, v in topics.items() if k not in restricts}
        pointcloud_formats = 'pointcloud_formats' not in restricts
    tabledefs = []
    if pointcloud_formats:
        tablecols = [
            ColumnDefinition('pcid', type_name='integer'),
            ColumnDefinition('srid', type_name='integer'),
            ColumnDefinition('schema', type_name='text'),
            ColumnDefinition('format', type_name='text'),
            ColumnDefinition('rostype', type_name='text'),
            ColumnDefinition('columns', type_name='text[]'),
            ColumnDefinition('ply_header', type_name='text'),
        ]
        tableopts = {
            'metadata': 'true',
            'rosbag': schema,
            'pcid': pcid_str
        }
        tabledefs.append(
            TableDefinition("pointcloud_formats",
                            columns=tablecols,
                            options=tableopts))
    for topic, infos in topics.items():
        pcid = pcid_for_topic[topic]
        columns, _, _, _, _, _ = get_columns(bag, topic, infos, pcid,
                                             patch_column, patch_columns)
        tablecols = [get_column_def(k, *v) for k, v in columns.items()]
        tableopts = {'topic': topic, 'rosbag': schema, 'pcid': str(pcid)}
        tabledefs.append(
            TableDefinition(topic, columns=tablecols, options=tableopts))
    return tabledefs
def _format_table(properties, table):
    """
    Utility method. Given a dict and a TableDefinition object, iterates
    through the dict and populates the table using the dict keys.

    :param dict properties: the dict holding the keys used to build the table
    :param TableDefinition table: the table to build
    """
    for key in properties.keys():
        table.columns.append(
            ColumnDefinition(column_name=key,
                             type_name='character varying'))
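# A minimal usage sketch of _format_table (hypothetical, not from the
# source): every key of the dict becomes a varchar column; the values are
# ignored. The sample dict below is invented for illustration.
def _format_table_example():
    table = TableDefinition(table_name='example_server')
    _format_table({'backup_id': '20240101T000000', 'size': '1GB'}, table)
    # Expected column names: ['backup_id', 'size']
    return [c.column_name for c in table.columns]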
def test_execute_column_name_translation(self, scan_mock,
                                         _elasticsearch_mock):
    columns = {
        'object__nested_field': ColumnDefinition('object__nested_field',
                                                 type_name='text')
    }
    fdw = ESForeignDataWrapper(
        {
            'doc_type': 'foo_doc',
            'index': 'our_index',
            'column_name_translation': 'true'
        }, columns)
    quals = [Qual('object__nested_field', '=', 'value')]
    scan_mock.return_value = [{
        'fields': {
            'object.nested-field': ['value']
        }
    }]
    rows = list(fdw.execute(quals, ['object__nested_field']))
    expected_query = {
        'fields': ['object.nested-field'],
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'term': {
                                'object.nested-field': 'value'
                            }
                        }]
                    }
                }
            }
        }
    }
    scan_mock.assert_called_once_with(fdw.esclient,
                                      query=expected_query,
                                      index='our_index',
                                      doc_type='foo_doc',
                                      size=fdw._SCROLL_SIZE,
                                      scroll=fdw._SCROLL_LENGTH)
    expected_rows = [{'object__nested_field': 'value'}]
    self.assertEqual(rows, expected_rows)
def get_column_def(col, typ, array, typmod, fmt):
    # Map ROS field types to PostgreSQL type names. Unsigned types are
    # widened (e.g. uint32 -> int8) since PostgreSQL has no unsigned
    # integers; unmapped type names pass through unchanged.
    py2sql = {
        'int8': 'int2',
        'uint8': 'int2',
        'int16': 'int2',
        'uint16': 'int4',
        'int32': 'int4',
        'uint32': 'int8',
        'float32': 'float4',
        'float64': 'float8',
        'int64': 'int8',
        'uint64': 'int8',
        'bool': 'bool',
        'string': 'text',
    }
    if typ in py2sql:
        typ = py2sql[typ]
    typ += array
    if typmod:
        typ = "{}({})".format(typ, typmod)
    return ColumnDefinition(col, type_name=typ)
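# A quick sanity sketch for get_column_def (hypothetical inputs, not from
# the source): an array suffix is appended after the type mapping, and a
# typmod is rendered in parentheses on pass-through types.
assert get_column_def('x', 'float32', '[]', None, None).type_name == 'float4[]'
assert get_column_def('price', 'numeric', '', '10,2', None).type_name == 'numeric(10,2)'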
def import_schema(schema, srv_options, options, restriction_type, restricts):
    if ISDEBUG:
        logger.log(
            u"import schema {0} requested with options {1}; "
            u"restriction type: {2}; restrictions: {3}".format(
                schema, options, restriction_type, restricts))
    if "hosts" not in srv_options:
        logger.log("The hosts parameter is needed, setting to localhost.",
                   WARNING)
    hosts = srv_options.get("hosts", "localhost").split(",")
    if "port" not in srv_options:
        logger.log("The port parameter is needed, setting to 9042.", WARNING)
    port = srv_options.get("port", "9042")
    username = srv_options.get("username", None)
    password = srv_options.get("password", None)
    with_row_id = options.get('with_row_id', 'True') == 'True'
    # Parse the 'mapping' option ("cassandra_name=pg_name;...") into
    # forward and backward lookup dicts.
    names_mapping = options.get('mapping', '').split(';')
    mapping_dict = {}
    mapping_dict_backward = {}
    for s in names_mapping:
        kp = s.split('=')
        if len(kp) != 2:
            continue
        key = kp[0].strip()
        value = kp[1].strip()
        mapping_dict[key] = value
        mapping_dict_backward[value] = key
    # Cassandra connection init
    cluster = Cluster(hosts)
    if username is not None:
        cluster.auth_provider = PlainTextAuthProvider(username=username,
                                                      password=password)
    session = cluster.connect()
    keyspace = cluster.metadata.keyspaces[schema]
    cassandra_tables = []
    tables = keyspace.tables
    views = keyspace.views
    if restriction_type is None:
        # No restriction: import every table and materialized view.
        for t in list(tables) + list(views):
            if t in tables:
                cassandra_tables.append(tables[t])
            else:
                cassandra_tables.append(views[t])
    elif restriction_type == 'limit':
        for r in restricts:
            t_name = r
            if t_name in mapping_dict_backward:
                t_name = mapping_dict_backward[t_name]
            if t_name in tables:
                cassandra_tables.append(tables[t_name])
            else:
                cassandra_tables.append(views[t_name])
    elif restriction_type == 'except':
        for t in list(tables) + list(views):
            if t not in restricts:
                if t in tables:
                    cassandra_tables.append(tables[t])
                else:
                    cassandra_tables.append(views[t])
    pg_tables = []
    for c_table in cassandra_tables:
        if ISDEBUG:
            logger.log("Importing table {0}...".format(c_table.name))
        pg_table_name = c_table.name
        if pg_table_name in mapping_dict:
            if ISDEBUG:
                logger.log("Cassandra table name '{0}' maps to PostgreSQL "
                           "table name '{1}'".format(
                               pg_table_name, mapping_dict[pg_table_name]))
            pg_table_name = mapping_dict[pg_table_name]
        pg_table = TableDefinition(pg_table_name)
        pg_table.options['keyspace'] = schema
        pg_table.options['columnfamily'] = c_table.name
        for c_column_name in c_table.columns:
            cql_type = c_table.columns[c_column_name].cql_type
            pg_type = types_mapper.get_pg_type(cql_type)
            if ISDEBUG:
                logger.log("Adding column {0} with PostgreSQL type {2} "
                           "(CQL type {1})".format(c_column_name, cql_type,
                                                   pg_type))
            pg_table.columns.append(
                ColumnDefinition(c_column_name, type_name=pg_type))
        if with_row_id:
            pg_table.columns.append(
                ColumnDefinition('__rowid__', type_name='text'))
        pg_tables.append(pg_table)
        if ISDEBUG:
            logger.log("Table imported: {0}".format(c_table.name))
    session.shutdown()
    return pg_tables
def import_schema(self, schema, srv_options, options, restriction_type,
                  restricts):
    """
    Called on an IMPORT FOREIGN SCHEMA command.

    Populates a PostgreSQL schema using the json output of the barman
    diagnose command. This method iterates through the results of the
    diagnose command and creates a foreign table for every server, using
    the fields of the backup json object as columns of the table.

    Additionally creates 2 tables: 'server_config' and 'server_status'.
    'server_config' contains the name, description and json configuration
    of every barman server, stored in a jsonb column. 'server_status'
    contains the status of every server, using the fields of the status
    json object as columns.

    Every table is created using the 'table_name' server option. The
    table_name is used during the execution of SELECT statements to
    retrieve the correct table for the execution of the query.
    """
    # Execute the barman diagnose command through an ssh connection
    errors, result = self._execute_barman_cmd(
        "barman diagnose",
        srv_options['barman_user'],
        srv_options['barman_host'])
    # If an error is present, return.
    if errors:
        return
    # Load the result using json
    result = json.loads(result)
    servers = result['servers']
    tables = []
    # Iterate through the available servers
    for server, values in servers.items():
        # Encode the name of the server, replacing - and . characters
        # with _, and use it as the table name
        server = server.replace('-', '_').replace('.', '_')
        log_to_postgres('schema %s table %s' % (schema, server), DEBUG)
        table = TableDefinition(table_name=server)
        table.options['schema'] = schema
        table.options['table_name'] = server
        # Create a table for every server, using the keys of the first
        # backup json object as columns.
        for backup, properties in values['backups'].items():
            self._format_table(properties, table)
            log_to_postgres('schema %s table %s' % (schema, server))
            break
        # Add the table to the list of the tables.
        tables.append(table)
    # Create the server_config table
    table_config = TableDefinition(table_name='server_config')
    table_config.options['schema'] = schema
    # Set the table name
    table_config.options['table_name'] = 'server_config'
    # Create the columns
    table_config.columns.append(
        ColumnDefinition(column_name='server',
                         type_name='character varying'))
    table_config.columns.append(
        ColumnDefinition(column_name='description',
                         type_name='character varying'))
    table_config.columns.append(
        ColumnDefinition(column_name='config', type_name='jsonb'))
    # Add the table to the list of the tables.
    tables.append(table_config)
    # Create the server_status table
    table_status = TableDefinition(table_name='server_status')
    table_status.options['schema'] = schema
    table_status.options['table_name'] = 'server_status'
    # Iterate through the fields of the status json object of the first
    # server, and use them to create the columns of the table
    for server, values in servers.items():
        self._format_table(values['status'], table_status)
        break
    # Add a column containing the server name.
    table_status.columns.append(
        ColumnDefinition(column_name='server',
                         type_name='character varying'))
    # Add the table to the list of the tables.
    tables.append(table_status)
    return tables
def import_schema(cls, schema, srv_options, options, restriction_type,
                  restricts):
    return [
        TableDefinition(
            '{}_things'.format(schema),
            schema=schema,
            options={'table_type': 'thing'},
            columns=[
                ColumnDefinition('thing_name', type_name='text'),
                ColumnDefinition('thing_type_name', type_name='text'),
                ColumnDefinition('thing_arn', type_name='text'),
                ColumnDefinition('thing_version', type_name='integer'),
                ColumnDefinition('thing_groups', type_name='jsonb'),
                ColumnDefinition('thing_attributes', type_name='jsonb'),
                ColumnDefinition('thing_shadow_data', type_name='jsonb')
            ]),
        TableDefinition(
            '{}_thing_types'.format(schema),
            schema=schema,
            options={'table_type': 'thing-type'},
            columns=[
                ColumnDefinition('thing_type_name', type_name='text'),
                ColumnDefinition('thing_type_arn', type_name='text'),
                ColumnDefinition('thing_type_properties', type_name='jsonb'),
                ColumnDefinition('thing_type_metadata', type_name='jsonb')
            ]),
        TableDefinition(
            '{}_thing_groups'.format(schema),
            schema=schema,
            options={'table_type': 'thing-group'},
            columns=[
                ColumnDefinition('thing_group_name', type_name='text'),
                ColumnDefinition('thing_group_arn', type_name='text')
            ])
    ]
def import_schema_bigquery_fdw(self, schema, srv_options, options,
                               restriction_type, restricts=None):
    """ Pulls in the remote schema. """
    if restriction_type == 'limit':
        only = lambda t: t in restricts
    elif restriction_type == 'except':
        only = lambda t: t not in restricts
    else:
        only = None
    client = self.getClient()
    query = f'''
        SELECT table_schema, table_name, column_name, data_type
        FROM `{schema}.INFORMATION_SCHEMA.COLUMNS`
        ORDER BY ordinal_position;
    '''
    schemas = set()
    client.runQuery(query, (), self.dialect)
    tables = defaultdict(list)
    for row in client.readResult():
        if only and not only(row.table_name):
            # Table doesn't match the requested restriction, skip it.
            continue
        schemas.add(row.table_schema)
        tables[row.table_schema, row.table_name].append(
            (row.column_name, row.data_type))
    to_insert = []
    for (_schema, table), columns in tables.items():
        if _schema.lower() != schema.lower():
            # Wrong schema, we'll skip.
            continue
        # Let's make sure the table is sane-ish with respect to
        # column names and counts.
        try:
            if not self._check_table(table, columns):
                # For "skip" in fdw_colcount and "skip_table" in
                # fdw_colnames.
                continue
        except FDWImportError:
            # For "error" cases in fdw_colnames and fdw_colcount.
            return []
        # For non-error, trim, and trim_columns.
        ftable = TableDefinition(table)
        ftable.options['schema'] = schema
        ftable.options['tablename'] = table
        for col, typ in columns:
            typ = DEFAULT_MAPPINGS.get(typ, "TEXT")
            ftable.columns.append(ColumnDefinition(col, type_name=typ))
        to_insert.append(ftable)
        if self.verbose:
            log_to_postgres(
                "fdw importing table `" + schema + "." + table + "`",
                WARNING)
    return to_insert
def import_schema(self, schema, srv_options, options, restriction_type,
                  restricts):
    """Support for IMPORT FOREIGN SCHEMA"""
    ftable = TableDefinition(
        options['species'],
        schema=None,
        columns=[
            ColumnDefinition("chrom", type_name="text"),
            ColumnDefinition("pos", type_name="integer"),
            ColumnDefinition("id", type_name="text"),
            ColumnDefinition("ref", type_name="text"),
            ColumnDefinition("alt", type_name="text[]"),
            ColumnDefinition("qual", type_name="real"),
            ColumnDefinition("heterozygosity", type_name="real"),
            ColumnDefinition("sample", type_name="text"),
            ColumnDefinition("species", type_name="text"),
            ColumnDefinition("info", type_name="text"),
            ColumnDefinition("depth", type_name="integer"),
            ColumnDefinition("genotype", type_name="text"),
            ColumnDefinition("filter", type_name="text"),
            ColumnDefinition("issnp", type_name="boolean"),
            ColumnDefinition("issv", type_name="boolean"),
            ColumnDefinition("isindel", type_name="boolean"),
            ColumnDefinition("ismonomorphic", type_name="boolean"),
            ColumnDefinition("isdeletion", type_name="boolean"),
            ColumnDefinition("issvprecise", type_name="boolean"),
            ColumnDefinition("istransition", type_name="boolean"),
            ColumnDefinition("source", type_name="text")
        ])
    ftable.options = options
    ftable.options['basedir'] = schema
    return [ftable]