def _find_table_name(self, data):
    """Infer ``self.table_name`` from ``data`` when it is not already set.

    Tries, in order: the name of a pymongo ``Collection``, then the
    lowercased basename of an existing file path; otherwise falls back
    to a ``generated_table<N>`` placeholder.  Whatever name results is
    normalized via ``reshape.clean_key_name``, and the class-level
    counter ``Table.table_index`` is advanced.
    """
    if not self.table_name:
        if pymongo and isinstance(data, pymongo.collection.Collection):
            self.table_name = data.name
        elif hasattr(data, 'lower') and os.path.isfile(data):
            # ``hasattr(data, 'lower')`` is a duck-type test for "string".
            stem = os.path.splitext(data)[0]
            self.table_name = os.path.split(stem)[1].lower()
    if not self.table_name:
        self.table_name = 'generated_table%s' % Table.table_index
    self.table_name = reshape.clean_key_name(self.table_name)
    Table.table_index += 1
def _find_table_name(self, data):
    """Derive ``self.table_name`` from ``data`` if it has not been set.

    A pymongo ``Collection`` contributes its own name; a string that
    names an existing file contributes its basename (extension
    stripped, case preserved).  Failing both, a generated placeholder
    is used.  The chosen name is cleaned with
    ``reshape.clean_key_name`` and ``Table.table_index`` is bumped.
    """
    if not self.table_name:
        if pymongo and isinstance(data, pymongo.collection.Collection):
            self.table_name = data.name
        elif hasattr(data, 'lower'):  # looks like a string
            if os.path.isfile(data):
                path_sans_ext = os.path.splitext(data)[0]
                self.table_name = os.path.basename(path_sans_ext)
    fallback = 'generated_table%s' % Table.table_index
    self.table_name = reshape.clean_key_name(self.table_name or fallback)
    Table.table_index += 1
def __init__(self, data, table_name=None, default_dialect=None,
             save_metadata_to=None, metadata_source=None,
             varying_length_text=False, uniques=False,
             pk_name=None, force_pk=False, _parent_table=None,
             _fk_field_name=None, reorder=False, loglevel=logging.WARN):
    """
    Initialize a Table and load its data.

    If ``varying_length_text`` is ``True``, text columns will be TEXT rather
    than VARCHAR.  This *improves* performance in PostgreSQL.

    If a ``metadata<timestamp>`` YAML file generated from a previous
    ddlgenerator run is provided, *only* ``INSERT`` statements will be
    produced, and the table structure determined during the previous run
    will be assumed.
    """
    logging.getLogger().setLevel(loglevel)
    self.table_name = None
    self.varying_length_text = varying_length_text
    self._load_data(data)
    if hasattr(self.data, 'lower'):
        raise SyntaxError(
            "Data was interpreted as a single string - no table structure:\n%s"
            % self.data[:100])
    self.table_name = (table_name or self.table_name
                       or 'generated_table%s' % Table.table_index)
    Table.table_index += 1
    self.table_name = reshape.clean_key_name(self.table_name)
    # Wrap a lone row/scalar in a list so downstream code always sees
    # an iterable of rows.
    if not hasattr(self.data, 'append') and not hasattr(self.data, '__next__') \
            and not hasattr(self.data, 'next'):
        self.data = [self.data, ]
    self.data = reshape.walk_and_clean(self.data)
    (self.data, self.pk_name, children,
     child_fk_names) = reshape.unnest_children(data=self.data,
                                               parent_name=self.table_name,
                                               pk_name=pk_name,
                                               force_pk=force_pk)
    self.default_dialect = default_dialect
    self.comments = {}
    child_metadata_sources = {}
    if metadata_source:
        if isinstance(metadata_source, OrderedDict):
            logging.info('Column metadata passed in as OrderedDict')
            self.columns = metadata_source
        else:
            logging.info('Pulling column metadata from file %s' % metadata_source)
            with open(metadata_source) as infile:
                # SECURITY NOTE: yaml.Loader can construct arbitrary Python
                # objects (the OrderedDicts in our own metadata need it) --
                # only feed this files generated by ddlgenerator itself.
                # An explicit Loader is required on PyYAML >= 6.
                self.columns = yaml.load(infile.read(), Loader=yaml.Loader)
        # Iterate over a snapshot: popping from ``self.columns`` while
        # iterating its live ``items()`` view raises RuntimeError on py3.
        for (col_name, col) in list(self.columns.items()):
            if isinstance(col, OrderedDict):
                # Nested mapping == metadata for a child table; hoist it out.
                child_metadata_sources[col_name] = col
                self.columns.pop(col_name)
            else:
                self._fill_metadata_from_sample(col)
    else:
        self._determine_types(varying_length_text, uniques=uniques)
    if reorder:
        # Alphabetize columns, keeping the primary key (if present) first.
        ordered_columns = OrderedDict()
        if pk_name and pk_name in self.columns:
            ordered_columns[pk_name] = self.columns.pop(pk_name)
        for (c, v) in sorted(self.columns.items()):
            ordered_columns[c] = v
        self.columns = ordered_columns
    if _parent_table:
        fk = sa.ForeignKey('%s.%s' % (_parent_table.table_name,
                                      _parent_table.pk_name))
    else:
        fk = None
    # The FK is attached only to the column that links back to the parent.
    self.table = sa.Table(
        self.table_name, metadata,
        *[sa.Column(cname, col['satype'],
                    fk if fk and (_fk_field_name == cname) else None,
                    primary_key=(cname == self.pk_name),
                    unique=col['is_unique'],
                    nullable=col['is_nullable'],
                    doc=self.comments.get(cname))
          for (cname, col) in self.columns.items()])
    # Recurse: each unnested child becomes its own Table with a FK to us.
    self.children = {child_name: Table(
        child_data, table_name=child_name,
        default_dialect=self.default_dialect,
        varying_length_text=varying_length_text,
        uniques=uniques, pk_name=pk_name, force_pk=force_pk,
        _parent_table=self, reorder=reorder,
        _fk_field_name=child_fk_names[child_name],
        metadata_source=child_metadata_sources.get(child_name),
        loglevel=loglevel)
        for (child_name, child_data) in children.items()}
    if save_metadata_to:
        # Require a real extension; the earlier tuple ('.yml', 'yaml')
        # lacked the dot and accepted names like 'foometadatayaml'.
        if not save_metadata_to.endswith(('.yml', '.yaml')):
            save_metadata_to += '.yaml'
        with open(save_metadata_to, 'w') as outfile:
            outfile.write(yaml.dump(self._saveable_metadata()))
        logging.info('Pass ``--save-metadata-to %s`` next time to re-use structure'
                     % save_metadata_to)
def __init__(self, data, table_name=None, default_dialect=None,
             save_metadata_to=None, metadata_source=None,
             varying_length_text=False, uniques=False,
             pk_name=None, force_pk=False, _parent_table=None,
             _fk_field_name=None, reorder=False, loglevel=logging.WARN):
    """
    Initialize a Table and load its data.

    If ``varying_length_text`` is ``True``, text columns will be TEXT rather
    than VARCHAR.  This *improves* performance in PostgreSQL.

    If a ``metadata<timestamp>`` YAML file generated from a previous
    ddlgenerator run is provided, *only* ``INSERT`` statements will be
    produced, and the table structure determined during the previous run
    will be assumed.
    """
    logging.getLogger().setLevel(loglevel)
    self.table_name = None
    self.varying_length_text = varying_length_text
    self._load_data(data)
    if hasattr(self.data, 'lower'):
        raise SyntaxError(
            "Data was interpreted as a single string - no table structure:\n%s"
            % self.data[:100])
    self.table_name = (table_name or self.table_name
                       or 'generated_table%s' % Table.table_index)
    Table.table_index += 1
    self.table_name = reshape.clean_key_name(self.table_name)
    # Normalize a single dict/scalar into a one-element list of rows.
    if not hasattr(self.data, 'append') and not hasattr(self.data, '__next__') \
            and not hasattr(self.data, 'next'):
        self.data = [self.data, ]
    self.data = reshape.walk_and_clean(self.data)
    (self.data, self.pk_name, children,
     child_fk_names) = reshape.unnest_children(data=self.data,
                                               parent_name=self.table_name,
                                               pk_name=pk_name,
                                               force_pk=force_pk)
    self.default_dialect = default_dialect
    self.comments = {}
    child_metadata_sources = {}
    if metadata_source:
        if isinstance(metadata_source, OrderedDict):
            logging.info('Column metadata passed in as OrderedDict')
            self.columns = metadata_source
        else:
            logging.info('Pulling column metadata from file %s' % metadata_source)
            with open(metadata_source) as infile:
                # SECURITY NOTE: the full Loader can instantiate arbitrary
                # objects (needed for the OrderedDicts ddlgenerator dumps);
                # never point this at an untrusted file.  PyYAML >= 6
                # raises TypeError if no Loader is given.
                self.columns = yaml.load(infile.read(), Loader=yaml.Loader)
        # Snapshot the items first -- ``pop`` during iteration of a live
        # dict view raises RuntimeError under Python 3.
        for (col_name, col) in list(self.columns.items()):
            if isinstance(col, OrderedDict):
                # A nested mapping describes a child table, not a column.
                child_metadata_sources[col_name] = col
                self.columns.pop(col_name)
            else:
                self._fill_metadata_from_sample(col)
    else:
        self._determine_types(varying_length_text, uniques=uniques)
    if reorder:
        # Sort columns alphabetically; a known primary key stays first.
        ordered_columns = OrderedDict()
        if pk_name and pk_name in self.columns:
            ordered_columns[pk_name] = self.columns.pop(pk_name)
        for (c, v) in sorted(self.columns.items()):
            ordered_columns[c] = v
        self.columns = ordered_columns
    if _parent_table:
        fk = sa.ForeignKey('%s.%s' % (_parent_table.table_name,
                                      _parent_table.pk_name))
    else:
        fk = None
    # Only the designated FK field carries the ForeignKey constraint.
    self.table = sa.Table(
        self.table_name, metadata,
        *[sa.Column(cname, col['satype'],
                    fk if fk and (_fk_field_name == cname) else None,
                    primary_key=(cname == self.pk_name),
                    unique=col['is_unique'],
                    nullable=col['is_nullable'],
                    doc=self.comments.get(cname))
          for (cname, col) in self.columns.items()])
    # Build child Tables recursively, each pointing back at this table.
    self.children = {child_name: Table(
        child_data, table_name=child_name,
        default_dialect=self.default_dialect,
        varying_length_text=varying_length_text,
        uniques=uniques, pk_name=pk_name, force_pk=force_pk,
        _parent_table=self, reorder=reorder,
        _fk_field_name=child_fk_names[child_name],
        metadata_source=child_metadata_sources.get(child_name),
        loglevel=loglevel)
        for (child_name, child_data) in children.items()}
    if save_metadata_to:
        # The original check used ('.yml', 'yaml') -- without the dot it
        # wrongly accepted names merely *ending* in 'yaml'.
        if not save_metadata_to.endswith(('.yml', '.yaml')):
            save_metadata_to += '.yaml'
        with open(save_metadata_to, 'w') as outfile:
            outfile.write(yaml.dump(self._saveable_metadata()))
        logging.info(
            'Pass ``--save-metadata-to %s`` next time to re-use structure'
            % save_metadata_to)