Пример #1
0
    def __init__(self, data, table_name=None, default_dialect=None,
                 save_metadata_to=None, metadata_source=None,
                 varying_length_text=False, uniques=False,
                 pk_name=None, force_pk=False,
                 _parent_table=None, _fk_field_name=None, reorder=False,
                 loglevel=logging.WARN):
        """
        Initialize a Table and load its data.

        If ``varying_length_text`` is ``True``, text columns will be TEXT
        rather than VARCHAR.  This *improves* performance in PostgreSQL.

        If a ``metadata<timestamp>`` YAML file generated from a previous
        ddlgenerator run is provided, *only* ``INSERT`` statements will be
        produced, and the table structure determined during the previous
        run will be assumed.

        :param data: raw input handed to ``self._load_data``.
        :param table_name: explicit table name.  When falsy, any name that
            ``_load_data`` may have left in ``self.table_name`` is used,
            falling back to ``generated_table<N>`` where ``N`` is the
            class-level ``Table.table_index`` counter.
        :param default_dialect: stored on the instance and forwarded to
            child tables.
        :param save_metadata_to: path to which ``self._saveable_metadata()``
            is dumped as YAML; ``.yaml`` is appended unless the path already
            ends in ``.yml`` or ``.yaml``.
        :param metadata_source: either an ``OrderedDict`` of column metadata
            or the path of a YAML file holding it.  When falsy, column types
            are worked out by ``self._determine_types``.
        :param uniques: forwarded to ``_determine_types`` and child tables.
        :param pk_name: forwarded to ``reshape.unnest_children``; also the
            column moved to the front when ``reorder`` is set.
        :param force_pk: forwarded to ``reshape.unnest_children``.
        :param _parent_table: internal; parent ``Table`` of a child table.
        :param _fk_field_name: internal; name of the column that receives
            the foreign key pointing at the parent's primary key.
        :param reorder: when true, columns are sorted by name, with the
            ``pk_name`` column (if present) first.
        :param loglevel: level applied to the *root* logger.
        :raises SyntaxError: if the loaded data looks like a single string
            (it has a ``lower`` attribute).
        """
        logging.getLogger().setLevel(loglevel)
        self.table_name = None
        self.varying_length_text = varying_length_text
        self._load_data(data)
        if hasattr(self.data, 'lower'):
            raise SyntaxError("Data was interpreted as a single string - no table structure:\n%s"
                              % self.data[:100])
        self.table_name = table_name or self.table_name or 'generated_table%s' % Table.table_index
        Table.table_index += 1
        self.table_name = reshape.clean_key_name(self.table_name)

        # A lone record (neither list-like nor an iterator under either the
        # Python 2 or Python 3 protocol) is wrapped so that later steps
        # always see a sequence of rows.
        if not (hasattr(self.data, 'append') or hasattr(self.data, '__next__')
                or hasattr(self.data, 'next')):
            self.data = [self.data]
        self.data = reshape.walk_and_clean(self.data)

        (self.data, self.pk_name, children, child_fk_names) = reshape.unnest_children(
            data=self.data, parent_name=self.table_name, pk_name=pk_name, force_pk=force_pk)

        self.default_dialect = default_dialect
        self.comments = {}
        child_metadata_sources = {}
        if metadata_source:
            if isinstance(metadata_source, OrderedDict):
                logging.info('Column metadata passed in as OrderedDict')
                self.columns = metadata_source
            else:
                logging.info('Pulling column metadata from file %s', metadata_source)
                with open(metadata_source) as infile:
                    # NOTE(review): yaml.load can construct arbitrary Python
                    # objects; only point this at metadata files you trust.
                    self.columns = yaml.load(infile.read())
            # Iterate over a snapshot: entries are popped inside the loop,
            # and mutating a dict while iterating its live view raises
            # RuntimeError on Python 3.
            for (col_name, col) in list(self.columns.items()):
                if isinstance(col, OrderedDict):
                    # Nested OrderedDicts are metadata for child tables.
                    child_metadata_sources[col_name] = col
                    self.columns.pop(col_name)
                else:
                    self._fill_metadata_from_sample(col)
        else:
            self._determine_types(varying_length_text, uniques=uniques)

        if reorder:
            ordered_columns = OrderedDict()
            if pk_name and pk_name in self.columns:
                ordered_columns[pk_name] = self.columns.pop(pk_name)
            for (c, v) in sorted(self.columns.items()):
                ordered_columns[c] = v
            self.columns = ordered_columns

        if _parent_table:
            fk = sa.ForeignKey('%s.%s' % (_parent_table.table_name, _parent_table.pk_name))
        else:
            fk = None

        # Only the column named ``_fk_field_name`` carries the foreign key;
        # every other column gets ``None`` in that positional slot.
        self.table = sa.Table(self.table_name, metadata,
                              *[sa.Column(cname, col['satype'],
                                          fk if fk and (_fk_field_name == cname) else None,
                                          primary_key=(cname == self.pk_name),
                                          unique=col['is_unique'],
                                          nullable=col['is_nullable'],
                                          doc=self.comments.get(cname))
                                for (cname, col) in self.columns.items()
                                ])

        # Each unnested child becomes its own Table linked back to this one.
        self.children = {child_name: Table(child_data, table_name=child_name,
                                           default_dialect=self.default_dialect,
                                           varying_length_text=varying_length_text,
                                           uniques=uniques, pk_name=pk_name, force_pk=force_pk,
                                           _parent_table=self, reorder=reorder,
                                           _fk_field_name=child_fk_names[child_name],
                                           metadata_source=child_metadata_sources.get(child_name),
                                           loglevel=loglevel)
                         for (child_name, child_data) in children.items()}

        if save_metadata_to:
            # Both accepted suffixes include the dot, so a name that merely
            # ends in the letters "yaml" still gets a proper extension.
            if not save_metadata_to.endswith(('.yml', '.yaml')):
                save_metadata_to += '.yaml'
            with open(save_metadata_to, 'w') as outfile:
                outfile.write(yaml.dump(self._saveable_metadata()))
            logging.info('Pass ``--save-metadata-to %s`` next time to re-use structure',
                         save_metadata_to)
Пример #2
0
    def __init__(self, data, table_name=None, default_dialect=None,
                 save_metadata_to=None, metadata_source=None,
                 varying_length_text=False, uniques=False,
                 pk_name=None, force_pk=False, data_size_cushion=0,
                 _parent_table=None, _fk_field_name=None, reorder=False,
                 loglevel=logging.WARN, limit=None):
        """
        Initialize a Table and load its data.

        If ``varying_length_text`` is ``True``,
        text columns will be TEXT rather than VARCHAR.
        This *improves* performance in PostgreSQL.

        If a ``metadata<timestamp>`` YAML file generated
        from a previous ddlgenerator run is
        provided, *only* ``INSERT`` statements will be produced,
        and the table structure
        determined during the previous run will be assumed.

        :param data: a ``Source``, a string or file-like object (wrapped in
            a ``Source``), or any other Python object (iterated directly,
            or wrapped in a ``Source`` if it is not iterable).
        :param table_name: explicit table name; ``self._find_table_name``
            is called afterwards with ``data``.
        :param default_dialect: stored on the instance and forwarded to
            child tables.
        :param save_metadata_to: path to which ``self._saveable_metadata()``
            is dumped as YAML; ``.yaml`` is appended unless the path already
            ends in ``.yml`` or ``.yaml``.
        :param metadata_source: either an ``OrderedDict`` of column metadata
            or the path of a YAML file holding it.  When falsy, column types
            are worked out by ``self._determine_types``.
        :param uniques: when falsy no column is declared unique; otherwise
            each column's ``is_unique`` metadata decides.  Also forwarded to
            child tables.
        :param pk_name: forwarded to ``reshape.unnest_children``; also the
            column moved to the front when ``reorder`` is set.
        :param force_pk: forwarded to ``reshape.unnest_children``.
        :param data_size_cushion: stored on the instance and forwarded to
            child tables.
        :param _parent_table: internal; parent ``Table`` of a child table.
        :param _fk_field_name: internal; name of the column that receives
            the foreign key pointing at the parent's primary key.
        :param reorder: when true, columns are sorted by name, with the
            ``pk_name`` column (if present) first.
        :param loglevel: level applied to the *root* logger.
        :param limit: passed to ``Source`` when ``data`` is a string or
            file-like object.
        """
        logging.getLogger().setLevel(loglevel)
        self.varying_length_text = varying_length_text
        self.table_name = table_name
        self.data_size_cushion = data_size_cushion
        self._find_table_name(data)
        # Send anything but Python data objects to
        # data_dispenser.sources.Source
        if isinstance(data, Source):
            self.data = data
        elif hasattr(data, 'lower') or hasattr(data, 'read'):
            self.data = Source(data, limit=limit)
        else:
            try:
                self.data = iter(data)
            except TypeError:
                self.data = Source(data)

        # Prefer a name supplied by the data source over an auto-generated one.
        if (self.table_name.startswith('generated_table')
                and hasattr(self.data, 'table_name')):
            self.table_name = self.data.table_name
        self.table_name = self.table_name.lower()

        # ``self.data`` may be a plain iterator (from ``iter(data)`` above)
        # with no ``generator`` attribute, so look it up defensively.
        generator = getattr(self.data, 'generator', None)
        if hasattr(generator, 'sqla_columns'):
            # The source already describes its columns: nothing to unnest.
            children = {}
            child_fk_names = {}
            # NOTE(review): raises StopIteration when none of the source's
            # columns is flagged as a primary key.
            self.pk_name = next(col.name for col in generator.sqla_columns
                                if col.primary_key)
        else:
            self.data = reshape.walk_and_clean(self.data)
            (self.data, self.pk_name, children, child_fk_names
                ) = reshape.unnest_children(data=self.data,
                                            parent_name=self.table_name,
                                            pk_name=pk_name,
                                            force_pk=force_pk)

        self.default_dialect = default_dialect
        self.comments = {}
        child_metadata_sources = {}
        if metadata_source:
            if isinstance(metadata_source, OrderedDict):
                logging.info('Column metadata passed in as OrderedDict')
                self.columns = metadata_source
            else:
                logging.info('Pulling column metadata from file %s',
                             metadata_source)
                with open(metadata_source) as infile:
                    # NOTE(review): yaml.load can construct arbitrary Python
                    # objects; only point this at metadata files you trust.
                    self.columns = yaml.load(infile.read())
            # Iterate over a snapshot: entries are popped inside the loop,
            # and mutating a dict while iterating its live view raises
            # RuntimeError on Python 3.
            for (col_name, col) in list(self.columns.items()):
                if isinstance(col, OrderedDict):
                    # Nested OrderedDicts are metadata for child tables.
                    child_metadata_sources[col_name] = col
                    self.columns.pop(col_name)
                else:
                    self._fill_metadata_from_sample(col)
        else:
            self._determine_types()

        if reorder:
            ordered_columns = OrderedDict()
            if pk_name and pk_name in self.columns:
                ordered_columns[pk_name] = self.columns.pop(pk_name)
            for (c, v) in sorted(self.columns.items()):
                ordered_columns[c] = v
            self.columns = ordered_columns

        if _parent_table:
            fk = sa.ForeignKey('%s.%s' % (_parent_table.table_name,
                                          _parent_table.pk_name))
        else:
            fk = None

        # Only the column named ``_fk_field_name`` carries the foreign key;
        # every other column gets ``None`` in that positional slot.
        self.table = sa.Table(self.table_name, metadata,
                              *[sa.Column(cname, col['satype'],
                                          fk if fk and (_fk_field_name == cname)
                                             else None,
                                          primary_key=(cname == self.pk_name),
                                          unique=(uniques and col['is_unique']),
                                          nullable=col['is_nullable'],
                                          doc=self.comments.get(cname))
                                for (cname, col) in self.columns.items()
                                ])

        # Each unnested child becomes its own Table linked back to this one.
        self.children = {child_name: Table(child_data, table_name=child_name,
                                           default_dialect=self.default_dialect,
                                           varying_length_text=varying_length_text,
                                           uniques=uniques, pk_name=pk_name,
                                           force_pk=force_pk, data_size_cushion=data_size_cushion,
                                           _parent_table=self, reorder=reorder,
                                           _fk_field_name=child_fk_names[child_name],
                                           metadata_source=child_metadata_sources.get(child_name),
                                           loglevel=loglevel)
                         for (child_name, child_data) in children.items()}

        if save_metadata_to:
            # Both accepted suffixes include the dot, so a name that merely
            # ends in the letters "yaml" still gets a proper extension.
            if not save_metadata_to.endswith(('.yml', '.yaml')):
                save_metadata_to += '.yaml'
            with open(save_metadata_to, 'w') as outfile:
                outfile.write(yaml.dump(self._saveable_metadata()))
            logging.info('Pass ``--save-metadata-to %s`` next time to re-use structure',
                         save_metadata_to)
Пример #3
0
    def __init__(self, data, table_name=None, default_dialect=None,
                 save_metadata_to=None, metadata_source=None,
                 varying_length_text=False, uniques=False,
                 pk_name=None, force_pk=False,
                 _parent_table=None, _fk_field_name=None, reorder=False,
                 loglevel=logging.WARN, limit=None):
        """
        Initialize a Table and load its data.

        If ``varying_length_text`` is ``True``,
        text columns will be TEXT rather than VARCHAR.
        This *improves* performance in PostgreSQL.

        If a ``metadata<timestamp>`` YAML file generated
        from a previous ddlgenerator run is
        provided, *only* ``INSERT`` statements will be produced,
        and the table structure
        determined during the previous run will be assumed.

        :param data: a string or file-like object (wrapped in a ``Source``),
            or any other Python object (iterated directly, or wrapped in a
            ``Source`` if it is not iterable).
        :param table_name: explicit table name; ``self._find_table_name``
            is called afterwards with ``data``.
        :param default_dialect: stored on the instance and forwarded to
            child tables.
        :param save_metadata_to: path to which ``self._saveable_metadata()``
            is dumped as YAML; ``.yaml`` is appended unless the path already
            ends in ``.yml`` or ``.yaml``.
        :param metadata_source: either an ``OrderedDict`` of column metadata
            or the path of a YAML file holding it.  When falsy, column types
            are worked out by ``self._determine_types``.
        :param uniques: forwarded to child tables.
        :param pk_name: forwarded to ``reshape.unnest_children``; also the
            column moved to the front when ``reorder`` is set.
        :param force_pk: forwarded to ``reshape.unnest_children``.
        :param _parent_table: internal; parent ``Table`` of a child table.
        :param _fk_field_name: internal; name of the column that receives
            the foreign key pointing at the parent's primary key.
        :param reorder: when true, columns are sorted by name, with the
            ``pk_name`` column (if present) first.
        :param loglevel: level applied to the *root* logger.
        :param limit: passed to ``Source`` when ``data`` is a string or
            file-like object.
        """
        logging.getLogger().setLevel(loglevel)
        self.varying_length_text = varying_length_text
        self.table_name = table_name
        self._find_table_name(data)
        # Send anything but Python data objects to
        # data_dispenser.sources.Source
        if hasattr(data, 'lower') or hasattr(data, 'read'):
            self.data = Source(data, limit=limit)
        else:
            try:
                self.data = iter(data)
            except TypeError:
                self.data = Source(data)

        self.data = reshape.walk_and_clean(self.data)

        (self.data, self.pk_name, children, child_fk_names
            ) = reshape.unnest_children(data=self.data,
                                        parent_name=self.table_name,
                                        pk_name=pk_name,
                                        force_pk=force_pk)

        self.default_dialect = default_dialect
        self.comments = {}
        child_metadata_sources = {}
        if metadata_source:
            if isinstance(metadata_source, OrderedDict):
                logging.info('Column metadata passed in as OrderedDict')
                self.columns = metadata_source
            else:
                logging.info('Pulling column metadata from file %s',
                             metadata_source)
                with open(metadata_source) as infile:
                    # NOTE(review): yaml.load can construct arbitrary Python
                    # objects; only point this at metadata files you trust.
                    self.columns = yaml.load(infile.read())
            # Iterate over a snapshot: entries are popped inside the loop,
            # and mutating a dict while iterating its live view raises
            # RuntimeError on Python 3.
            for (col_name, col) in list(self.columns.items()):
                if isinstance(col, OrderedDict):
                    # Nested OrderedDicts are metadata for child tables.
                    child_metadata_sources[col_name] = col
                    self.columns.pop(col_name)
                else:
                    self._fill_metadata_from_sample(col)
        else:
            self._determine_types()

        if reorder:
            ordered_columns = OrderedDict()
            if pk_name and pk_name in self.columns:
                ordered_columns[pk_name] = self.columns.pop(pk_name)
            for (c, v) in sorted(self.columns.items()):
                ordered_columns[c] = v
            self.columns = ordered_columns

        if _parent_table:
            fk = sa.ForeignKey('%s.%s' % (_parent_table.table_name,
                                          _parent_table.pk_name))
        else:
            fk = None

        # Only the column named ``_fk_field_name`` carries the foreign key;
        # every other column gets ``None`` in that positional slot.
        self.table = sa.Table(self.table_name, metadata,
                              *[sa.Column(cname, col['satype'],
                                          fk if fk and (_fk_field_name == cname)
                                             else None,
                                          primary_key=(cname == self.pk_name),
                                          unique=col['is_unique'],
                                          nullable=col['is_nullable'],
                                          doc=self.comments.get(cname))
                                for (cname, col) in self.columns.items()
                                ])

        # Each unnested child becomes its own Table linked back to this one.
        self.children = {child_name: Table(child_data, table_name=child_name,
                                           default_dialect=self.default_dialect,
                                           varying_length_text=varying_length_text,
                                           uniques=uniques, pk_name=pk_name,
                                           force_pk=force_pk,
                                           _parent_table=self, reorder=reorder,
                                           _fk_field_name=child_fk_names[child_name],
                                           metadata_source=child_metadata_sources.get(child_name),
                                           loglevel=loglevel)
                         for (child_name, child_data) in children.items()}

        if save_metadata_to:
            # Both accepted suffixes include the dot, so a name that merely
            # ends in the letters "yaml" still gets a proper extension.
            if not save_metadata_to.endswith(('.yml', '.yaml')):
                save_metadata_to += '.yaml'
            with open(save_metadata_to, 'w') as outfile:
                outfile.write(yaml.dump(self._saveable_metadata()))
            logging.info('Pass ``--save-metadata-to %s`` next time to re-use structure',
                         save_metadata_to)