def generate():
    """CLI entry point: read args, normalize the dialect, and print DDL for each data file."""
    args = read_args()
    set_logging(args)
    logging.info(str(args))
    # Accept common PostgreSQL aliases for the canonical dialect name.
    if args.dialect in ('pg', 'pgsql', 'postgres'):
        args.dialect = 'postgresql'
    if args.dialect not in dialect_names:
        raise NotImplementedError('First arg must be one of: %s' % ", ".join(dialect_names))
    for src in args.datafile:
        tbl = Table(src,
                    varying_length_text=args.text,
                    uniques=args.uniques,
                    pk_name=args.key,
                    force_pk=args.force_key,
                    reorder=args.reorder,
                    save_metadata_to=args.save_metadata_to,
                    metadata_source=args.use_metadata_from,
                    loglevel=args.log)
        print(tbl.sql(dialect=args.dialect,
                      inserts=args.inserts,
                      creates=(not args.no_creates),
                      drops=args.drops,
                      metadata_source=args.use_metadata_from))
def test_pydata_named_tuples(self):
    """A namedtuple data source yields correct column types and INSERT values."""
    ddl = Table(self.canada).sql('postgresql', inserts=True).strip()
    self.assertIn('capital VARCHAR(11) NOT NULL,', ddl)
    self.assertIn("(name, capital, pop) VALUES ('Quebec', 'Quebec City', 7903001)", ddl)
def generate():
    """CLI entry point: print DDL for every data file named on the command line."""
    args = read_args()
    set_logging(args)
    logging.info(str(args))
    # Map common PostgreSQL aliases onto the canonical dialect name.
    if args.dialect in ("pg", "pgsql", "postgres"):
        args.dialect = "postgresql"
    if args.dialect not in dialect_names:
        raise NotImplementedError("First arg must be one of: %s" % ", ".join(dialect_names))
    for source in args.datafile:
        table = Table(source,
                      varying_length_text=args.text,
                      uniques=args.uniques,
                      pk_name=args.key,
                      force_pk=args.force_key,
                      reorder=args.reorder,
                      save_metadata_to=args.save_metadata_to,
                      metadata_source=args.use_metadata_from,
                      loglevel=args.log)
        ddl = table.sql(dialect=args.dialect,
                        inserts=args.inserts,
                        creates=(not args.no_creates),
                        drops=args.drops,
                        metadata_source=args.use_metadata_from)
        print(ddl)
def test_cushion(self):
    """data_size_cushion widens the generated VARCHAR lengths."""
    uncushioned = Table(self.merovingians, data_size_cushion=0).sql('postgresql').strip()
    self.assertIn('VARCHAR(12)', uncushioned)
    cushioned = Table(self.merovingians, data_size_cushion=1).sql('postgresql').strip()
    self.assertIn('VARCHAR(14)', cushioned)
def test_sqlalchemy(self):
    """sqlalchemy() output contains Column definitions with inferred types."""
    model_code = Table(self.merovingians).sqlalchemy()
    self.assertIn("Column('reign_from'", model_code)
    self.assertIn("Integer()", model_code)
    model_code = Table(self.canada).sqlalchemy()
    self.assertIn("Column('capital', Unicode", model_code)
def test_pydata_named_tuples(self):
    """Inline namedtuple rows work as a data source; numeric strings become ints."""
    prov_type = namedtuple('province', ['name', 'capital', 'pop'])
    provinces = [
        prov_type('Quebec', 'Quebec City', '7903001'),
        prov_type('Ontario', 'Toronto', '12851821'),
    ]
    ddl = Table(provinces).sql('postgresql', inserts=True).strip()
    self.assertIn('capital VARCHAR(11) NOT NULL,', ddl)
    self.assertIn("(name, capital, pop) VALUES ('Quebec', 'Quebec City', 7903001)", ddl)
def test_django(self):
    """Smoke test: django_models() completes without raising on sample data.

    NOTE(review): no assertions yet — the earlier commented-out checks
    suggest expected output was never pinned down; this only verifies
    model generation does not crash.
    """
    Table(self.merovingians).django_models()
    Table(self.canada).django_models()
def test_files(self):
    """Every fixture <name>.<ext> must generate the SQL stored in <name>.sql."""
    for expected_path in glob.glob(here('*.sql')):
        with open(expected_path) as sql_file:
            expected = sql_file.read().strip()
        base = os.path.splitext(expected_path)[0]
        # NOTE(review): base is already the path returned by here(); this
        # assumes here() tolerates re-application to a resolved path — confirm.
        for source_path in glob.glob(here('%s.*' % base)):
            if os.path.splitext(source_path)[1] == '.sql':
                continue  # skip the expected-output file itself
            generated = Table(source_path, uniques=True).sql(
                'postgresql', inserts=True, drops=True).strip()
            self.assertEqual(generated, expected)
def test_nested(self):
    """Smoke test: nested dicts inside OrderedDict rows generate SQL without error."""
    kings = [
        OrderedDict([('name', {'name_id': 1, 'name_txt': 'Clovis I'}),
                     ('reign', {'from': 486, 'to': 511})]),
        OrderedDict([('name', {'name_id': 1, 'name_txt': 'Childebert I'}),
                     ('reign', {'from': 511, 'to': 558})]),
    ]
    # No assertion in the original — generation completing is the test.
    generated = Table(kings).sql('postgresql', inserts=True).strip()
def test_files(self):
    """Every fixture <name>.<ext> must generate the SQL stored in <name>.sql.

    Fixed: the original used ``str.split('.')`` to separate name and
    extension, which raises ValueError for any filename containing more
    than one dot; ``os.path.splitext`` handles those correctly.
    """
    import os  # local import: this snippet's surrounding imports are not visible here

    for sql_fname in glob.glob('*.sql'):
        (fname, _) = os.path.splitext(sql_fname)
        with open(sql_fname) as infile:
            expected = infile.read().strip()
        for source_fname in glob.glob('%s.*' % fname):
            (_, ext) = os.path.splitext(source_fname)
            # splitext keeps the leading dot, unlike the original split('.')
            if ext != '.sql':
                tbl = Table(source_fname, uniques=True)
                generated = tbl.sql('postgresql', inserts=True, drops=True).strip()
                self.assertEqual(generated, expected)
def parquet_to_sql_ddl(
    file_path: str, table_name: str, sql_dialect: str, file_output: str
) -> None:
    """Read a Parquet file and write CREATE TABLE DDL (with metadata) to a file.

    Fixed: removed an ``io.StringIO()`` buffer that was created but never
    used anywhere in the function.

    :param file_path: path of the Parquet file to read
    :param table_name: name for the generated SQL table
    :param sql_dialect: dialect string passed through to Table.sql
    :param file_output: path the generated SQL is written to (overwritten)
    """
    df = pd.read_parquet(file_path)
    records = df.to_dict("records")
    table = Table(data=records, table_name=table_name, uniques=True)
    sql = table.sql(sql_dialect, drops=False, metadata_source=True)
    with open(file=file_output, mode="w") as output:
        output.write(sql)
def generate():
    """CLI entry point: print DDL for each data file, honoring a row limit."""
    args = read_args()
    set_logging(args)
    logging.info(str(args))
    # Normalize PostgreSQL aliases.
    if args.dialect in ('pg', 'pgsql', 'postgres'):
        args.dialect = 'postgresql'
    if args.dialect not in dialect_names:
        raise NotImplementedError('First arg must be one of: %s' % ", ".join(dialect_names))
    for src in args.datafile:
        tbl = Table(src,
                    varying_length_text=args.text,
                    uniques=args.uniques,
                    pk_name=args.key,
                    force_pk=args.force_key,
                    reorder=args.reorder,
                    save_metadata_to=args.save_metadata_to,
                    metadata_source=args.use_metadata_from,
                    loglevel=args.log,
                    limit=args.limit)
        print(tbl.sql(dialect=args.dialect,
                      inserts=args.inserts,
                      creates=(not args.no_creates),
                      drops=args.drops,
                      metadata_source=args.use_metadata_from))
def generate_one(tbl, args, table_name=None, file=None):
    """
    Prints code (SQL, SQLAlchemy, etc.) to define a table.
    """
    target = Table(tbl,
                   table_name=table_name,
                   varying_length_text=args.text,
                   uniques=args.uniques,
                   pk_name=args.key,
                   force_pk=args.force_key,
                   reorder=args.reorder,
                   data_size_cushion=args.cushion,
                   save_metadata_to=args.save_metadata_to,
                   metadata_source=args.use_metadata_from,
                   loglevel=args.log,
                   limit=args.limit)
    dialect = args.dialect
    if dialect.startswith('sqla'):
        # SQLAlchemy mode: model definition first, then optional INSERTs.
        if not args.no_creates:
            print(target.sqlalchemy(), file=file)
        if args.inserts:
            print("\n".join(target.inserts(dialect=dialect)), file=file)
    elif dialect.startswith('dj'):
        target.django_models()
    else:
        # Plain SQL dialect: one combined DDL/DML string.
        print(target.sql(dialect=dialect,
                         inserts=args.inserts,
                         creates=(not args.no_creates),
                         drops=args.drops,
                         metadata_source=args.use_metadata_from),
              file=file)
    return target
def generate():
    """CLI entry point; emits SQLAlchemy model code for 'sqla*' dialects, SQL otherwise."""
    args = read_args()
    set_logging(args)
    logging.info(str(args))
    if args.dialect in ('pg', 'pgsql', 'postgres'):
        args.dialect = 'postgresql'
    if args.dialect not in dialect_names:
        raise NotImplementedError('First arg must be one of: %s' % ", ".join(dialect_names))
    for src in args.datafile:
        tbl = Table(src,
                    varying_length_text=args.text,
                    uniques=args.uniques,
                    pk_name=args.key,
                    force_pk=args.force_key,
                    reorder=args.reorder,
                    save_metadata_to=args.save_metadata_to,
                    metadata_source=args.use_metadata_from,
                    loglevel=args.log,
                    limit=args.limit)
        if args.dialect.startswith('sqla'):
            print(tbl.sqlalchemy())
            if args.inserts:
                print("\n".join(tbl.inserts(dialect=args.dialect)))
        else:
            print(tbl.sql(dialect=args.dialect,
                          inserts=args.inserts,
                          creates=(not args.no_creates),
                          drops=args.drops,
                          metadata_source=args.use_metadata_from))
def generate_one(datafile, args, table_name=None):
    """Print generated code (SQLAlchemy, Django models, or SQL DDL) for one data file."""
    tbl = Table(datafile,
                table_name=table_name,
                varying_length_text=args.text,
                uniques=args.uniques,
                pk_name=args.key,
                force_pk=args.force_key,
                reorder=args.reorder,
                data_size_cushion=args.cushion,
                save_metadata_to=args.save_metadata_to,
                metadata_source=args.use_metadata_from,
                loglevel=args.log,
                limit=args.limit)
    if args.dialect.startswith('sqla'):
        print(tbl.sqlalchemy())
        if args.inserts:
            print("\n".join(tbl.inserts(dialect=args.dialect)))
    elif args.dialect.startswith('dj'):
        tbl.django_models()
    else:
        print(tbl.sql(dialect=args.dialect,
                      inserts=args.inserts,
                      creates=(not args.no_creates),
                      drops=args.drops,
                      metadata_source=args.use_metadata_from))
def generate_one(tbl, args, table_name=None, file=None):
    """
    Prints code (SQL, SQLAlchemy, etc.) to define a table.
    """
    result = Table(tbl,
                   table_name=table_name,
                   varying_length_text=args.text,
                   uniques=args.uniques,
                   pk_name=args.key,
                   force_pk=args.force_key,
                   reorder=args.reorder,
                   data_size_cushion=args.cushion,
                   save_metadata_to=args.save_metadata_to,
                   metadata_source=args.use_metadata_from,
                   loglevel=args.log,
                   limit=args.limit)
    if args.dialect.startswith('sqla'):
        # SQLAlchemy output: optionally the model, optionally the INSERTs.
        if not args.no_creates:
            print(result.sqlalchemy(), file=file)
        if args.inserts:
            print("\n".join(result.inserts(dialect=args.dialect)), file=file)
    elif args.dialect.startswith('dj'):
        result.django_models()
    else:
        print(result.sql(dialect=args.dialect,
                         inserts=args.inserts,
                         creates=(not args.no_creates),
                         drops=args.drops,
                         metadata_source=args.use_metadata_from),
              file=file)
    return result
# NOTE(review): fragment — these statements run inside a generator whose 'def'
# line is outside this view (they strip email/url/phone/address/state from a
# text block via parse_regex and yield one LinkTuple per block); the __main__
# guard then dumps the NYC and GA scraped data as PostgreSQL DDL + INSERTs.
# Left byte-identical: restructuring without the enclosing definition is unsafe.
(block, email) = parse_regex(block, email_patt) (block, url) = parse_regex(block, url_patt) (block, phone) = parse_regex(block, phone_patt) (block, address) = parse_regex(block, address_patt) (block, state) = parse_regex(block, state_patt) if state: address = "%s\n%s" % (address, state) name = block.splitlines()[0] description = "\n".join(block.splitlines()[1:]) tup = LinkTuple( category=current_heading, subcategory=current_subheading, link_text=name, link_target=url, description=description, phone=phone, address=address, email=email, source=file_name, ) yield tup if __name__ == "__main__": tbl = Table(nyc_data(), table_name="nyc", force_pk=True, varying_length_text=True) print(tbl.sql(inserts=True, drops=True, dialect="postgresql").encode("utf8")) tbl = Table(ga_data(), table_name="ga", force_pk=True, varying_length_text=True) print(tbl.sql(inserts=True, drops=True, dialect="postgresql").encode("utf8"))
# NOTE(review): fragment — this code sits inside a loop of a generator whose
# 'def' and 'for' lines are outside this view (the 'continue' and 'yield'
# prove it). One-line blocks are treated as headings/subheadings; multi-line
# blocks are parsed into a LinkTuple. The __main__ guard dumps NYC and GA data
# as PostgreSQL DDL + INSERTs. Left byte-identical: restructuring without the
# enclosing definition is unsafe.
nlines = len(lines) if nlines == 1: if block.startswith('- '): current_heading = block.strip('-').strip().title() current_subheading = '' else: current_subheading = block.strip().title() continue (block, email) = parse_regex(block, email_patt) (block, url) = parse_regex(block, url_patt) (block, phone) = parse_regex(block, phone_patt) (block, address) = parse_regex(block, address_patt) (block, state) = parse_regex(block, state_patt) if state: address = '%s\n%s' % (address, state) name = block.splitlines()[0] description = '\n'.join(block.splitlines()[1:]) tup = LinkTuple(category=current_heading, subcategory=current_subheading, link_text=name, link_target=url, description=description, phone=phone, address=address, email=email, source=file_name) yield tup if __name__ == '__main__': tbl = Table(nyc_data(), table_name='nyc', force_pk=True, varying_length_text=True) print(tbl.sql(inserts=True,drops=True,dialect='postgresql').encode('utf8')) tbl = Table(ga_data(), table_name='ga', force_pk=True, varying_length_text=True) print(tbl.sql(inserts=True,drops=True,dialect='postgresql').encode('utf8'))
def test_nested(self):
    """Nested dicts produce flattened child columns such as reign_to."""
    ddl = Table(self.merovingians).sql('postgresql', inserts=True).strip()
    self.assertIn("reign_to", ddl)
def test_pydata_named_tuples(self):
    """Column sizing and INSERT values are correct for namedtuple rows."""
    generated = Table(self.canada).sql('postgresql', inserts=True)
    generated = generated.strip()
    self.assertIn('capital VARCHAR(11) NOT NULL,', generated)
    expected_insert = "(name, capital, pop) VALUES ('Quebec', 'Quebec City', 7903001)"
    self.assertIn(expected_insert, generated)
def testData(self):
    """Using pk_name='year' generates a REFERENCES clause targeting that column."""
    ddl = Table(self.tbl, pk_name='year').sql('postgresql', inserts=True)
    self.assertIn('REFERENCES prize_winners (year)', ddl)
# delete empty file
def deleteemptyfile(filename):
    """Remove filename if it exists and is zero bytes (used to drop an unused log)."""
    if os.path.isfile(filename) and os.stat(filename).st_size == 0:
        os.remove(filename)

# read in either full file or random sample
if args.maxrows is None:
    ls = readfullfile(args.inputFile, args.delim, args.quotechar)
else:
    ls = randomsampler(args.inputFile, int(args.maxrows), args.delim, args.quotechar)

# generate sql using ddlgenerator
table = Table(ls, table_name=args.tablename, table_owner=args.tableowner)
sql = table.sql(args.dialect, inserts=args.addinserts)

# write or print out the sql file
if args.outputfile is None:
    print(sql)
else:
    # Fixed: use a context manager instead of manual open/close, so the
    # handle is released even if the write raises.
    with open(args.outputfile, "w+") as f:
        f.write(sql)

# if log file is empty then can delete
deleteemptyfile(args.logfile)
def test_use_open_file(self):
    """Table accepts an already-open file handle, not just a path string."""
    with open(here('knights.yaml')) as infile:
        ddl = Table(infile).sql('postgresql', inserts=True)
    self.assertIn('Lancelot', ddl)
def test_unique_columns(self):
    """With unique_varchar_lengths, only one column keeps VARCHAR(4).

    NOTE(review): presumably the flag forces each VARCHAR length to be
    distinct across columns — confirm against the Table implementation.
    """
    rows = [{'col1': 'four', 'col2': 'four'}]
    ddl = Table(rows, unique_varchar_lengths=True).sql('postgresql').strip()
    self.assertEqual(ddl.count('VARCHAR(4)'), 1)
from howardbrown import data
from ddlgenerator.ddlgenerator import Table

# Build DDL + INSERTs from the scraped data and print it utf-8 encoded.
tbl = Table(data(), force_pk=True, varying_length_text=True)
ddl = tbl.sql(inserts=True, drops=True, dialect='postgresql')
print(ddl.encode('utf8'))
from scraping.howardbrown import data
from ddlgenerator.ddlgenerator import Table

# Generate DDL (with DROPs and INSERTs) for the scraped dataset and
# print it as utf-8 bytes.
scraped = Table(data(), force_pk=True, varying_length_text=True)
print(scraped.sql(inserts=True, drops=True, dialect='postgresql').encode('utf8'))