Пример #1
0
def generate():
    args = read_args()
    set_logging(args)
    logging.info(str(args))
    if args.dialect in ('pg', 'pgsql', 'postgres'):
        args.dialect = 'postgresql'
    if args.dialect not in dialect_names:
        raise NotImplementedError('First arg must be one of: %s' %
                                  ", ".join(dialect_names))
    for datafile in args.datafile:
        table = Table(datafile,
                      varying_length_text=args.text,
                      uniques=args.uniques,
                      pk_name=args.key,
                      force_pk=args.force_key,
                      reorder=args.reorder,
                      save_metadata_to=args.save_metadata_to,
                      metadata_source=args.use_metadata_from,
                      loglevel=args.log)
        print(
            table.sql(dialect=args.dialect,
                      inserts=args.inserts,
                      creates=(not args.no_creates),
                      drops=args.drops,
                      metadata_source=args.use_metadata_from))
Пример #2
0
 def test_pydata_named_tuples(self):
     tbl = Table(self.canada)
     generated = tbl.sql('postgresql', inserts=True).strip()
     self.assertIn('capital VARCHAR(11) NOT NULL,', generated)
     self.assertIn(
         '(name, capital, pop) VALUES (\'Quebec\', \'Quebec City\', 7903001)',
         generated)
Пример #3
0
def generate():
    args = read_args()
    set_logging(args)
    logging.info(str(args))
    if args.dialect in ("pg", "pgsql", "postgres"):
        args.dialect = "postgresql"
    if args.dialect not in dialect_names:
        raise NotImplementedError("First arg must be one of: %s" % ", ".join(dialect_names))
    for datafile in args.datafile:
        table = Table(
            datafile,
            varying_length_text=args.text,
            uniques=args.uniques,
            pk_name=args.key,
            force_pk=args.force_key,
            reorder=args.reorder,
            save_metadata_to=args.save_metadata_to,
            metadata_source=args.use_metadata_from,
            loglevel=args.log,
        )
        print(
            table.sql(
                dialect=args.dialect,
                inserts=args.inserts,
                creates=(not args.no_creates),
                drops=args.drops,
                metadata_source=args.use_metadata_from,
            )
        )
Пример #4
0
 def test_cushion(self):
     tbl = Table(self.merovingians, data_size_cushion=0)
     generated = tbl.sql('postgresql').strip()        
     self.assertIn('VARCHAR(12)', generated)        
     tbl = Table(self.merovingians, data_size_cushion=1)
     generated = tbl.sql('postgresql').strip()        
     self.assertIn('VARCHAR(14)', generated)
Пример #5
0
 def test_sqlalchemy(self):
     tbl = Table(self.merovingians)
     generated = tbl.sqlalchemy()
     self.assertIn("Column('reign_from'", generated)
     self.assertIn("Integer()", generated)
     tbl = Table(self.canada)
     generated = tbl.sqlalchemy()
     self.assertIn("Column('capital', Unicode", generated)
Пример #6
0
 def test_pydata_named_tuples(self):
     prov_type = namedtuple('province', ['name', 'capital', 'pop'])
     canada = [prov_type('Quebec', 'Quebec City', '7903001'),
               prov_type('Ontario', 'Toronto', '12851821'), ]
     tbl = Table(canada)
     generated = tbl.sql('postgresql', inserts=True).strip()
     self.assertIn('capital VARCHAR(11) NOT NULL,', generated)
     self.assertIn('(name, capital, pop) VALUES (\'Quebec\', \'Quebec City\', 7903001)', generated)
Пример #7
0
 def test_pydata_named_tuples(self):
     prov_type = namedtuple('province', ['name', 'capital', 'pop'])
     canada = [prov_type('Quebec', 'Quebec City', '7903001'),
               prov_type('Ontario', 'Toronto', '12851821'), ]
     tbl = Table(canada)
     generated = tbl.sql('postgresql', inserts=True).strip()
     self.assertIn('capital VARCHAR(11) NOT NULL,', generated)
     self.assertIn('(name, capital, pop) VALUES (\'Quebec\', \'Quebec City\', 7903001)', generated)
Пример #8
0
 def test_django(self):
     tbl = Table(self.merovingians)
     generated = tbl.django_models()
     #print("generated")
     #print(generated)
     #self.assertIn("(models.Model):", generated)
     #self.assertIn("name_name_id =", generated)
     tbl = Table(self.canada)
     generated = tbl.django_models()
Пример #9
0
 def test_files(self):
     for sql_fname in glob.glob(here('*.sql')):
         with open(sql_fname) as infile:
             expected = infile.read().strip()
         (fname, ext) = os.path.splitext(sql_fname)
         for source_fname in glob.glob(here('%s.*' % fname)):
             (fname, ext) = os.path.splitext(source_fname)
             if ext != '.sql':
                 tbl = Table(source_fname, uniques=True)
                 generated = tbl.sql('postgresql', inserts=True, drops=True).strip()
                 self.assertEqual(generated, expected)
Пример #10
0
 def test_nested(self):
     merovingians = [
                     OrderedDict([('name', {'name_id': 1, 'name_txt': 'Clovis I'}), 
                                  ('reign', {'from': 486, 'to': 511}),
                                  ]),
                     OrderedDict([('name', {'name_id': 1, 'name_txt': 'Childebert I'}), 
                                  ('reign', {'from': 511, 'to': 558}),
                                  ]),
                     ]
     tbl = Table(merovingians)
     generated = tbl.sql('postgresql', inserts=True).strip()
Пример #11
0
 def test_files(self):
     for sql_fname in glob.glob('*.sql'):
         (fname, ext) = sql_fname.split('.')
         with open(sql_fname) as infile:
             expected = infile.read().strip()
         for source_fname in glob.glob('%s.*' % fname):
             (fname, ext) = source_fname.split('.')
             if ext != 'sql':
                 tbl = Table(source_fname, uniques=True)
                 generated = tbl.sql('postgresql', inserts=True, drops=True).strip()
                 self.assertEqual(generated, expected)
Пример #12
0
 def test_nested(self):
     merovingians = [
                     OrderedDict([('name', {'name_id': 1, 'name_txt': 'Clovis I'}), 
                                  ('reign', {'from': 486, 'to': 511}),
                                  ]),
                     OrderedDict([('name', {'name_id': 1, 'name_txt': 'Childebert I'}), 
                                  ('reign', {'from': 511, 'to': 558}),
                                  ]),
                     ]
     tbl = Table(merovingians)
     generated = tbl.sql('postgresql', inserts=True).strip()
Пример #13
0
def parquet_to_sql_ddl(
    file_path: str, table_name: str, sql_dialect: str, file_output: str
):
    f = io.StringIO()

    df = pd.read_parquet(file_path)
    data = df.to_dict("records")

    table = Table(data=data, table_name=table_name, uniques=True)
    sql = table.sql(sql_dialect, drops=False, metadata_source=True)

    with open(file=file_output, mode="w") as output:
        output.write(sql)
Пример #14
0
def generate():
    args = read_args()
    set_logging(args)
    logging.info(str(args))
    if args.dialect in ('pg', 'pgsql', 'postgres'):
        args.dialect = 'postgresql'
    if args.dialect not in dialect_names:
        raise NotImplementedError('First arg must be one of: %s' % ", ".join(dialect_names))
    for datafile in args.datafile:
        #import ipdb; ipdb.set_trace()
        table = Table(datafile, varying_length_text=args.text, uniques=args.uniques,
                      pk_name = args.key, force_pk=args.force_key, reorder=args.reorder,
                      save_metadata_to=args.save_metadata_to, metadata_source=args.use_metadata_from,
                      loglevel=args.log, limit=args.limit)
        print(table.sql(dialect=args.dialect, inserts=args.inserts,
                        creates=(not args.no_creates), drops=args.drops,
                        metadata_source=args.use_metadata_from))
Пример #15
0
def generate_one(tbl, args, table_name=None, file=None):
    """
    Prints code (SQL, SQLAlchemy, etc.) to define a table.
    """
    table = Table(tbl,
                  table_name=table_name,
                  varying_length_text=args.text,
                  uniques=args.uniques,
                  pk_name=args.key,
                  force_pk=args.force_key,
                  reorder=args.reorder,
                  data_size_cushion=args.cushion,
                  save_metadata_to=args.save_metadata_to,
                  metadata_source=args.use_metadata_from,
                  loglevel=args.log,
                  limit=args.limit)
    if args.dialect.startswith('sqla'):
        if not args.no_creates:
            print(table.sqlalchemy(), file=file)
        if args.inserts:
            print("\n".join(table.inserts(dialect=args.dialect)), file=file)
    elif args.dialect.startswith('dj'):
        table.django_models()
    else:
        print(table.sql(dialect=args.dialect,
                        inserts=args.inserts,
                        creates=(not args.no_creates),
                        drops=args.drops,
                        metadata_source=args.use_metadata_from),
              file=file)
    return table
Пример #16
0
 def test_cushion(self):
     tbl = Table(self.merovingians, data_size_cushion=0)
     generated = tbl.sql('postgresql').strip()
     self.assertIn('VARCHAR(12)', generated)
     tbl = Table(self.merovingians, data_size_cushion=1)
     generated = tbl.sql('postgresql').strip()
     self.assertIn('VARCHAR(14)', generated)
Пример #17
0
def generate():
    args = read_args()
    set_logging(args)
    logging.info(str(args))
    if args.dialect in ('pg', 'pgsql', 'postgres'):
        args.dialect = 'postgresql'
    if args.dialect not in dialect_names:
        raise NotImplementedError('First arg must be one of: %s' % ", ".join(dialect_names))
    for datafile in args.datafile:
        table = Table(datafile, varying_length_text=args.text, uniques=args.uniques,
                      pk_name = args.key, force_pk=args.force_key, reorder=args.reorder,
                      save_metadata_to=args.save_metadata_to, metadata_source=args.use_metadata_from,
                      loglevel=args.log, limit=args.limit)
        if args.dialect.startswith('sqla'):
            print(table.sqlalchemy())
            if args.inserts:
                print("\n".join(table.inserts(dialect=args.dialect)))
                #inserter.compile().bindtemplate
        else:
            print(table.sql(dialect=args.dialect, inserts=args.inserts,
                            creates=(not args.no_creates), drops=args.drops,
                            metadata_source=args.use_metadata_from))
Пример #18
0
 def test_sqlalchemy(self):
     tbl = Table(self.merovingians)
     generated = tbl.sqlalchemy()
     self.assertIn("Column('reign_from'", generated)
     self.assertIn("Integer()", generated)
     tbl = Table(self.canada)
     generated = tbl.sqlalchemy()
     self.assertIn("Column('capital', Unicode", generated)
Пример #19
0
 def test_django(self):
     tbl = Table(self.merovingians)
     generated = tbl.django_models()
     #print("generated")
     #print(generated)
     #self.assertIn("(models.Model):", generated)
     #self.assertIn("name_name_id =", generated)
     tbl = Table(self.canada)
     generated = tbl.django_models()
Пример #20
0
def generate_one(datafile, args, table_name=None):
    table = Table(datafile, table_name=table_name, varying_length_text=args.text, uniques=args.uniques,
                  pk_name = args.key, force_pk=args.force_key, reorder=args.reorder, data_size_cushion=args.cushion,
                  save_metadata_to=args.save_metadata_to, metadata_source=args.use_metadata_from,
                  loglevel=args.log, limit=args.limit)
    if args.dialect.startswith('sqla'):
        print(table.sqlalchemy())
        if args.inserts:
            print("\n".join(table.inserts(dialect=args.dialect)))
            #inserter.compile().bindtemplate
    elif args.dialect.startswith('dj'):
        table.django_models()
    else:
        print(table.sql(dialect=args.dialect, inserts=args.inserts,
                        creates=(not args.no_creates), drops=args.drops,
                        metadata_source=args.use_metadata_from))
Пример #21
0
def generate_one(tbl, args, table_name=None, file=None):
    """
    Prints code (SQL, SQLAlchemy, etc.) to define a table.
    """
    table = Table(tbl, table_name=table_name, varying_length_text=args.text, uniques=args.uniques,
                  pk_name = args.key, force_pk=args.force_key, reorder=args.reorder, data_size_cushion=args.cushion,
                  save_metadata_to=args.save_metadata_to, metadata_source=args.use_metadata_from,
                  loglevel=args.log, limit=args.limit)
    if args.dialect.startswith('sqla'):
        if not args.no_creates:
            print(table.sqlalchemy(), file=file)
        if args.inserts:
            print("\n".join(table.inserts(dialect=args.dialect)), file=file)
    elif args.dialect.startswith('dj'):
        table.django_models()
    else:
        print(table.sql(dialect=args.dialect, inserts=args.inserts,
                        creates=(not args.no_creates), drops=args.drops,
                        metadata_source=args.use_metadata_from), file=file)
    return table
Пример #22
0
        (block, email) = parse_regex(block, email_patt)
        (block, url) = parse_regex(block, url_patt)
        (block, phone) = parse_regex(block, phone_patt)
        (block, address) = parse_regex(block, address_patt)
        (block, state) = parse_regex(block, state_patt)
        if state:
            address = "%s\n%s" % (address, state)
        name = block.splitlines()[0]
        description = "\n".join(block.splitlines()[1:])

        tup = LinkTuple(
            category=current_heading,
            subcategory=current_subheading,
            link_text=name,
            link_target=url,
            description=description,
            phone=phone,
            address=address,
            email=email,
            source=file_name,
        )
        yield tup


if __name__ == "__main__":
    tbl = Table(nyc_data(), table_name="nyc", force_pk=True, varying_length_text=True)
    print(tbl.sql(inserts=True, drops=True, dialect="postgresql").encode("utf8"))
    tbl = Table(ga_data(), table_name="ga", force_pk=True, varying_length_text=True)
    print(tbl.sql(inserts=True, drops=True, dialect="postgresql").encode("utf8"))
Пример #23
0
        nlines = len(lines)
        if nlines == 1:
            if block.startswith('- '):
                current_heading = block.strip('-').strip().title() 
                current_subheading = ''
            else:
                current_subheading = block.strip().title()
            continue
        
        (block, email) = parse_regex(block, email_patt)
        (block, url) = parse_regex(block, url_patt)
        (block, phone) = parse_regex(block, phone_patt)
        (block, address) = parse_regex(block, address_patt)
        (block, state) = parse_regex(block, state_patt)
        if state:
            address = '%s\n%s' % (address, state)
        name = block.splitlines()[0]
        description = '\n'.join(block.splitlines()[1:])
        
        tup = LinkTuple(category=current_heading, subcategory=current_subheading,
                        link_text=name, link_target=url,
                        description=description, phone=phone, address=address, email=email,
                        source=file_name)
        yield tup
        
if __name__ == '__main__':
    tbl = Table(nyc_data(), table_name='nyc', force_pk=True, varying_length_text=True)
    print(tbl.sql(inserts=True,drops=True,dialect='postgresql').encode('utf8'))
    tbl = Table(ga_data(), table_name='ga', force_pk=True, varying_length_text=True)
    print(tbl.sql(inserts=True,drops=True,dialect='postgresql').encode('utf8'))
       
Пример #24
0
 def test_nested(self):
     tbl = Table(self.merovingians)
     generated = tbl.sql('postgresql', inserts=True).strip()
     self.assertIn("reign_to", generated)
Пример #25
0
 def test_pydata_named_tuples(self):
     tbl = Table(self.canada)
     generated = tbl.sql('postgresql', inserts=True).strip()
     self.assertIn('capital VARCHAR(11) NOT NULL,', generated)
     self.assertIn('(name, capital, pop) VALUES (\'Quebec\', \'Quebec City\', 7903001)', generated)
Пример #26
0
 def testData(self):
     winners = Table(self.tbl, pk_name='year')
     generated = winners.sql('postgresql', inserts=True)
     self.assertIn('REFERENCES prize_winners (year)', generated)
Пример #27
0
# delete empty file
def deleteemptyfile(filename):
    if os.path.isfile(filename) and os.stat(filename).st_size == 0:
        os.remove(filename)

# read in either full file or random 
if args.maxrows is None:
    ls = readfullfile(args.inputFile, args.delim, args.quotechar)
else:
    ls = randomsampler(args.inputFile, int(args.maxrows), args.delim, args.quotechar)

# generate sql using ddlgenerator
#if args.primarykey is None:
#    table = Table(ls, table_name = args.tablename) 
#else:
#    table = Table(ls, table_name = args.tablename, pk_name = args.primarykey)
table = Table(ls, table_name = args.tablename, table_owner = args.tableowner) 
sql = table.sql(args.dialect, inserts = args.addinserts)

# write or print out the sql file
if args.outputfile is None:
    print(sql)
else:
    f = open(args.outputfile, "w+")
    f.write(sql)
    f.close()

# if log file is empty then can delete
deleteemptyfile(args.logfile)
Пример #28
0
 def test_use_open_file(self):
     with open(here('knights.yaml')) as infile:
         knights = Table(infile)
         generated = knights.sql('postgresql', inserts=True)
         self.assertIn('Lancelot', generated)
Пример #29
0
 def test_unique_columns(self):
     test = [{'col1':'four', 'col2':'four'}]
     tbl = Table(test, unique_varchar_lengths=True)
     generated = tbl.sql('postgresql').strip()
     self.assertEqual(generated.count('VARCHAR(4)'), 1)
Пример #30
0
from howardbrown import data
from ddlgenerator.ddlgenerator import Table

tbl = Table(data(), force_pk=True, varying_length_text=True)
print(tbl.sql(inserts=True,drops=True,dialect='postgresql').encode('utf8'))
Пример #31
0
 def test_nested(self):
     tbl = Table(self.merovingians)
     generated = tbl.sql('postgresql', inserts=True).strip()
     self.assertIn("reign_to", generated)
Пример #32
0
 def testData(self):
     winners = Table(self.tbl, pk_name='year')
     generated = winners.sql('postgresql', inserts=True)
     self.assertIn('REFERENCES prize_winners (year)', generated)
Пример #33
0
 def test_use_open_file(self):
     with open(here('knights.yaml')) as infile:
         knights = Table(infile)
         generated = knights.sql('postgresql', inserts=True)
         self.assertIn('Lancelot', generated)
Пример #34
0
from scraping.howardbrown import data
from ddlgenerator.ddlgenerator import Table

tbl = Table(data(), force_pk=True, varying_length_text=True)
print(tbl.sql(inserts=True, drops=True, dialect='postgresql').encode('utf8'))