Пример #1
0
def add_sqlite_table(table_name,
                     sqlite_meta,
                     dbc_map,
                     ds,
                     refyear=max(pc.WORKING_PARTITIONS['ferc1']['years']),
                     bad_cols=()):
    """Add a new Table to the FERC Form 1 database schema.

    Builds a :class:`sqlalchemy.Table` named ``table_name`` inside
    ``sqlite_meta``. Uses ``dbc_map`` to translate the truncated column
    names found in the DBF file into the full column names extracted from
    the DBC file, and the DBF field types to choose SQLAlchemy column
    types.

    Args:
        table_name (str): Name of the new table to add to the schema.
        sqlite_meta (:class:`sqlalchemy.schema.MetaData`): Schema the new
            :class:`sqlalchemy.Table` is attached to.
        dbc_map (dict): Nested dictionary mapping table name to a dict of
            {truncated DBF column name: full column name}.
        ds (:class:`Ferc1Datastore`): Initialized datastore used to read
            the table's DBF file.
        refyear (int): Year of FERC Form 1 data to read the table layout
            from; defaults to the most recent working year.
        bad_cols (iterable of 2-tuples): (table_name, column_name) pairs
            identifying columns which should *not* be cloned into the
            SQLite database.

    Returns:
        None

    """
    new_table = sa.Table(table_name, sqlite_meta)

    # Pull the raw DBF contents for the reference year out of the datastore.
    dbf_filename = DBF_TABLES_FILENAMES[table_name]
    dbf = dbfread.DBF(dbf_filename,
                      ignore_missing_memofile=True,
                      filedata=ds.get_file(refyear, dbf_filename))

    # One SQLAlchemy column per DBF field, skipping bookkeeping/bad columns.
    for field in dbf.fields:
        if field.name == '_NullFlags':
            continue  # not a real data field
        col_name = dbc_map[table_name][field.name]
        if (table_name, col_name) in bad_cols:
            continue
        col_type = DBF_TYPES[field.type]
        # String is the only type that needs an explicit length.
        if col_type == sa.String:
            col_type = sa.String(length=field.length)
        new_table.append_column(sa.Column(col_name, col_type))

    if table_name == 'f1_respondent_id':
        new_table.append_constraint(
            sa.PrimaryKeyConstraint('respondent_id',
                                    sqlite_on_conflict='REPLACE'))
    elif 'respondent_id' in {col.name for col in new_table.columns}:
        # Every other table carrying a respondent_id references the master
        # respondent table.
        new_table.append_constraint(
            sa.ForeignKeyConstraint(
                columns=['respondent_id'],
                refcolumns=['f1_respondent_id.respondent_id']))
Пример #2
0
def get_dbc_map(ds, year, min_length=4):
    """
    Extract the table and field names from a FERC Form 1 DBC file.

    Reads the DBC index file shipped with the FERC Form 1 database for
    ``year`` and pairs the full field names it contains with the truncated
    (<=10 character) field names found in each corresponding DBF file in
    the datastore.

    Args:
        ds (:class:`Ferc1Datastore`): Initialized datastore.
        year (int): The year of data from which the database table and
            column names are to be extracted. Typically the most recently
            available year of FERC Form 1 data.
        min_length (int): The minimum number of consecutive printable
            characters that should be considered a meaningful string.
            NOTE(review): not referenced in this body -- string extraction
            happens inside ``get_fields``; confirm whether it should be
            passed through.

    Returns:
        dict: keys are the long table names extracted from the DBC file;
        each value is a dict mapping the truncated DBF field name to the
        corresponding full field name.

    Raises:
        ValueError: if the DBC index disagrees with a DBF file about how
        many fields a table has, or if a truncated/full name pair does not
        share a common 8-character prefix.

    """
    # Parse the master DBC index into {table: [full field names]}.
    dbc_index = ds.get_file(year, "F1_PUB.DBC")
    tf_dict = get_fields(dbc_index)

    dbc_map = {}
    for table, dbf_filename in DBF_TABLES_FILENAMES.items():
        try:
            dbf_bytes = ds.get_file(year, dbf_filename)
        except KeyError:
            # Not all tables exist in all years, so this is acceptable.
            continue

        field_names = dbfread.DBF("",
                                  filedata=dbf_bytes,
                                  ignore_missing_memofile=True).field_names
        short_names = [name for name in field_names if name != '_NullFlags']
        dbc_map[table] = dict(zip(short_names, tf_dict[table]))
        if len(tf_dict[table]) != len(short_names):
            raise ValueError(
                f"Number of DBF fields in {table} does not match what was "
                f"found in the FERC Form 1 DBC index file for {year}.")

    # Sanity check: truncated and full names should share an 8-char prefix.
    for table in dbc_map:
        for sn, ln in dbc_map[table].items():
            if ln[:8] != sn.lower()[:8]:
                raise ValueError(
                    f"DBF field name mismatch: {ln[:8]} != {sn.lower()[:8]}")

    return dbc_map
Пример #3
0
def extract_dbc_tables(year, minstring=4):
    """Extract the names of all the tables and fields from FERC Form 1 DB.

    Reads all the strings in the given year's DBC database file, picks out
    the ones that appear to be database table names and their subsequent
    field names, for use in re-naming the truncated columns extracted from
    the corresponding DBF files (which are limited to having only 10
    characters in their names). Strings must have at least ``minstring``
    printable characters.

    Args:
        year (int): Year of FERC Form 1 data whose DBC file is read.
        minstring (int): Minimum number of consecutive printable characters
            for a string to be considered meaningful.

    Returns:
        dict: A dictionary whose keys are the long table names extracted
        from the DBC file, and whose values are dicts mapping the truncated
        (<=10 character) field name found in the DBF file to the
        corresponding full field name.

    Raises:
        ValueError: if the DBC index disagrees with a DBF file about how
        many fields a table has, or if a truncated/full name pair does not
        share a common 8-character prefix.

    TODO: This routine shouldn't refer to any particular year of data, but
    right now it depends on the ferc1_dbf2tbl dictionary, which was
    generated from the 2015 Form 1 database.
    """
    # Extract all the strings longer than "min" from the DBC file
    dbc_strs = list(get_strings(dbc_filename(year), min=minstring))

    # Get rid of leading & trailing whitespace in the strings:
    dbc_strs = [s.strip() for s in dbc_strs]

    # Get rid of all the empty strings. (Must be an equality test:
    # the previous ``s is not ''`` identity check only worked by accident
    # of CPython string interning.)
    dbc_strs = [s for s in dbc_strs if s != '']

    # Collapse all whitespace to a single space (raw string so the regex
    # escape isn't a Python string escape):
    dbc_strs = [re.sub(r'\s+', ' ', s) for s in dbc_strs]

    # Pull out only strings that begin with Table or Field
    dbc_strs = [s for s in dbc_strs if re.match('(^Table|^Field)', s)]

    # Split each string by whitespace, and retain only the first two
    # elements. This eliminates some weird dangling junk characters.
    dbc_strs = [' '.join(s.split()[:2]) for s in dbc_strs]

    # Remove all of the leading Field keywords
    dbc_strs = [re.sub('Field ', '', s) for s in dbc_strs]

    # Join all the strings together (separated by spaces) and then split
    # the big string on Table, so each string is now a table name followed
    # by the associated field names, separated by spaces
    dbc_list = ' '.join(dbc_strs).split('Table ')

    # strip leading & trailing whitespace from the lists, and get rid of
    # empty strings:
    dbc_list = [s.strip() for s in dbc_list if s != '']

    # First token of each string is the table name; the rest are fields.
    tf_dict = {}
    for tbl in dbc_list:
        x = tbl.split()
        tf_dict[x[0]] = x[1:]

    tf_doubledict = {}
    for dbf in pc.ferc1_dbf2tbl:
        filename = os.path.join(datadir(year), '{}.DBF'.format(dbf))
        if not os.path.isfile(filename):
            continue
        table = pc.ferc1_dbf2tbl[dbf]
        # _NullFlags isn't a real data field, so drop it.
        dbf_fields = [f for f in dbfread.DBF(filename).field_names
                      if f != '_NullFlags']
        # Raise instead of assert: asserts vanish under ``python -O``.
        if len(tf_dict[table]) != len(dbf_fields):
            raise ValueError(
                f"Number of DBF fields in {table} does not match what was "
                f"found in the FERC Form 1 DBC index file for {year}.")
        tf_doubledict[table] = dict(zip(dbf_fields, tf_dict[table]))

    # Insofar as we are able, make sure that the fields match each other
    for k in tf_doubledict:
        for sn, ln in tf_doubledict[k].items():
            if ln[:8] != sn.lower()[:8]:
                raise ValueError(
                    f"DBF field name mismatch: {ln[:8]} != {sn.lower()[:8]}")

    return tf_doubledict
Пример #4
0
import os  # used below (os.chdir/os.getcwd) but was not imported in this snippet
import pickle

import dbfread  # used below (dbfread.DBF) but was not imported in this snippet
from sklearn import linear_model

# Change the working directory to where the data files live.
MyLoc = r'D:\Dropbox\Dropbox\PublicCode_Git\PatrolRedistrict\PatrolRedistrict\DataCreated'
os.chdir(MyLoc)
print(os.getcwd())

# Get the Reporting Areas table, include call weights and XY coordinates.
# NOTE(review): "vars" shadows the builtin of the same name; kept as-is in
# case later (unseen) code references it.
ra_data = []
vars = [
    'PDGrid', 'NumberId', 'XFeet', 'YFeet', 'Cnt_ra', 'Sum_Minute',
    'Sum_Minu_1'
]

for record in dbfread.DBF('RA_Centroids.dbf'):
    temp = [record[i] for i in vars]
    ra_data.append(temp)

# Build lookup dictionaries mapping PDGrid <-> NumberId, plus empty
# containers filled in by the code that follows.
nid_map = {}
nid_invmap = {}
cont_dict = {}  # contiguity dictionary
call_dict = {}  # call dictionary
dist_dict = {}  # distance dictionary
areas = []
pair_data = []
for i in ra_data:
    areas.append(i[0])       # PDGrid identifiers
    nid_map[i[1]] = i[0]     # NumberId -> PDGrid
    nid_invmap[i[0]] = i[1]  # PDGrid -> NumberId
Пример #5
0
def define_db(refyear, ferc1_tables, ferc1_meta, verbose=True):
    """
    Given a list of FERC Form 1 DBF files, create analogous database tables.

    Based on strings extracted from the master F1_PUB.DBC file for
    ``refyear`` and the per-table DBF metadata, recreates a subset of the
    FERC Form 1 database as a Postgres database using SQLAlchemy.

    Args:
        refyear (int): Year of FERC Form 1 data to use as the database
            template.
        ferc1_tables (list): List of FERC Form 1 tables to ingest.
        ferc1_meta (SQLAlchemy MetaData): SQLAlchemy MetaData object to
            store the schema in.
        verbose (bool): If True, print progress messages.
    """
    ferc1_tblmap = extract_dbc_tables(refyear)
    # Translate the requested table names into DBF file prefixes.
    dbfs = [pc.ferc1_tbl2dbf[table] for table in ferc1_tables]

    if verbose:
        print("Defining new FERC Form 1 DB based on {}...".format(refyear))
        print("Clearing any existing FERC Form 1 database MetaData...")

    # This keeps us from having collisions when re-initializing the DB.
    ferc1_meta.clear()

    for dbf in dbfs:
        dbf_filename = os.path.join(datadir(refyear), '{}.DBF'.format(dbf))
        ferc1_dbf = dbfread.DBF(dbf_filename)

        # And the corresponding SQLAlchemy Table object:
        table_name = pc.ferc1_dbf2tbl[dbf]
        ferc1_sql = sa.Table(table_name, ferc1_meta)

        for field in ferc1_dbf.fields:
            # _NullFlags isn't a "real" data field... skip it.
            if re.match('_NullFlags', field.name):
                continue

            col_name = ferc1_tblmap[table_name][field.name]
            col_type = pc.dbf_typemap[field.type]

            # String/VarChar is the only type that really NEEDS a length.
            if col_type == sa.String:
                col_type = col_type(length=field.length)

            # Skip the "footnote" fields (names ending in _f) which mirror
            # real database fields; footnotes aren't integrated into the
            # rest of the DB yet, so there is no point cluttering it up.
            if not re.match('(.*_f$)', col_name):
                ferc1_sql.append_column(sa.Column(col_name, col_type))

        # Append primary key constraints to the table:
        if table_name == 'f1_respondent_id':
            ferc1_sql.append_constraint(
                sa.PrimaryKeyConstraint('respondent_id'))

        if table_name in pc.ferc1_data_tables:
            # All the "real" data tables share the same 5-column composite
            # primary key.
            ferc1_sql.append_constraint(
                sa.PrimaryKeyConstraint('respondent_id', 'report_year',
                                        'report_prd', 'row_number',
                                        'spplmnt_num'))
            # They also all have respondent_id as their foreign key:
            ferc1_sql.append_constraint(
                sa.ForeignKeyConstraint(
                    columns=['respondent_id', ],
                    refcolumns=['f1_respondent_id.respondent_id']))
Пример #6
0
def init_db(ferc1_tables=pc.ferc1_default_tables,
            refyear=2015,
            years=(2015,),
            def_db=True,
            verbose=True,
            testing=False):
    """Assuming an empty FERC Form 1 DB, create tables and insert data.

    This function uses dbfread and SQLAlchemy to migrate a set of FERC
    Form 1 database tables from the provided DBF format into a postgres
    database.

    Args:
        ferc1_tables (list): The set of tables to read from the FERC Form 1
            dbf database into the FERC Form 1 DB.
        refyear (int): Year of FERC Form 1 data to use as the database
            template.
        years (iterable): The set of years to read from FERC Form 1 dbf
            database into the FERC Form 1 DB. The default is a tuple rather
            than a list so the default can't be mutated across calls.
        def_db (bool): If True, (re)define the database schema from the
            reference year before loading any data.
        verbose (bool): If True, print progress messages.
        testing (bool): Passed through to ``db_connect_ferc1`` to select
            the testing database connection.
    """
    ferc1_engine = db_connect_ferc1(testing=testing)

    # Use metadata from the DBF files to define a postgres database
    # structure suitable for accepting the FERC Form 1 data.
    if def_db:
        define_db(refyear, ferc1_tables, ferc1_meta)

    # Wipe the DB and start over...
    drop_tables_ferc1(ferc1_engine)
    create_tables_ferc1(ferc1_engine)

    # Create a DB connection to use for the record insertions below:
    conn = ferc1_engine.connect()

    # This awkward dictionary of dictionaries lets us map from a DBF file
    # to a couple of lists -- one of the short field names from the DBF
    # file, and the other the full names that we want in the SQL database.
    ferc1_tblmap = extract_dbc_tables(refyear)

    # Translate the list of FERC Form 1 database tables that has
    # been passed in into a list of DBF files prefixes:
    dbfs = [pc.ferc1_tbl2dbf[table] for table in ferc1_tables]

    for year in years:
        if verbose:
            print("Ingesting FERC Form 1 Data from {}...".format(year))
        for dbf in dbfs:
            dbf_filename = os.path.join(datadir(year), '{}.DBF'.format(dbf))
            dbf_table = dbfread.DBF(dbf_filename, load=True)

            # pc.ferc1_dbf2tbl is a dictionary mapping DBF files to SQL
            # table names
            sql_table_name = pc.ferc1_dbf2tbl[dbf]
            sql_stmt = sa.dialects.postgresql.insert(
                ferc1_meta.tables[sql_table_name])

            # Build up a list of dictionaries to INSERT into the postgres
            # database. Each dictionary is one record. Within each
            # dictionary the keys are the field names, and the values are
            # the values for that field.
            sql_records = []
            for dbf_rec in dbf_table.records:
                sql_rec = {}
                for d, s in ferc1_tblmap[sql_table_name].items():
                    sql_rec[s] = dbf_rec[d]
                sql_records.append(sql_rec)

            # If we're reading in multiple years of FERC Form 1 data, we
            # need to avoid collisions in the f1_respondent_id table, which
            # does not have a year field... F1_1 is the DBF file that
            # stores this table:
            if dbf == 'F1_1':
                sql_stmt = sql_stmt.on_conflict_do_nothing()

            # insert the new records!
            conn.execute(sql_stmt, sql_records)

    conn.close()
Пример #7
0
def dBase2sqllite(db,
                  table,
                  encoding="cp437",
                  overwrite_table=None,
                  fLOG=noLOG):
    """
    Put all rows from a dBase database into sqlite

    Add a dbase table to an open sqlite database.

    @param      db                  cursor on SQLite or file name
    @param      table               DBF object or filename
    @param      encoding            encoding if table is a filename
    @param      overwrite_table     overwrite the table name
    @param      fLOG                logging function, to see the progress

    The table will be removed if it exists.
    """
    # Map DBF field type codes onto SQLite column types.
    typemap = {
        'F': 'FLOAT',
        'L': 'BOOLEAN',
        'I': 'INTEGER',
        'C': 'TEXT',
        'N': 'REAL',  # because it can be integer or float
        'M': 'TEXT',
        'D': 'DATE',
        'T': 'DATETIME',
        '0': 'INTEGER',
    }

    if isinstance(db, str):
        cursor = Database(db, LOG=fLOG)
        cursor.connect()
    else:
        cursor = db

    if isinstance(table, str):
        import dbfread  # pylint: disable=C0415
        table = dbfread.DBF(table, load=False, encoding=encoding)

    # Resolve the destination name *before* dropping, so that when
    # overwrite_table is given we drop the table we are about to create.
    # (Previously ``table.name`` was always dropped, so the subsequent
    # CREATE failed if the overwritten name already existed.)
    table_name = overwrite_table if overwrite_table is not None else table.name

    cursor.execute('drop table if exists %s' % table_name)

    field_types = {}
    for f in table.fields:
        field_types[f.name] = typemap.get(f.type, 'TEXT')

    # Create the table. NOTE: identifiers are interpolated into the SQL;
    # they come from the DBF metadata, not user input.
    defs = ', '.join(
        ['%s %s' % (f, field_types[f]) for f in table.field_names])
    sql = 'create table %s (%s)' % (table_name, defs)
    cursor.execute(sql)

    # Insert the data rows. Use positional '?' placeholders: binding a
    # plain sequence of values to named ':field' placeholders is rejected
    # by sqlite3 in recent Python versions.
    refs = ', '.join(['?'] * len(table.field_names))
    sql = 'insert into %s values (%s)' % (table_name, refs)

    for i, rec in enumerate(table):
        cursor._connection.execute(sql, list(rec.values()))
        if i % 20000 == 0:
            fLOG("moving line ", i, " to table", table_name)

    if isinstance(db, str):
        cursor.commit()
        cursor.close()
Пример #8
0
import dbfread
import numpy as np
from osgeo import gdal
import matplotlib.pyplot as plt

file = r'E:/Public/JiangJun/标准流程数据/shuju_chuli/TAVG/TAVG_fenxi/TAVG_R2_mask'
data = [
    'Qinghai', 'Haibei', 'Yushudong', 'Guoluodong', 'Yushuxi', 'Huangnan',
    'Tanggula', 'Guoluoxi', 'Hainan', 'Haidong', 'Haixi'
]
ds = gdal.Open(file + '.tif')
data1 = ds.ReadAsArray()
data1[data1 == data1[0][0]] = np.nan
data2 = [np.std(data1[~np.isnan(data1)])]
data1 = [data1[~np.isnan(data1)].mean()]
table = dbfread.DBF(file + '.dbf')
for i in table:
    data1.append(i['MEAN'])
table = dbfread.DBF(file + '_std.dbf')
for i in table:
    data2.append(i['STD'])


class para:
    # Plot styling constants for the bar chart drawn below this chunk.
    bar_width = 0.6
    bar_align = 'center'
    ticks_font = 'Times New Roman'
    xtick_direction = 'out'
    ytick_direction = 'in'
    # Axes spines to hide.
    frames = ['right', 'top']
    # Error-bar styling, passed through as matplotlib's bar(error_kw=...).
    error_kw = {'ecolor': 'k', 'capsize': 2, 'elinewidth': 0.5}
Пример #9
0
def open_dbf(name):
    """Open the DBF table ``name`` (uppercased) found under ``PATH``."""
    path = "{}/{}.DBF".format(PATH, name.upper())
    return dbfread.DBF(path, encoding="latin1", parserclass=ExtraFieldParser)
Пример #10
0
# (team72, Dongfang, Wang, 906257)
from matplotlib.path import Path
import numpy as np
import geopandas as gpd
import dbfread as dbr


def _load_lga_layer(prefix):
    """Load one state's LGA polygon shapefile plus its two DBF name tables."""
    polygons = gpd.read_file(prefix + '_LGA_POLYGON_shp.shp')
    name_table1 = dbr.DBF(prefix + '_LGA_POLYGON_shp.dbf')
    name_table2 = dbr.DBF(prefix + '_LGA_shp.dbf')
    return polygons, name_table1, name_table2


# Load map + name tables for each state/territory via the shared helper
# (replaces six copy-pasted blocks of the same three calls).
NSW_map, NSW_name_table1, NSW_name_table2 = _load_lga_layer('NSW')
NT_map, NT_name_table1, NT_name_table2 = _load_lga_layer('NT')
OT_map, OT_name_table1, OT_name_table2 = _load_lga_layer('OT')
QLD_map, QLD_name_table1, QLD_name_table2 = _load_lga_layer('QLD')
SA_map, SA_name_table1, SA_name_table2 = _load_lga_layer('SA')
TAS_map, TAS_name_table1, TAS_name_table2 = _load_lga_layer('TAS')

VIC_map = gpd.read_file('VIC_LGA_POLYGON_shp.shp')
Пример #11
0
import matplotlib.pyplot as plt


class para1(object):
    # Input location and year range for the annual zonal-statistics tables
    # consumed by the loop below.
    Folder_path = r'E:/Public/JiangJun/标准流程数据/shuju_chuli/TAVG/TAVG_ddq/'
    File_extension = '.dbf'
    ini_year = 1980
    end_year = 2018


data1 = []
data2 = []
# For every year in the configured range, find that year's DBF table and
# collect its per-region 'MEAN' values.
for year in range(para1.ini_year, para1.end_year + 1):
    matches = glob(para1.Folder_path + '*' + str(year) + '*' +
                   para1.File_extension)
    yearly_means = [rec['MEAN'] for rec in dbfread.DBF(matches[0])]
    data2.append(yearly_means)
    data1.append(year)
# Transpose so rows are regions and columns are years.
data2 = np.array(data2).T
data1 = np.array(data1)


class para(object):
    # Per-series line styling for the plot drawn after this chunk.
    Line_marker = ['^', 'o', 'p', 's']
    Line_ls = ['-.', '--', '-', '-']
    # Series labels (region names, in Chinese).
    Legend = ['果洛东', '黄南', '果洛西', '海东']
    # Axes spines to hide.
    frames = ['right', 'top']
    Legend1_prop = {'family': 'SimSun', 'size': 12}
    # Legend2_prop = {'family': 'Times New Roman', 'style': 'italic', 'size': 12}
    # Line2_label = 'y = {0} x - {1}\n(R = {2} , n = {3} , P < 0.01)'.format(s, d, r, n)
Пример #12
0
def import_dbf(filename, limit=0):
    """Import client records from a DBF file into the database.

    Reads ``filename`` (cp866 encoding), maps DBF column names onto
    ClientModel fields, and adds every record for which no client with the
    same surname/names/birthday already exists. Work is committed in
    batches of 500 records.

    Args:
        filename: Path of the DBF file to import.
        limit: Maximum number of records to process; 0 means no limit.
            (Previously the default ``limit=0`` stopped after the very
            first record, because ``count >= 0`` is always true.)
    """
    logger.info(f'Import (unknown) (limit={limit}):')

    # DBF column name -> ClientModel attribute.
    field_map = {
        'DT_VRSV': 'TEMP_POLICY_DATE',
        'NO_VRSV': 'TEMP_POLICY_NUMBER',
        'EDINNP': 'ENP',
        'SER_POL': 'POLICY_SERIAL',
        'NOM_POL': 'POLICY_NUMBER',
        'FAM': 'SURNAME',
        'IM': 'NAME1',
        'OT': 'NAME2',
        'D_R': 'BIRTHDAY',
        'POL': 'SEX',
        'RAION': 'REGION',
        'KOD_NP': 'LOCALITY_CODE',
        'NAS_P': 'LOCALITY',
        'KOD_UL': 'STREET_CODE',
        'DOM': 'HOUSE',
        'KV': 'FLAT',
        'KDLPU': 'MO_CODE',
        'KOD_SMO': 'SMO_CODE',
        'SS': 'CLIENT_SNILS',
        'SS_UCHVR': 'MEDIC_SNILS',
    }
    # Subset of columns used to decide whether the client already exists.
    search_field_map = {
        'FAM': 'SURNAME',
        'IM': 'NAME1',
        'OT': 'NAME2',
        'D_R': 'BIRTHDAY',
    }

    count = 0
    count_added = 0

    dbf = dbfread.DBF(filename, encoding='cp866')

    for recs in split_every(500, dbf):
        for rec in recs:
            count += 1
            # Look for an existing client with the same name/birthday
            # (None values are skipped so they don't over-constrain).
            exists = db.session.query(ClientModel.id).filter_by(
                **{
                    db_field: rec[dbf_field]
                    for dbf_field, db_field in search_field_map.items()
                    if rec[dbf_field] is not None
                }).scalar() is not None

            if not exists:
                # Empty DBF values ('' / 0) are normalized to None.
                fields = {
                    db_field: (rec[dbf_field] or None)
                    for dbf_field, db_field in field_map.items()
                }

                client = ClientModel(**fields)
                db.session.add(client)
                count_added += 1

            # limit == 0 means "no limit" -- only stop when a positive
            # limit has been reached.
            if limit and count >= limit:
                break

        logger.debug(f'processed: {count}, added: {count_added}')
        db.session.commit()

        if limit and count >= limit:
            break

    logger.info(f'Total records: {count}, added: {count_added}')
Пример #13
0
# Parse the command line: the input DBF path is the first real argument.
# (Python 2 script: note the bare print statement further below.)
in_file = None
if len(sys.argv) < 2:
    # argv[0] is always the script name, so "< 1" could never trigger and a
    # missing argument used to crash later on ``None + ".csv"``.
    print("Error: usage: vri_dbf_to_csv [input dbf file]")
    sys.exit(1)
else:
    in_file = sys.argv[1]

out_file = in_file + ".csv"
print("+w " + out_file)
ci = 0
count = {}    # per-column histogram of observed values
keys = None   # column names, captured from the first record
keytype = {}  # per-column Python type, captured from the first record
f = open(out_file, "wb")

for record in dbfread.DBF(in_file):
    if keys is None:
        # First record: capture the header. Commas inside field names would
        # break the CSV, so replace them with semicolons.
        keys = record.keys()
        keys = [k.replace(',', ';') for k in keys]
        f.write(",".join(keys))
    else:
        # All subsequent records must carry the same columns.
        if str(keys) != str(record.keys()):
            print "Error: record length mismatch"
            sys.exit(1)

    r = []
    for k in keys:
        r.append(str(record[k]))
        if k not in count:
            count[k] = {}
            keytype[k] = type(record[k])
Пример #14
0
 def read_file(self):
     """Read the configured input DBF file and return the loaded table."""
     # load=True pulls every record into memory up front.
     return dbf.DBF(self.file_name, load=True)
Пример #15
0
_author_ = "aman"
import dbfread


def convert_rec(dbf, lst_of_key, lst_of_value):
    """Collect keys and per-record value lists from an iterable of records.

    The first record's keys are appended to ``lst_of_key``; every record's
    values are inserted into ``lst_of_value`` in order. The sixth value of
    each of the first 29 records is printed as a progress sample.

    Returns the (mutated) ``lst_of_key`` and ``lst_of_value``.
    """
    for idx, record in enumerate(dbf):
        values = list(record.values())
        lst_of_value.insert(idx, values)
        if idx == 0:
            lst_of_key.extend(record.keys())
        if idx + 1 < 30:
            print(values[5])
    return lst_of_key, lst_of_value


# Source DBF file; ibm850 is the DOS code page used to decode its text.
input_path = "C:/Users/aman.raj/Documents/BCN0220.DBF"
dbf = dbfread.DBF(input_path, encoding="ibm850")

# Accumulators filled by convert_rec: column names and per-record values.
lst_of_key = []
lst_of_value = [[]]
print(dbf)
lst_of_key, lst_of_value = convert_rec(dbf, lst_of_key, lst_of_value)
Пример #16
0
def get_dbc_map(year, data_dir, min_length=4):
    """
    Extract names of all tables and fields from a FERC Form 1 DBC file.

    Reads the DBC file associated with the FERC Form 1 database for the
    given ``year``, extracts all printable strings of at least
    ``min_length`` characters, and pairs the full field names found there
    with the truncated (<=10 character) field names in the corresponding
    DBF files.

    Args:
        year (int): The year of data from which the database table and
            column names are to be extracted. Typically this is expected to
            be the most recently available year of FERC Form 1 data.
        data_dir (str): A string representing the full path to the top
            level of the PUDL datastore containing the FERC Form 1 data to
            be used.
        min_length (int): The minimum number of consecutive printable
            characters that should be considered a meaningful string and
            extracted.

    Returns:
        dict: keys are the long table names extracted from the DBC file;
        each value is a dict mapping the truncated DBF field name to the
        corresponding full field name.

    Raises:
        ValueError: if the DBC index disagrees with a DBF file about how
        many fields a table has, or if a truncated/full name pair does not
        share a common 8-character prefix.

    """
    # Pull every printable string of at least min_length characters out of
    # the DBC file, then boil each one down to "<keyword> <name>".
    raw_strings = list(
        get_strings(dbc_filename(year, data_dir), min_length=min_length))

    cleaned = []
    for raw in raw_strings:
        s = re.sub(r'\s+', ' ', raw.strip())  # trim + collapse whitespace
        if s == '' or not re.match('(^Table|^Field)', s):
            continue
        # Keep only the first two tokens -- the rest is dangling junk.
        s = ' '.join(s.split()[:2])
        # Drop the leading "Field" keyword; "Table" is kept as a marker.
        cleaned.append(re.sub('Field ', '', s))

    # Joining everything and splitting on "Table " leaves one string per
    # table: the table name followed by its field names.
    table_strings = ' '.join(cleaned).split('Table ')

    # First token of each entry is the table name; the rest are fields.
    tf_dict = {}
    for entry in table_strings:
        if entry == '':
            continue
        tokens = entry.strip().split()
        tf_dict[tokens[0]] = tokens[1:]

    dbc_map = {}
    for table in pc.ferc1_tbl2dbf:
        dbf_path = get_dbf_path(table, year, data_dir=data_dir)
        if not os.path.isfile(dbf_path):
            continue
        dbf_fields = [f for f in dbfread.DBF(dbf_path).field_names
                      if f != '_NullFlags']
        dbc_map[table] = dict(zip(dbf_fields, tf_dict[table]))
        if len(tf_dict[table]) != len(dbf_fields):
            raise ValueError(
                f"Number of DBF fields in {table} does not match what was "
                f"found in the FERC Form 1 DBC index file for {year}.")

    # Insofar as we are able, make sure that the fields match each other
    for table in dbc_map:
        for sn, ln in dbc_map[table].items():
            if ln[:8] != sn.lower()[:8]:
                raise ValueError(
                    f"DBF field name mismatch: {ln[:8]} != {sn.lower()[:8]}")

    return dbc_map
Пример #17
0
import dbfread as dr
# Read every record of the sales table into memory.
records = [p for p in dr.DBF('vendas.dbf')]
print("Quantidade de registros: ", len(records))

# Convert dbfread's record objects into plain dicts.
records = [dict(p) for p in records]

print(type(records[0]))

db_sequencias = []

# NOTE(review): this copies `records` element-by-element; it could simply
# be `db_sequencias = list(records)`.
for record in records:
    db_sequencias.append(record)

produtos = []

# Keep only the (product, operation) pair of each sale.
for record in db_sequencias:
    produtos.append([record['PRODUTO'], record['OPERACAO']])

print("Quantiadde de itens", len(produtos))

# Group products by operation id: {operation: [products...]}.
dataset = {}
for record in produtos:
    dataset[record[1]] = []
for record in produtos:
    dataset[record[1]].append(record[0])

cont = 0
line = ''
with open('input_apriori.csv', 'w') as fl:
    for key in dataset:
        cont += 1