Expects a single filename argument;
converts tab delimitation to CSV;
cuts out the second line of junk.

python scripts/wrangle-starbucks.py \
   data/raw/starbucks_locations.csv \
   > data/wrangled/starbucks_locations.csv
"""

from csv import DictReader, DictWriter
from datetime import date
from re import search
from sys import argv, stdout

HEADERS = [
    "Store ID", "Name", "Store Number", "Phone Number", "Ownership Type",
    "Street Combined", "City", "Country Subdivision", "Country", "Postal Code",
    "Latitude", "Longitude"
]

if __name__ == '__main__':
    srcpath = argv[1]
    wcsv = DictWriter(stdout, fieldnames=HEADERS)
    wcsv.writeheader()
    with open(srcpath, 'r') as rf:
        rcsv = DictReader(rf, delimiter='\t')  # the source file is tab-delimited (per the docstring)
        for row in rcsv:
            wcsv.writerow(
                {k: v.strip()
                 for k, v in row.items() if k in HEADERS})
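
A note on the dict comprehension above: DictWriter raises a ValueError when a row
contains keys that are not in fieldnames, which is what the "if k in HEADERS" filter
guards against. A sketch of the same behavior using DictWriter's extrasaction option
(same HEADERS and srcpath assumed):

wcsv = DictWriter(stdout, fieldnames=HEADERS, extrasaction='ignore')
wcsv.writeheader()
with open(srcpath, 'r') as rf:
    for row in DictReader(rf, delimiter='\t'):
        # extrasaction='ignore' silently drops any keys not listed in HEADERS
        wcsv.writerow({k: v.strip() for k, v in row.items()})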
Example #2
# Imports this snippet relies on; dyn_class, get_line, sanitize, and escape
# are assumed to be defined elsewhere in the source module.
import os
import re
import sqlite3
import sys
from csv import DictReader, Error as CSVError, QUOTE_MINIMAL
from datetime import datetime


def load_data(cursor, config):
  file_tmpl = '{0}{1}.txt'

  possible_data = (
    'source',
    'election_administration',
    'state',
    'election',
    'election_official',
    'locality',
    'polling_location',
    'precinct',
    'precinct_split',
    'street_segment',
  )
  
  for i in possible_data:
    filename = file_tmpl.format(config.get('Main','data_dir'),i)
    print "Currently looking at {0}".format(i)
    if os.path.exists(filename):
      with open(filename,'r') as r:
        print "Parsing and loading data from {0}".format(i)
        sect = i.title()
        parser_type = config.get(sect,'parser_type')
        if config.has_section(sect):
          if parser_type=='csv':
            klass = dyn_class(
              config.get(sect,'parser_module'),
              config.get(sect,'parser_class')
            )
            
            reader = klass(
              r,
              delimiter=chr(config.getint(sect,'delimiter')),
              quotechar=config.get(sect, 'quotechar'),
              quoting=QUOTE_MINIMAL
            )
            
            reader.fieldnames = map(str.upper, reader.fieldnames)
            
          elif parser_type=='regex':
            reader = re.compile(config.get(sect,'regex'))
            #reader = DictReader(r, delimiter=chr(config.getint(i.title(),'delimiter')), quotechar='"', quoting=QUOTE_MINIMAL)
        else:
          reader = DictReader(
            r,
            delimiter=chr(config.getint('Parser','delimiter')),
            quotechar='"',
            quoting=QUOTE_MINIMAL
          )
        
        try:
          for line in get_line(r, reader, parser_type):
            # trim off any whitespace and escape the values
            try:
              for k, v in list(line.items()):
                line[k] = escape(v.strip())
            except (AttributeError, TypeError):
              # if this fails, print the offending line and keep going
              print(line)
              
            if i=='source':
              print(line)
              cursor.execute(
                "INSERT INTO VIP_Info(id,source_id,description,state_id) VALUES (?,?,?,?)",
                (
                  line['VIP_ID'],
                  line['ID'],
                  line.get('DESCRIPTION',None),
                  line.get('STATE_ID', config.get('Main','fips')),
                )
              )
              
            elif i=='election_administration':
              cursor.execute(
                "INSERT OR IGNORE INTO Election_Administration(id,name,eo_id,mailing_address,city,state,zip,zip_plus,elections_url,registration_url,am_i_registered_url,absentee_url,where_do_i_vote_url) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
                (
                  line['ID'],
                  line.get('NAME', None),
                  line.get('EO_ID', None),
                  line.get('MAILING_ADDRESS', None),
                  line.get('CITY', None),
                  line.get('STATE', None),
                  line.get('ZIP', None),
                  line.get('ZIP_PLUS', None),
                  line.get('ELECTIONS_URL', None),
                  line.get('REGISTRATION_URL', None),
                  line.get('AM_I_REGISTERED_URL', None),
                  line.get('ABSENTEE_URL', None),
                  line.get('WHERE_DO_I_VOTE_URL', None),
                )
              )
            
            elif i=='state':
              print(line)
              cursor.execute(
                "INSERT INTO State(id,name,election_administration_id,organization_url) VALUES (?,?,?,?)",
                (
                  line.get('ID', config.get('Main','fips')),
                  line.get('NAME'),
                  line.get('ELECTION_ADMINISTRATION_ID'),
                  line.get('ORGANIZATION_URL',''),
                )
              )
            
            elif i=='election':
              cursor.execute(
                "INSERT OR IGNORE INTO Election(id,date,election_type,state_id,statewide,registration_info) VALUES (?,?,?,?,?,?)",
                (
                  line['ID'],
                  datetime.strptime(line.get('DATE'), config.get('Main','time_format')).strftime('%Y-%m-%d'),
                  line.get('ELECTION_TYPE', "General"),
                  line.get('STATE_ID', config.get('Main','fips')),
                  line.get('STATEWIDE', "Yes"),
                  line.get('REGISTRATION_INFO', None),
                )
              )
            
            elif i=='election_official':
              cursor.execute(
                "INSERT INTO Election_Official(id,name,title,phone,fax,email) VALUES (?,?,?,?,?,?)",
                (
                  line['ID'],
                  line.get('NAME'),
                  line.get('TITLE', None),
                  line.get('PHONE', None),
                  line.get('FAX', None),
                  line.get('EMAIL', None)
                )
              )
              
            elif i=='locality':
              cursor.execute(
                """INSERT OR IGNORE INTO
                  Locality(
                  id,
                  name,
                  state_id,
                  type,
                  election_administration_id
                ) VALUES (?,?,?,?,?)""",
                (
                  line['ID'],
                  line.get('NAME'),
                  line.get('STATE_ID', config.get('Main','fips')),
                  line.get('TYPE'),
                  line.get('ELECTION_ADMINISTRATION_ID'),
                )
              )
            
            elif i=='polling_location':
              cursor.execute(
                """INSERT OR IGNORE INTO
                  Polling_Location(
                  id,
                  location_name,
                  line1,
                  city,
                  state,
                  zip
                ) VALUES (?,?,?,?,?,?)""",
                (
                  line['ID'],
                  line.get('LOCATION_NAME'),
                  line.get('LINE1'),
                  line.get('CITY'),
                  line.get('STATE'),
                  line.get('ZIP'),
                )
              )
              
            elif i=='precinct':
              cursor.execute(
                "INSERT OR IGNORE INTO Precinct(id,name,locality_id,mail_only) VALUES (?,?,?,?)",
                (
                  line['ID'],
                  line.get('NAME'),
                  line.get('LOCALITY_ID', config.get('Main','locality_id')),
                  line.get('MAIL_ONLY',"No"),
                )
              )
                
              if len(line.get('POLLING_LOCATION_ID',""))>0:
                cursor.execute(
                  "INSERT OR IGNORE INTO Precinct_Polling(precinct_id,polling_location_id) VALUES (?,?)",
                  (
                    line['ID'],
                    line['POLLING_LOCATION_ID'],
                  )
                )
              
              if len(line.get('EARLY_VOTE_SITE_ID',""))>0:
                cursor.execute(
                  "INSERT OR IGNORE INTO Precinct_Early_Vote(precinct_id,early_vote_site_id) VALUES (?,?)",
                  (
                    line['ID'],
                    line['EARLY_VOTE_SITE_ID'],
                  )
                )
            
            elif i=='precinct_split':
              if len(line.get('PRECINCT_ID',""))>0:
                if len(line.get('NAME',''))==0: line['NAME']=line['ID']
                  
                line['ID'] = sanitize(line,'ID')
                
                cursor.execute(
                  "INSERT OR IGNORE INTO Precinct_Split(id,name,precinct_id) VALUES (?,?,?)",
                  (
                    line['ID'],
                    line.get('NAME'),
                    line['PRECINCT_ID'],
                  )
                )
              
              if len(line.get('ELECTORAL_DISTRICT_ID',""))>0:
                cursor.execute(
                  "INSERT OR IGNORE INTO Precinct_Split_District(precinct_split_id,electoral_district_id) VALUES (?,?)",
                  (
                    line['ID'],
                    line['ELECTORAL_DISTRICT_ID'],
                  )
                )
                
              if len(line.get('POLLING_LOCATION_ID',""))>0:
                cursor.execute(
                  "INSERT OR IGNORE INTO Split_Polling(split_id,polling_location_id) VALUES (?,?)",
                  (
                    line['ID'],
                    line['POLLING_LOCATION_ID'],
                  )
                )
            
            elif i=='street_segment':
              if len(line.get('PRECINCT_SPLIT_ID',""))>0:
                line['PRECINCT_SPLIT_ID'] = sanitize(line,'PRECINCT_SPLIT_ID')
              
              if line.get('STREET_DIRECTION','')=='NULL':
                line['STREET_DIRECTION'] = None
                
              cursor.execute(
                """INSERT INTO
                  Street_Segment(
                  id,
                  start_house_number,
                  end_house_number,
                  odd_even_both,
                  start_apartment_number,
                  end_apartment_number,
                  street_direction,
                  street_name,
                  street_suffix,
                  address_direction,
                  state,
                  city,
                  zip,
                  precinct_id,
                  precinct_split_id
                ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
                (
                  line.get('ID'),
                  line.get('START_HOUSE_NUMBER', None),
                  line.get('END_HOUSE_NUMBER', None),
                  line.get('ODD_EVEN_BOTH', None),
                  line.get('START_APARTMENT_NUMBER', None),
                  line.get('END_APARTMENT_NUMBER', None),
                  line.get('STREET_DIRECTION', None),
                  line.get('STREET_NAME', None),
                  line.get('STREET_SUFFIX', None),
                  line.get('ADDRESS_DIRECTION', None),
                  line.get('STATE', config.get('Main', 'state_abbreviation')),
                  line.get('CITY', ''),
                  line.get('ZIP', ''),
                  line.get('PRECINCT_ID', None),
                  line.get('PRECINCT_SPLIT_ID', None),
                )
              )
              
        except CSVError as e:
          sys.exit("file {0}, line {1}: {2}".format(filename, reader.line_num, e))

        except sqlite3.IntegrityError as e:
          sys.exit("file {0}, line {1}: {2}".format(filename, reader.line_num, e))
Example #3

    storage = Storage(location=args.storage)
    storage.setup_pipeline(output_paths=[output_path], input_path=input_path)
    last_updated = storage.pipeline_last_updated
    print('\nSTART DATE (LAST UPDATE):\t', last_updated.isoformat())

    print(f'\n--- START {file_name} ---\n')

    field_map = field_maps.get(input_path)
    tool_map = tool_maps.get(input_path)

    with open(output_path, 'a', newline='') as out_file:
        writer = DictWriter(out_file, fieldnames=list(field_map.values()))
        writer.writeheader()

        with open(input_path, 'r', newline='') as in_file:
            reader = DictReader(in_file)
            # process input file by row
            for row in reader:
                row_clean = dict.fromkeys(field_map.values())
                for col, value in row.items():
                    if value:
                        if col in tool_map:
                            row_clean[field_map[col]] = tool_map[col](
                                value.strip())
                        else:
                            row_clean[field_map[col]] = value.strip()
                # write processed row to output file
                writer.writerow(row_clean)

    print(f"\nUpdating {args.storage} storage ...")
    storage.finish_pipeline()
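
For context, the loop above expects field_map to rename raw input columns and tool_map
to attach per-column converters. A hypothetical pair (names invented here, not from the
source) might look like:

# Hypothetical shapes, for illustration only:
field_map = {'First Name': 'first_name', 'ZIP': 'zip_code'}  # raw header -> clean header
tool_map = {'ZIP': lambda v: v.zfill(5)}                     # raw header -> converter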
Example #4
    def __init__(self, master):
        # load data
        datetime_list, barpress_list = [], []
        datetime_re = re.compile(r'[\d]{2,4}')  # regex to get datetime info

        for year in range(2012, 2016):
            file = Path(f'{DATA_FOLDER}/Environmental_Data_Deep_Moor_{year}.txt')
            print('Loading {0}'.format(file.name))
            for row in DictReader(file.open('r'), delimiter='\t'):
                barpress_list.append(float(row['Barometric_Press']))
                datetime_list.append(date2num(
                    datetime(*list(map(int, datetime_re.findall(row['date       time    ']))))))

        self.datetime_array = np.array(datetime_list)
        self.barpress_array = np.array(barpress_list)

        # build the gui
        master.title('Weather Statistics')
        master.resizable(True, True)

        # maximize Tkinter windows
        # ref: https://stackoverflow.com/questions/15981000/tkinter-python-maximize-window
        try:
            master.state('zoomed')
        except TclError:
            size = master.maxsize()
            master.geometry('{}x{}+0+0'.format(*size))

        # draw the figure
        matplotlib.rc('font', size=18)
        figure = Figure()
        figure.set_facecolor((0, 0, 0, 0))
        self.a = figure.add_subplot(111)
        self.canvas = FigureCanvasTkAgg(figure, master)
        self.canvas.draw()

        # add toolbar
        toolbar_frame = ttk.Frame(master)  # needed to put navbar above plot
        toolbar = NavigationToolbar2Tk(self.canvas, toolbar_frame)
        toolbar.update()
        toolbar_frame.pack(side=TOP, fill=X, expand=0)
        self.canvas._tkcanvas.pack(fill=BOTH, expand=1)

        controls_frame = ttk.Frame(master)
        controls_frame.pack()

        ttk.Label(controls_frame, text='Start', font='Arial 18 bold').grid(row=0, column=0, pady=5)
        ttk.Label(controls_frame, text='(YYYY-MM-DD HH:MM:SS)', font='Courier 12').grid(row=1, column=0, padx=50, sticky='s')
        self.start = StringVar()
        ttk.Entry(controls_frame, width=19, textvariable=self.start, font='Courier 12').grid(row=2, column=0, sticky='n')
        self.start.set(str(num2date(self.datetime_array[0]))[0:19])

        ttk.Label(controls_frame, text='End', font='Arial 18 bold').grid(row=0, column=1, pady=5)
        ttk.Label(controls_frame, text='(YYYY-MM-DD HH:MM:SS)', font='Courier 12').grid(row=1, column=1, padx=50, sticky='s')
        self.end = StringVar()
        ttk.Entry(controls_frame, width=19, textvariable=self.end, font='Courier 12').grid(row=2, column=1, sticky='n')
        self.end.set(str(num2date(self.datetime_array[-1]))[0:19])

        # add button for update time range
        ttk.Button(controls_frame, text='Update', command=self._update).grid(row=3, column=0, columnspan=2, pady=10)
        ttk.Style().configure('TButton', font='Arial 18 bold')

        # call _update() to draw default figure
        self._update()
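
This __init__ is cut from a larger class, so its imports are not shown. A reconstruction
of what it appears to assume (not from the source; DATA_FOLDER is likewise an assumed
module-level constant):

import re
from csv import DictReader
from datetime import datetime
from pathlib import Path
from tkinter import BOTH, TOP, X, StringVar, TclError, ttk
import matplotlib
from matplotlib.dates import date2num, num2date
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
import numpy as np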
Example #5
try:
    default = db.cost_center_group.lookup("Default")
except KeyError:
    default = db.cost_center_group.create(name="Default")
st_open = db.cost_center_status.lookup('Open')

ccs = {}

for cc in db.cost_center.getnodeids():
    n = db.cost_center.getnode(cc)
    ccs[n.name.strip()] = cc

cc_file = open('Cost-Center_Without-hours.csv', 'r')

for line in DictReader(cc_file, delimiter=';'):
    try:
        tc = line["Time Category"]
    except KeyError:
        tc = line["Name  "]
    try:
        cc = line["Cost Center"].strip()
    except KeyError:
        cc = line["Cost Center  "].strip()
    if not cc:
        assert (tc.startswith('Gesamt'))
        continue
    try:
        tcid = db.time_project.lookup(tc)
    except KeyError:
        try:
Example #6
def read_quotes(filename):
    with open(filename, "r") as file:
        csv_reader = DictReader(file)
        return list(csv_reader)
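
A minimal usage sketch (quotes.csv and its quote column are hypothetical):

quotes = read_quotes("quotes.csv")  # hypothetical file
print(len(quotes), "quotes loaded")
print(quotes[0]["quote"])           # hypothetical column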
Example #7
from csv import DictReader

with open("fighters.csv") as file:
    csv_reader = DictReader(file)
    next(csv_reader)  # note: DictReader has already consumed the header, so this skips the first data row
    for fighter in csv_reader:
        print(f"{fighter['Name']} is from {fighter['Country']} and is {fighter['Height (in cm)']} cm tall.")
Example #8

        ex, correct_label = ex_tuple
        if correct_label != predictions[ii]:
            errors[(ex.split()[0], labels[predictions[ii]])] += 1

    for ww, cc in sorted(errors.items(), key=operator.itemgetter(1),
                         reverse=True)[:10]:
        print("%s\t%i" % (ww, cc))




if __name__ == "__main__":
    ####################
    ####################
    # Cast to list to keep it all in memory
    train_full = list(DictReader(open("train.csv", 'r')))
    train=[]
    temp_len=len(train_full)
    for i in range(temp_len):
        if i % 1 == 0:  # stride of 1 keeps every example; raise the modulus to subsample
            train.append(train_full[i])
    test = list(DictReader(open("test.csv", 'r')))
#     limit=len(train)
    limit_from = 4 * len(train) // 10 + 1  # integer division so these stay usable as indices
    limit = 5 * len(train) // 10
    feat = Featurizer()

    labels = []
    for line in train:
        if not line['cat'] in labels:
            labels.append(line['cat'])
Example #9

def main():
    from scripts.utils import create_project_tarball, get_stencil_num
    from scripts.conf.conf import machine_conf, machine_info
    import os, sys
    from csv import DictReader
    import time, datetime

    dry_run = 1 if len(sys.argv) < 2 else int(sys.argv[1])

    time_stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y%m%d_%H_%M')
    exp_name = "pluto_increasing_grid_size_at_%s_%s" % (
        machine_info['hostname'], time_stamp)

    tarball_dir = 'results/' + exp_name
    if (dry_run == 0): create_project_tarball(tarball_dir, "test_" + exp_name)
    target_dir = 'results/' + exp_name

    # parse the results to find out which of the experiments already exist
    data = []
    data_file = os.path.join('results', 'summary.csv')
    try:
        with open(data_file, 'r') as results_file:  # text mode, not 'rb': csv needs str in Python 3
            raw_data = DictReader(results_file)
            for k in raw_data:
                kernel = get_stencil_num(k)
                if (kernel == 0):
                    k['stencil'] = '3d25pt'
                elif (kernel == 1):
                    k['stencil'] = '3d7pt'
                elif (kernel == 4):
                    k['stencil'] = '3d25pt_var'
                elif (kernel == 5):
                    k['stencil'] = '3d7pt_var'
                else:
                    raise ValueError('unknown stencil kernel: {0}'.format(kernel))
                data.append(k)
    except Exception:
        pass  # missing or malformed summary file; proceed with whatever was parsed
    param_l = dict()
    for k in data:
        try:
            param_l[(k['stencil'], int(k['Global NX']),
                     k['LIKWID performance counter'])] = ([
                         int(k['PLUTO tile size of loop 1']),
                         int(k['PLUTO tile size of loop 3']),
                         int(k['PLUTO tile size of loop 4'])
                     ], int(k['Number of time steps']))
        except (KeyError, ValueError):
            print(k)  # show the offending row before re-raising
            raise

    #update the pinning information to use all cores
    th = machine_info['n_cores']

    pin_str = "0-%d " % (th - 1)

    count = 0
    for group in ['MEM']:
        #  for group in ['MEM', 'L2', 'L3', 'DATA', 'TLB_DATA', 'ENERGY']:
        if (machine_info['hostname'] == 'Haswell_18core'):
            machine_conf[
                'pinning_args'] = " -m -g " + group + " -C S1:" + pin_str
        elif (machine_info['hostname'] == 'IVB_10core'):
            if group == 'TLB_DATA': group = 'TLB'
            machine_conf['pinning_args'] = " -g " + group + " -C S0:" + pin_str


#    for k,v in param_l.iteritems(): print k,v
        count = count + igs_test(
            dry_run, target_dir, exp_name, param_l=param_l, group=group)

    print "experiments count =" + str(count)
Example #10

def get_int_field(field, path):
    with open(path) as f:
        for row in DictReader(f):
            yield (int(row[field]) if row[field] != '' else 0)
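
A usage sketch: because blanks are coerced to 0, the generator can feed an aggregate
directly (field and file names are hypothetical):

total = sum(get_int_field("population", "cities.csv"))  # hypothetical field/file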
Example #11
from csv import DictReader, QUOTE_NONE

FILE = r'../data/iris.csv'
FIELDNAMES = [
    'Sepal length',
    'Sepal width',
    'Petal length',
    'Petal width',
    'Species',
]

with open(FILE) as file:
    header = file.readline()

    data = DictReader(
        f=file,
        fieldnames=FIELDNAMES,
        delimiter=',',
        quoting=QUOTE_NONE)

    for row in data:
        print(dict(row))

## Alternative solution
# with open(FILE) as file:
#     header, *data = DictReader(
#         f=file,
#         fieldnames=FIELDNAMES,
#         delimiter=',',
#         quoting=QUOTE_NONE)
#
#     for row in data:
Example #12
def downloadRepo():
    numero = 136
    with open('arquivo.csv') as repositorios:
        file = open('arquivo_loc.csv', 'w')
        fieldnames = [
            "Nome", "url", "Data Criacao", "Data de Atualizacao",
            "Total de releases", "Linguagem", "Idade",
            "Tempo de Atualizacao em dias", "Loc"
        ]
        csv_writer = DictWriter(file, fieldnames=fieldnames)
        csv_writer.writeheader()  # write the header once before the loop

        reader = DictReader(repositorios)
        repoF = []
        for repo in reader:
            status = 'ok'
            linhas = 0

            try:
                signal.signal(signal.SIGALRM, handler)
                signal.alarm(600)
                print("Baixando repositorio " + repo['Nome'] + "" +
                      str(numero))
                Git('repositorios').clone(repo['url'])
                # analysis
                path = getPath(repo['Nome'])
                print('analyzing repository: ' + repo['Nome'])
                linhas = int(countlines(path))

            except MyTimeout:
                status = 'failed'
                print('failed')
                continue
            except TimeoutError as exc:
                repoF.append(repo['url'])
                status = 'failed'
                print('failed')
                continue
            except Exception as e:
                repoF.append(repo['url'])
                status = 'failed'
                print('failed')
                continue
            finally:
                numero += 1
                csv_writer.writerow({
                    "Nome": repo['Nome'],
                    "url": repo['url'],
                    "Data Criacao": repo['Data Criacao'],
                    "Data de Atualizacao": repo['Data de Atualizacao'],
                    "Linguagem": repo['Linguagem'],
                    "Total de releases": repo['Total de releases'],
                    "Idade": repo['Idade'],
                    "Tempo de Atualizacao em dias": repo['Tempo de Atualizacao em dias'],
                    "Loc": linhas
                })
                print("download do repositorio: " + repo['Nome'] + " status:" +
                      status)
Example #13
class Contract:
    def __init__(self, pz, size, price_tick, variable_commission,
                 fixed_commission, slippage, exchangeID):
        """Constructor"""
        self.pz = pz
        self.size = size
        self.price_tick = price_tick
        self.variable_commission = variable_commission
        self.fixed_commission = fixed_commission
        self.slippage = slippage
        self.exchangeID = exchangeID


contract_dict = {}
filename_setting_fut = get_dss() + 'fut/cfg/setting_pz.csv'
with open(filename_setting_fut, encoding='utf-8') as f:
    r = DictReader(f)
    for d in r:
        contract_dict[d['pz']] = Contract(d['pz'], int(d['size']),
                                          float(d['priceTick']),
                                          float(d['variableCommission']),
                                          float(d['fixedCommission']),
                                          float(d['slippage']),
                                          d['exchangeID'])


def get_contract(symbol):
    pz = symbol[:2]
    if pz.isalpha():
        pass
    else:
        pz = symbol[:1]
Example #14
import sys
from csv import DictReader

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score

data_path = sys.argv[1]
result_path = sys.argv[2]

label_path = data_path + 'validation.csv'
predict_path = result_path + 'submission.csv'

label_reader = DictReader(open(label_path))
predict_reader = DictReader(open(predict_path))

count = 0
y_true = []
y_pred = []
y_scores = []
for t, row in enumerate(label_reader):
    predict = next(predict_reader)
    actual = float(row['label'])
    predicted = float(predict['prob'])
    y_true.append(actual)
    y_scores.append(predicted)

    # values above the threshold are treated as clicks
    if (predicted >= 0.5):
Example #15
from codecs import open
from csv import DictReader
from lxml import etree as et

with open("sanisettesparis.csv", encoding="utf8") as inp:
    data = DictReader(inp, delimiter=";")
    wcs = et.Element("toilettes")
    for item in data:
        wc = et.SubElement(wcs,
                           "toilette",
                           type=item["TYPE"],
                           statut=item["STATUT"])
        ad = et.SubElement(wc, "adresse")
        libelle = et.SubElement(ad, "libelle")
        libelle.text = item["ADRESSE"]
        district = et.SubElement(ad, "arrondissement")
        district.text = item["ARRONDISSEMENT"]
        openh = et.SubElement(wc, "horaire")
        openh.text = item["HORAIRE"]
        serv = et.SubElement(wc, "services")
        access = et.SubElement(serv, "acces-pmr")
        access.text = item["ACCES_PMR"]
        bebe = et.SubElement(serv, "relais-bebe")
        bebe.text = item["RELAIS_BEBE"]
        equipement = et.SubElement(wc, "equipement")
        equipement.text = item["URL_FICHE_EQUIPEMENT"]
with open("toilettes-paris.xml", "w", encoding="utf8") as outp:
    outp.write(
        et.tostring(wcs,
                    pretty_print=True,
                    xml_declaration=True,
Example #16
from math import sqrt, exp, log
from csv import DictReader, writer
import pandas as pd
import numpy as np

if __name__ == "__main__":
    adsfile = "../Data/AdsInfo.tsv"
    outfile = writer(open("../Data/AdsPreProcessed.tsv", "w"))
    outfile.writerow([
        'AdID', 'Price', 'CategoryID', 'NumParams', 'Title', 'CatLevel',
        'ParentCategoryID', 'SubCategoryID'
    ])

    # Reading Categories file and store in a dict #
    catfile = "../Data/Category.tsv"
    tsv_reader = DictReader(open(catfile), delimiter='\t')
    cat_dict = {}
    for row in tsv_reader:
        cat_dict[row['CategoryID']] = [
            row['Level'], row['ParentCategoryID'], row['SubcategoryID']
        ]

    count = 0
    total_count = 0
    for t, line in enumerate(DictReader(open(adsfile), delimiter='\t')):
        total_count += 1
        if line['IsContext'] == '1':
            count += 1
            try:
                # eval on untrusted data is risky; ast.literal_eval would be a safer parse
                num_params = len(eval(line['Params']).keys())
            except:
Example #17
# -*- coding: utf-8 -*-

from csv import DictReader

from collections import Counter

from os import path
import sys

sys.path.append( path.join( path.dirname(__file__), '..', 'lib' ) )
from lib import *

# get docs which weren't coded
notCodedfn = path.join( path.dirname(__file__), "secondStabCoding.notCoded.csv" )
with open(notCodedfn) as notCodedf:
    notCoded = list(DictReader(notCodedf))
notCoded = [x['fn'] for x in notCoded]



inFn = path.join( path.dirname(__file__), "..", "data","extracted.nice.csv" )

fsC = Counter()
f500C = Counter()
bodyC = Counter()
nC = Counter()
vC = Counter()

globalC = Counter()

examples = {}
Example #18
    def __iter__(self):
        for line in DictReader(self._file):
            converted = dict([(key.upper(), self._convert(val))
                              for key, val in six.iteritems(line)])
            yield converted
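
On Python 3 the six shim is unnecessary. A self-contained sketch of the same pattern
(class name and converter are invented here):

from csv import DictReader

class UpperKeyRows:
    """Hypothetical wrapper that yields CSV rows with upper-cased keys."""
    def __init__(self, file_obj, convert=str):
        self._file = file_obj
        self._convert = convert

    def __iter__(self):
        for line in DictReader(self._file):
            # dict.items() replaces six.iteritems() in Python 3
            yield {key.upper(): self._convert(val) for key, val in line.items()}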
Example #19
amounts = [float(row['LoanAmount']) for row in loan_table if row['LoanAmount'] != '']
loan_account_mean = sum(amounts)/len(amounts)
'''
import sys

def report(name, shortd, longd):
	d = {'Name': name, 'Short': shortd, 'Long': longd}
	print(str(d))

#Mock data goes first

from csv import DictReader  # helps with handling csv formatted data
from io import TextIOWrapper
from urllib.request import urlopen  # helps with pulling data off the web (urllib2 in Python 2)
url = 'https://docs.google.com/spreadsheets/d/1_artlzgoj6pDBCBfdt9-Jmc9RT9yLsZ0vTnk3zJmt_E/pub?gid=1291197392&single=true&output=csv'
response = urlopen(url)
loan_table = [row for row in DictReader(TextIOWrapper(response, encoding='utf-8'))]  # decode the byte stream so csv can parse it

xloan_table = loan_table  # in case user screws with loan_table

try:
	&&&  # paste user code here 

except Exception as e:
	report('Generic error', 'On your own', e)
	sys.exit(1)

try:
	loan_account_mean		# does var exist?
except NameError as e:
	report('Name error', 'Typically a typo', e)
	sys.exit(1)
Example #20
def read_csv(csvfile):
    with open(csvfile) as f:
        reader = DictReader(f)
        for row in reader:
            yield row
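
Because read_csv is a generator, the file stays open only while rows are being consumed.
A usage sketch (file and column names are hypothetical):

for row in read_csv("people.csv"):  # hypothetical file
    print(row["name"])              # hypothetical column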
Example #21
    "A balanced portfolio is most appropriate for those with medium time horizons and moderate risk tolerance. This portfolio is balanced between equity and fixed income"
)
p4 = Portfolios(
    name='Aggressive',
    ITOT=50,
    VEA=15,
    VNQ=10,
    GLD=5,
    AGG=20,
    fees=.0006,
    desc="An aggressive portfolio is most appropriate for those with longer time horizons, young in age, and a higher risk tolerance."
)
p5 = Portfolios(
    name='All Equity',
    ITOT=60,
    VEA=20,
    VNQ=15,
    GLD=5,
    fees=.0007,
    desc="An all-equity portfolio is most appropriate for those with longer time horizons, young in age, a higher risk tolerance, and a high capacity to take risk. This portfolio is 100% weighted towards equity."
)

db.session.add_all([p1, p2, p3, p4, p5])
db.session.commit()

with open('generator/etfs.csv') as etfs:
    db.session.bulk_insert_mappings(ETFs, DictReader(etfs))

db.session.commit()
Example #22
                "text":
                quote.find(class_="text").get_text(),
                "author":
                quote.find(class_="author").get_text(),
                "bio-link":
                quote.find("a")["href"]
            })

        # automate parsing every page using the Next link
        next_btn = soup.find(class_="next")
        url = next_btn.find("a")["href"] if next_btn else None
        # sleep(1)  # wait 1 second between scrapes

    with open("test333csvresult.csv", "r") as file:
        # csv_reader = reader(file)
        csv_reader = DictReader(file)  # rows come back as dicts (OrderedDict before Python 3.8)
        for quote in csv_reader:
            print(quote)
            # print(quote['text'])
        print("------------------------------------")
        # choice = choice(list(csv_reader))
        # print(choice)
        quote = csv_reader
    # print(list(csv_reader)) #one list consits of lists

    # quote = choice(csv_reader) #choose from list or set, not dict.
    # print(quote)
    # # print(quote[0])
    # print(quote["text"])

    # remaining_guesses = 4
Example #23
from importlib import reload
from csv import DictReader, DictWriter
import os

from matplotlib import pyplot
import numpy as np

from bioenergetics.model import Model, InterpolatedFunction
from bioenergetics.prey import DaphniaData
from bioenergetics import util

reload(util)

with open("tests/data/monthly-values-max.csv") as fid:
    reader = DictReader(fid)
    monthly_values = [r for r in reader]


def get_values(site, year, month):
    for row in monthly_values:
        if (row["site"] == site and row["year"] == year
                and row["month"] == str(month)):

            try:
                return (
                    float(row["daphnia density"]),
                    float(row["daphnia size"]),
                    float(row["light extinction"]),
                )
            except ValueError:
                return None
Example #24
import os
from csv import DictReader


def _parse():
    with open(os.path.join(
        os.path.dirname(__file__), 'data',
        'Average-prices-Property-Type-2018-09.csv',
    )) as f:
        yield from DictReader(f)
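
yield from turns _parse into a generator, so rows stream lazily and the file closes once
iteration finishes. A usage sketch (column names are hypothetical):

for row in _parse():
    print(row["Region_Name"], row["Average_Price"])  # hypothetical columns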
Example #25
def find_probes(contigs_csv, probes_csv):
    reader = DictReader(contigs_csv)
    columns = ['sample', 'contig']
    for target_name in TARGET_SEQUENCES:
        for column_type in [
                'in_contig_start', 'in_contig_size', 'in_hxb2_start',
                'in_hxb2_size', 'merged_hxb2_start', 'merged_hxb2_size',
                'dist', 'end_dist', 'score', 'is_reversed', 'seq'
        ]:
            columns.append(target_name + '_' + column_type)
    writer = DictWriter(probes_csv, columns)
    writer.writeheader()
    # projects = ProjectConfig.loadDefault()
    # hxb2 = projects.getReference('HIV1-B-FR-K03455-seed')
    hxb2 = utils.hxb2
    gap_open_penalty = 15
    gap_extend_penalty = 3
    use_terminal_gap_penalty = 1
    for sample_name, sample_rows in groupby(reader, itemgetter('sample')):
        contig_num = 0
        for row in sample_rows:
            seed_name = row.get('genotype') or row.get('ref') or row['region']
            conseq_cutoff = row.get('consensus-percent-cutoff')
            if conseq_cutoff and conseq_cutoff != 'MAX':
                continue
            contig_num += 1
            contig_name = f'{contig_num}-{seed_name}'
            contig_seq: str = row.get('contig') or row['sequence']
            aligned_hxb2, aligned_contig_to_hxb2, _ = align_it(
                hxb2, contig_seq, gap_open_penalty, gap_extend_penalty,
                use_terminal_gap_penalty)
            new_row = dict(sample=sample_name, contig=contig_name)
            for target_name, target_seq in TARGET_SEQUENCES.items():
                finder = ProbeFinder(contig_seq, target_seq)
                if not finder.valid:
                    return None  # note: this exits the whole function, skipping any remaining rows
                size = len(finder.contig_match)
                start_pos = finder.start + 1
                end_pos = finder.start + size
                hxb2_pos = contig_pos = 0
                merged_hxb2_start = merged_hxb2_size = None
                for hxb2_nuc, contig_nuc in zip(aligned_hxb2,
                                                aligned_contig_to_hxb2):
                    if hxb2_nuc != '-':
                        hxb2_pos += 1
                    if contig_nuc != '-':
                        contig_pos += 1
                        if contig_pos == start_pos:
                            merged_hxb2_start = hxb2_pos
                        if contig_pos == end_pos:
                            merged_hxb2_size = hxb2_pos - merged_hxb2_start + 1
                            break

                aligned_ref, aligned_match, _ = align_it(
                    hxb2, finder.contig_match, gap_open_penalty,
                    gap_extend_penalty, use_terminal_gap_penalty)
                lstripped_match = aligned_match.lstrip('-')
                in_hxb2_start = len(aligned_match) - len(lstripped_match)
                tail_len = len(lstripped_match) - len(
                    lstripped_match.rstrip('-'))
                ref_match = aligned_ref[in_hxb2_start:-tail_len or None]
                in_hxb2_size = len(ref_match.replace('-', ''))

                prefix = target_name + '_'
                new_row[prefix + 'in_contig_start'] = start_pos
                new_row[prefix + 'in_contig_size'] = size
                new_row[prefix + 'in_hxb2_start'] = in_hxb2_start
                new_row[prefix + 'in_hxb2_size'] = in_hxb2_size
                new_row[prefix + 'merged_hxb2_start'] = merged_hxb2_start
                new_row[prefix + 'merged_hxb2_size'] = merged_hxb2_size
                new_row[prefix + 'dist'] = finder.dist
                new_row[prefix + 'end_dist'] = finder.end_dist
                new_row[prefix + 'score'] = finder.score
                new_row[prefix +
                        'is_reversed'] = ('Y' if finder.is_reversed else 'N')
                new_row[prefix + 'seq'] = finder.contig_match
            writer.writerow(new_row)
Example #26

from csv import DictReader

data = DictReader(
    open("globalterrorismdb_0616dist.csv", "rt", encoding="ISO-8859-1"))
country_attacks = dict()
for row in data:
    country = row["country_txt"]
    if country in country_attacks:
        country_attacks[country] += 1
    else:
        country_attacks[country] = 1

s = [(k, country_attacks[k])
     for k in sorted(country_attacks, key=country_attacks.get)]
for k, v in s:
    print(k, v)
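
The same tally can be written with collections.Counter. A sketch, noting that
most_common() sorts descending, the reverse of the ascending sort above:

from collections import Counter
from csv import DictReader

with open("globalterrorismdb_0616dist.csv", "rt", encoding="ISO-8859-1") as f:
    country_attacks = Counter(row["country_txt"] for row in DictReader(f))

for country, attacks in country_attacks.most_common():
    print(country, attacks)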
Example #27
    'postcode', 'positional_quality_indicator', 'eastings', 'northings'
]

##
## Transform
##
# We convert OSGB 1936 (https://epsg.io/27700) to WGS 84 (https://epsg.io/4326)
transformer = Transformer.from_crs('EPSG:27700', 'EPSG:4326')

##
## CSV Output
##
out_fieldnames = ['postcode', 'lat', 'lon']

csv_writer = DictWriter(sys.stdout, fieldnames=out_fieldnames)
csv_writer.writeheader()

for filename in in_filenames:
    with open(filename) as file:
        csv_reader = DictReader(file, fieldnames=in_fieldnames)
        for row in csv_reader:
            # Starting Proj version 6 the order of the coordinates changed
            latitude, longitude = transformer.transform(
                row['eastings'], row['northings'])

            csv_writer.writerow({
                'postcode': row['postcode'],
                'lat': '%0.5f' % latitude,
                'lon': '%0.5f' % longitude
            })
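
pyproj's Transformer.from_crs also accepts always_xy=True, which pins the axis order to
(x, y) regardless of Proj version. A sketch of that alternative (the sample coordinates
are made up):

from pyproj import Transformer

# with always_xy=True the input is (easting, northing) and the output is (longitude, latitude)
transformer = Transformer.from_crs('EPSG:27700', 'EPSG:4326', always_xy=True)
longitude, latitude = transformer.transform(529090, 179645)  # hypothetical OSGB easting/northing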
Example #28
if __name__ == "__main__":
    current_year = 2001
    END_DATE = 2006
    while True:
        if current_year > END_DATE:
            break
        else:
            with open("./feat_data/" + str(current_year) + "_features_2.csv",
                      "a") as analysisfile:
                analysisfile.write(
                    "Identifier,CIK,Ticker,Co_name,IPO_year,Sector,Industry,Historical_Volatilities,Wikipedia_1st_Sent\n"
                )
                csv = list(
                    DictReader(
                        open(
                            "./feat_data/" + str(current_year) +
                            "_features_1.csv", 'r')))  # 'rU' is deprecated; plain 'r' in Python 3
                csv = sorted(csv, key=lambda x: x['Identifier'])  # dicts aren't orderable in Python 3
                dirs = listdir("./org_data/" + str(current_year) + "_train_x/")
                dirs = sorted(dirs)
                print("DIRS ", len(dirs))
                print("FEATURES", len(csv))
                found_count = 0
                not_found = 0
                for name in dirs:
                    found = 0
                    for x in csv:
                        if name == str(x['Identifier']) + ".mda":
                            found_count = found_count + 1
                            found = 1
                            analysisfile.write(
Example #29
from csv import DictReader
with open("fighters.csv") as file:
    csv_dictreader = DictReader(file)
    for each in csv_dictreader:
        # print(each)
        # each row is a dict (an OrderedDict before Python 3.8);
        # the header fields serve as the keys,
        # so we can use those keys to get the values.
        print(each['Name'])
        print(each['Country'])
        print(each['Height (in cm)'])
Example #30

def read_quotes(filename):
	with open(filename, "r", encoding="utf-8") as file:
		csv_reader = DictReader(file)
		return list(csv_reader)