"""
Expects a single filename argument. Converts a tab-delimited source file to
comma-delimited CSV, keeping only the expected columns and stripping stray
whitespace from each value.

Usage:
    python scripts/wrangle-starbucks.py \
        data/raw/starbucks_locations.csv \
        > data/wrangled/starbucks_locations.csv
"""
from csv import DictReader, DictWriter
from sys import argv, stdout

HEADERS = [
    "Store ID", "Name", "Store Number", "Phone Number", "Ownership Type",
    "Street Combined", "City", "Country Subdivision", "Country",
    "Postal Code", "Latitude", "Longitude",
]

if __name__ == '__main__':
    srcpath = argv[1]
    wcsv = DictWriter(stdout, fieldnames=HEADERS)
    wcsv.writeheader()
    with open(srcpath, 'r') as rf:
        # the docstring promises tab-to-comma conversion, so read as TSV
        rcsv = DictReader(rf, delimiter='\t')
        for row in rcsv:
            wcsv.writerow(
                {k: v.strip() for k, v in row.items() if k in HEADERS})
def load_data(cursor, config):
    file_tmpl = '{0}{1}.txt'
    possible_data = (
        'source',
        'election_administration',
        'state',
        'election',
        'election_official',
        'locality',
        'polling_location',
        'precinct',
        'precinct_split',
        'street_segment',
    )
    for i in possible_data:
        filename = file_tmpl.format(config.get('Main', 'data_dir'), i)
        print "Currently looking at {0}".format(i)
        if os.path.exists(filename):
            with open(filename, 'r') as r:
                print "Parsing and loading data from {0}".format(i)
                sect = i.title()
                parser_type = config.get(sect, 'parser_type')
                if config.has_section(sect):
                    if parser_type == 'csv':
                        klass = dyn_class(
                            config.get(sect, 'parser_module'),
                            config.get(sect, 'parser_class')
                        )
                        reader = klass(
                            r,
                            delimiter=chr(config.getint(sect, 'delimiter')),
                            quotechar=config.get(sect, 'quotechar'),
                            quoting=QUOTE_MINIMAL
                        )
                        reader.fieldnames = map(str.upper, reader.fieldnames)
                    elif parser_type == 'regex':
                        reader = re.compile(config.get(sect, 'regex'))
                        # reader = DictReader(r, delimiter=chr(config.getint(i.title(), 'delimiter')), quotechar='"', quoting=QUOTE_MINIMAL)
                else:
                    reader = DictReader(
                        r,
                        delimiter=chr(config.getint('Parser', 'delimiter')),
                        quotechar='"',
                        quoting=QUOTE_MINIMAL
                    )
                try:
                    for line in get_line(r, reader, parser_type):
                        # trim off any whitespace and escape the values
                        try:
                            line.update(zip(line.iterkeys(),
                                            map(str.strip, line.itervalues())))
                            line.update(zip(line.iterkeys(),
                                            map(escape, line.itervalues())))
                        except:
                            # if this fails, print the offending line
                            print line
                        if i == 'source':
                            print line
                            cursor.execute(
                                "INSERT INTO VIP_Info(id,source_id,description,state_id) VALUES (?,?,?,?)",
                                (
                                    line['VIP_ID'],
                                    line['ID'],
                                    line.get('DESCRIPTION', None),
                                    line.get('STATE_ID', config.get('Main', 'fips')),
                                )
                            )
                        elif i == 'election_administration':
                            cursor.execute(
                                "INSERT OR IGNORE INTO Election_Administration(id,name,eo_id,mailing_address,city,state,zip,zip_plus,elections_url,registration_url,am_i_registered_url,absentee_url,where_do_i_vote_url) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
                                (
                                    line['ID'],
                                    line.get('NAME', None),
                                    line.get('EO_ID', None),
                                    line.get('MAILING_ADDRESS', None),
                                    line.get('CITY', None),
                                    line.get('STATE', None),
                                    line.get('ZIP', None),
                                    line.get('ZIP_PLUS', None),
                                    line.get('ELECTIONS_URL', None),
                                    line.get('REGISTRATION_URL', None),
                                    line.get('AM_I_REGISTERED_URL', None),
                                    line.get('ABSENTEE_URL', None),
                                    line.get('WHERE_DO_I_VOTE_URL', None),
                                )
                            )
                        elif i == 'state':
                            print line
                            cursor.execute(
                                "INSERT INTO State(id,name,election_administration_id,organization_url) VALUES (?,?,?,?)",
                                (
                                    line.get('ID', config.get('Main', 'fips')),
                                    line.get('NAME'),
                                    line.get('ELECTION_ADMINISTRATION_ID'),
                                    line.get('ORGANIZATION_URL', ''),
                                )
                            )
                        elif i == 'election':
                            cursor.execute(
                                "INSERT OR IGNORE INTO Election(id,date,election_type,state_id,statewide,registration_info) VALUES (?,?,?,?,?,?)",
                                (
                                    line['ID'],
                                    datetime.strptime(
                                        line.get('DATE'),
                                        config.get('Main', 'time_format')
                                    ).strftime('%Y-%m-%d'),
                                    line.get('ELECTION_TYPE', "General"),
                                    line.get('STATE_ID', config.get('Main', 'fips')),
                                    line.get('STATEWIDE', "Yes"),
                                    line.get('REGISTRATION_INFO', None),
                                )
                            )
                        elif i == 'election_official':
                            cursor.execute(
                                "INSERT INTO Election_Official(id,name,title,phone,fax,email) VALUES (?,?,?,?,?,?)",
                                (
                                    line['ID'],
                                    line.get('NAME'),
                                    line.get('TITLE', None),
                                    line.get('PHONE', None),
                                    line.get('FAX', None),
                                    line.get('EMAIL', None)
                                )
                            )
                        elif i == 'locality':
                            cursor.execute(
                                """INSERT OR IGNORE INTO Locality(
                                    id, name, state_id, type,
                                    election_administration_id
                                ) VALUES (?,?,?,?,?)""",
                                (
                                    line['ID'],
                                    line.get('NAME'),
                                    line.get('STATE_ID', config.get('Main', 'fips')),
                                    line.get('TYPE'),
                                    line.get('ELECTION_ADMINISTRATION_ID'),
                                )
                            )
                        elif i == 'polling_location':
                            cursor.execute(
                                """INSERT OR IGNORE INTO Polling_Location(
                                    id, location_name, line1, city, state, zip
                                ) VALUES (?,?,?,?,?,?)""",
                                (
                                    line['ID'],
                                    line.get('LOCATION_NAME'),
                                    line.get('LINE1'),
                                    line.get('CITY'),
                                    line.get('STATE'),
                                    line.get('ZIP'),
                                )
                            )
                        elif i == 'precinct':
                            cursor.execute(
                                "INSERT OR IGNORE INTO Precinct(id,name,locality_id,mail_only) VALUES (?,?,?,?)",
                                (
                                    line['ID'],
                                    line.get('NAME'),
                                    line.get('LOCALITY_ID', config.get('Main', 'locality_id')),
                                    line.get('MAIL_ONLY', "No"),
                                )
                            )
                            if len(line.get('POLLING_LOCATION_ID', "")) > 0:
                                cursor.execute(
                                    "INSERT OR IGNORE INTO Precinct_Polling(precinct_id,polling_location_id) VALUES (?,?)",
                                    (
                                        line['ID'],
                                        line['POLLING_LOCATION_ID'],
                                    )
                                )
                            if len(line.get('EARLY_VOTE_SITE_ID', "")) > 0:
                                cursor.execute(
                                    "INSERT OR IGNORE INTO Precinct_Early_Vote(precinct_id,early_vote_site_id) VALUES (?,?)",
                                    (
                                        line['ID'],
                                        line['EARLY_VOTE_SITE_ID'],
                                    )
                                )
                        elif i == 'precinct_split':
                            if len(line.get('PRECINCT_ID', "")) > 0:
                                if len(line.get('NAME', '')) == 0:
                                    line['NAME'] = line['ID']
                                line['ID'] = sanitize(line, 'ID')
                                cursor.execute(
                                    "INSERT OR IGNORE INTO Precinct_Split(id,name,precinct_id) VALUES (?,?,?)",
                                    (
                                        line['ID'],
                                        line.get('NAME'),
                                        line['PRECINCT_ID'],
                                    )
                                )
                                if len(line.get('ELECTORAL_DISTRICT_ID', "")) > 0:
                                    cursor.execute(
                                        "INSERT OR IGNORE INTO Precinct_Split_District(precinct_split_id,electoral_district_id) VALUES (?,?)",
                                        (
                                            line['ID'],
                                            line['ELECTORAL_DISTRICT_ID'],
                                        )
                                    )
                                if len(line.get('POLLING_LOCATION_ID', "")) > 0:
                                    cursor.execute(
                                        "INSERT OR IGNORE INTO Split_Polling(split_id,polling_location_id) VALUES (?,?)",
                                        (
                                            line['ID'],
                                            line['POLLING_LOCATION_ID'],
                                        )
                                    )
                        elif i == 'street_segment':
                            if len(line.get('PRECINCT_SPLIT_ID', "")) > 0:
                                line['PRECINCT_SPLIT_ID'] = sanitize(line, 'PRECINCT_SPLIT_ID')
                            if line.get('STREET_DIRECTION', '') == 'NULL':
                                line['STREET_DIRECTION'] = None
                            cursor.execute(
                                """INSERT INTO Street_Segment(
                                    id, start_house_number, end_house_number,
                                    odd_even_both, start_apartment_number,
                                    end_apartment_number, street_direction,
                                    street_name, street_suffix,
                                    address_direction, state, city, zip,
                                    precinct_id, precinct_split_id
                                ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
                                (
                                    line.get('ID'),
                                    line.get('START_HOUSE_NUMBER', None),
                                    line.get('END_HOUSE_NUMBER', None),
                                    line.get('ODD_EVEN_BOTH', None),
                                    line.get('START_APARTMENT_NUMBER', None),
                                    line.get('END_APARTMENT_NUMBER', None),
                                    line.get('STREET_DIRECTION', None),
                                    line.get('STREET_NAME', None),
                                    line.get('STREET_SUFFIX', None),
                                    line.get('ADDRESS_DIRECTION', None),
                                    line.get('STATE', config.get('Main', 'state_abbreviation')),
                                    line.get('CITY', ''),
                                    line.get('ZIP', ''),
                                    line.get('PRECINCT_ID', None),
                                    line.get('PRECINCT_SPLIT_ID', None),
                                )
                            )
                except CSVError, e:
                    sys.exit("file {0}, line {1}: {2}".format(filename, reader.line_num, e))
                except sqlite3.IntegrityError, e:
                    sys.exit("file {0}, line {1}: {2}".format(filename, reader.line_num, e))
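# The loop above relies on a get_line() helper that is not shown in this
# file. A minimal sketch, assuming it normalizes the three parser types
# (csv class, compiled regex, plain DictReader) into one stream of dicts;
# the details here are inferred, not from the source:
def get_line(fh, reader, parser_type):
    if parser_type == 'regex':
        # reader is a compiled pattern; yield the named groups per raw line
        for raw in fh:
            match = reader.match(raw)
            if match:
                yield match.groupdict()
    else:
        # reader is a csv.DictReader (or a compatible parser class)
        for row in reader:
            yield row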
storage = Storage(location=args.storage)
storage.setup_pipeline(output_paths=[output_path], input_path=input_path)
last_updated = storage.pipeline_last_updated
print('\nSTART DATE (LAST UPDATE):\t', last_updated.isoformat())
print(f'\n--- START {file_name} ---\n')

field_map = field_maps.get(input_path)
tool_map = tool_maps.get(input_path)

with open(output_path, 'a', newline='') as out_file:
    writer = DictWriter(out_file, fieldnames=list(field_map.values()))
    writer.writeheader()
    with open(input_path, 'r', newline='') as in_file:
        reader = DictReader(in_file)
        # process input file by row
        for row in reader:
            row_clean = dict.fromkeys(field_map.values())
            for col, value in row.items():
                if value:
                    if col in tool_map:
                        row_clean[field_map[col]] = tool_map[col](value.strip())
                    else:
                        row_clean[field_map[col]] = value.strip()
            # write processed row to output file
            writer.writerow(row_clean)

print(f"\nUpdating {args.storage} storage ...")
storage.finish_pipeline()
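# Hedged illustration of the per-input maps consumed above; the real
# field_maps/tool_maps are defined elsewhere in this project. Keys are source
# columns; values are output columns (field_map) or cleaning callables
# (tool_map). All names below are hypothetical.
field_maps = {
    'data/contacts.csv': {'Full Name': 'name', 'E-mail': 'email'},
}
tool_maps = {
    'data/contacts.csv': {'E-mail': str.lower},  # normalize before writing
}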
def __init__(self, master):
    # load data
    datetime_list, barpress_list = [], []
    datetime_re = re.compile(r'[\d]{2,4}')  # regex to get datetime info
    for year in range(2012, 2016):
        file = Path(f'{DATA_FOLDER}/Environmental_Data_Deep_Moor_{year}.txt')
        print('Loading {0}'.format(file.name))
        for row in DictReader(file.open('r'), delimiter='\t'):
            barpress_list.append(float(row['Barometric_Press']))
            datetime_list.append(date2num(
                datetime(*list(map(int, datetime_re.findall(row['date time ']))))))
    self.datetime_array = np.array(datetime_list)
    self.barpress_array = np.array(barpress_list)

    # build the gui
    master.title('Weather Statistics')
    master.resizable(True, True)
    # maximize Tkinter windows
    # ref: https://stackoverflow.com/questions/15981000/tkinter-python-maximize-window
    try:
        master.state('zoomed')
    except TclError:
        size = master.maxsize()
        master.geometry('{}x{}+0+0'.format(*size))

    # draw the figure
    matplotlib.rc('font', size=18)
    figure = Figure()
    figure.set_facecolor((0, 0, 0, 0))
    self.a = figure.add_subplot(111)
    self.canvas = FigureCanvasTkAgg(figure, master)
    self.canvas.draw()

    # add toolbar
    toolbar_frame = ttk.Frame(master)  # needed to put navbar above plot
    toolbar = NavigationToolbar2Tk(self.canvas, toolbar_frame)
    toolbar.update()
    toolbar_frame.pack(side=TOP, fill=X, expand=0)
    self.canvas._tkcanvas.pack(fill=BOTH, expand=1)

    controls_frame = ttk.Frame(master)
    controls_frame.pack()

    ttk.Label(controls_frame, text='Start',
              font='Arial 18 bold').grid(row=0, column=0, pady=5)
    ttk.Label(controls_frame, text='(YYYY-MM-DD HH:MM:SS)',
              font='Courier 12').grid(row=1, column=0, padx=50, sticky='s')
    self.start = StringVar()
    ttk.Entry(controls_frame, width=19, textvariable=self.start,
              font='Courier 12').grid(row=2, column=0, sticky='n')
    self.start.set(str(num2date(self.datetime_array[0]))[0:19])

    ttk.Label(controls_frame, text='End',
              font='Arial 18 bold').grid(row=0, column=1, pady=5)
    ttk.Label(controls_frame, text='(YYYY-MM-DD HH:MM:SS)',
              font='Courier 12').grid(row=1, column=1, padx=50, sticky='s')
    self.end = StringVar()
    ttk.Entry(controls_frame, width=19, textvariable=self.end,
              font='Courier 12').grid(row=2, column=1, sticky='n')
    self.end.set(str(num2date(self.datetime_array[-1]))[0:19])

    # add button for update time range
    ttk.Button(controls_frame, text='Update',
               command=self._update).grid(row=3, column=0, columnspan=2, pady=10)
    ttk.Style().configure('TButton', font='Arial 18 bold')

    # call _update() to draw default figure
    self._update()
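# _update() is referenced above but not shown in this fragment. A minimal
# sketch, assuming it replots barometric pressure over the entered start/end
# range; all logic here is inferred, not from the source:
def _update(self):
    start = date2num(datetime.strptime(self.start.get(), '%Y-%m-%d %H:%M:%S'))
    end = date2num(datetime.strptime(self.end.get(), '%Y-%m-%d %H:%M:%S'))
    mask = (self.datetime_array >= start) & (self.datetime_array <= end)
    self.a.clear()
    self.a.plot(self.datetime_array[mask], self.barpress_array[mask], '-')
    self.a.xaxis_date()  # render the numeric x values as dates
    self.canvas.draw()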
try:
    default = db.cost_center_group.lookup("Default")
except KeyError:
    default = db.cost_center_group.create(name="Default")
st_open = db.cost_center_status.lookup('Open')
ccs = {}
for cc in db.cost_center.getnodeids():
    n = db.cost_center.getnode(cc)
    ccs[n.name.strip()] = cc
cc_file = open('Cost-Center_Without-hours.csv', 'r')
for line in DictReader(cc_file, delimiter=';'):
    try:
        tc = line["Time Category"]
    except KeyError:
        # some exports label the column with a trailing space
        tc = line["Name "]
    try:
        cc = line["Cost Center"].strip()
    except KeyError:
        cc = line["Cost Center "].strip()
    if not cc:
        assert tc.startswith('Gesamt')
        continue
    try:
        tcid = db.time_project.lookup(tc)
    except KeyError:
        try:
from csv import DictReader

def read_quotes(filename):
    with open(filename, "r") as file:
        csv_reader = DictReader(file)
        return list(csv_reader)
from csv import DictReader

with open("fighters.csv") as file:
    csv_reader = DictReader(file)
    # no next() call needed here: DictReader already consumes the header row,
    # so skipping a row would silently drop the first fighter
    for fighter in csv_reader:
        print(f"{fighter['Name']} is from {fighter['Country']} "
              f"and is {fighter['Height (in cm)']} cm tall.")
        ex, correct_label = ex_tuple
        if correct_label != predictions[ii]:
            errors[(ex.split()[0], labels[predictions[ii]])] += 1
    for ww, cc in sorted(errors.items(), key=operator.itemgetter(1),
                         reverse=True)[:10]:
        print("%s\t%i" % (ww, cc))


if __name__ == "__main__":
    ####################
    ####################

    # Cast to list to keep it all in memory
    train_full = list(DictReader(open("train.csv", 'r')))
    train = []
    temp_len = len(train_full)
    for i in range(temp_len):
        # subsampling knob: "i % 1" keeps every row; raise the modulus to thin
        if i % 1 == 0:
            train.append(train_full[i])
    test = list(DictReader(open("test.csv", 'r')))

    # limit = len(train)
    limit_from = 4 * len(train) / 10 + 1
    limit = 5 * len(train) / 10

    feat = Featurizer()

    labels = []
    for line in train:
        if not line['cat'] in labels:
            labels.append(line['cat'])
def main():
    from scripts.utils import create_project_tarball, get_stencil_num
    from scripts.conf.conf import machine_conf, machine_info
    import os, sys
    from csv import DictReader
    import time, datetime

    dry_run = 1 if len(sys.argv) < 2 else int(sys.argv[1])

    time_stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y%m%d_%H_%M')
    exp_name = "pluto_increasing_grid_size_at_%s_%s" % (
        machine_info['hostname'], time_stamp)

    tarball_dir = 'results/' + exp_name
    if dry_run == 0:
        create_project_tarball(tarball_dir, "test_" + exp_name)
    target_dir = 'results/' + exp_name

    # parse the results to find out which of them already exist
    data = []
    data_file = os.path.join('results', 'summary.csv')
    try:
        with open(data_file, 'rb') as output_file:
            raw_data = DictReader(output_file)
            for k in raw_data:
                kernel = get_stencil_num(k)
                if kernel == 0:
                    k['stencil'] = '3d25pt'
                elif kernel == 1:
                    k['stencil'] = '3d7pt'
                elif kernel == 4:
                    k['stencil'] = '3d25pt_var'
                elif kernel == 5:
                    k['stencil'] = '3d7pt_var'
                else:
                    raise
                data.append(k)
    except:
        pass

    param_l = dict()
    for k in data:
        try:
            param_l[(k['stencil'], int(k['Global NX']),
                     k['LIKWID performance counter'])] = (
                [
                    int(k['PLUTO tile size of loop 1']),
                    int(k['PLUTO tile size of loop 3']),
                    int(k['PLUTO tile size of loop 4'])
                ],
                int(k['Number of time steps']))
        except:
            print k
            raise

    # update the pinning information to use all cores
    th = machine_info['n_cores']
    pin_str = "0-%d " % (th - 1)

    count = 0
    for group in ['MEM']:
        # for group in ['MEM', 'L2', 'L3', 'DATA', 'TLB_DATA', 'ENERGY']:
        if machine_info['hostname'] == 'Haswell_18core':
            machine_conf['pinning_args'] = " -m -g " + group + " -C S1:" + pin_str
        elif machine_info['hostname'] == 'IVB_10core':
            if group == 'TLB_DATA':
                group = 'TLB'
            machine_conf['pinning_args'] = " -g " + group + " -C S0:" + pin_str
        # for k, v in param_l.iteritems(): print k, v
        count = count + igs_test(
            dry_run, target_dir, exp_name, param_l=param_l, group=group)

    print "experiments count =" + str(count)
from csv import DictReader

def get_int_field(field, path):
    with open(path) as f:
        for row in DictReader(f):
            yield int(row[field]) if row[field] != '' else 0
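# Usage sketch (field and file names are hypothetical): because this is a
# generator, a numeric column can be aggregated without loading the file.
total = sum(get_int_field('population', 'cities.csv'))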
from csv import DictReader, QUOTE_NONE

FILE = r'../data/iris.csv'
FIELDNAMES = [
    'Sepal length',
    'Sepal width',
    'Petal length',
    'Petal width',
    'Species',
]

with open(FILE) as file:
    header = file.readline()  # skip the original header row
    data = DictReader(
        f=file,
        fieldnames=FIELDNAMES,
        delimiter=',',
        quoting=QUOTE_NONE)
    for row in data:
        print(dict(row))

## Alternative solution
# with open(FILE) as file:
#     header, *data = DictReader(
#         f=file,
#         fieldnames=FIELDNAMES,
#         delimiter=',',
#         quoting=QUOTE_NONE)
#
#     for row in data:
def downloadRepo():
    numero = 136
    with open('arquivo.csv') as repositorios:
        file = open('arquivo_loc.csv', 'w')
        fieldnames = [
            "Nome", "url", "Data Criacao", "Data de Atualizacao",
            "Total de releases", "Linguagem", "Idade",
            "Tempo de Atualizacao em dias", "Loc"
        ]
        csv_writer = DictWriter(file, fieldnames=fieldnames)
        csv_writer.writeheader()  # write the header row once up front
        reader = DictReader(repositorios)
        repoF = []
        for repo in reader:
            status = 'ok'
            linhas = 0
            try:
                signal.signal(signal.SIGALRM, handler)
                signal.alarm(600)
                print("Downloading repository " + repo['Nome'] + " " + str(numero))
                Git('repositorios').clone(repo['url'])
                # analysis
                path = getPath(repo['Nome'])
                print('analyzing repository: ' + repo['Nome'])
                linhas = int(countlines(path))
            except MyTimeout:
                status = 'failed'
                print('failed')
                continue
            except TimeoutError as exc:
                repoF.append(repo['url'])
                status = 'failed'
                print('failed')
                continue
            except Exception as e:
                repoF.append(repo['url'])
                status = 'failed'
                print('failed')
                continue
            finally:
                numero += 1
                csv_writer.writerow({
                    "Nome": repo['Nome'],
                    "url": repo['url'],
                    "Data Criacao": repo['Data Criacao'],
                    "Data de Atualizacao": repo['Data de Atualizacao'],
                    "Linguagem": repo['Linguagem'],
                    "Total de releases": repo['Total de releases'],
                    "Idade": repo['Idade'],
                    "Tempo de Atualizacao em dias": repo['Tempo de Atualizacao em dias'],
                    "Loc": linhas
                })
                print("repository download: " + repo['Nome'] + " status: " + status)
def __init__(self, pz, size, price_tick, variable_commission,
             fixed_commission, slippage, exchangeID):
    """Constructor"""
    self.pz = pz
    self.size = size
    self.price_tick = price_tick
    self.variable_commission = variable_commission
    self.fixed_commission = fixed_commission
    self.slippage = slippage
    self.exchangeID = exchangeID


contract_dict = {}
filename_setting_fut = get_dss() + 'fut/cfg/setting_pz.csv'
with open(filename_setting_fut, encoding='utf-8') as f:
    r = DictReader(f)
    for d in r:
        contract_dict[d['pz']] = Contract(
            d['pz'], int(d['size']), float(d['priceTick']),
            float(d['variableCommission']), float(d['fixedCommission']),
            float(d['slippage']), d['exchangeID'])


def get_contract(symbol):
    # the product code appears to be the symbol's leading letters:
    # two characters when both are alphabetic, otherwise one
    pz = symbol[:2]
    if not pz.isalpha():
        pz = symbol[:1]
import sys

from csv import DictReader
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score

data_path = sys.argv[1]
result_path = sys.argv[2]

label_path = data_path + 'validation.csv'
predict_path = result_path + 'submission.csv'

label_reader = DictReader(open(label_path))
predict_reader = DictReader(open(predict_path))

count = 0
y_true = []
y_pred = []
y_scores = []
for t, row in enumerate(label_reader):
    predict = predict_reader.__next__()
    actual = float(row['label'])
    predicted = float(predict['prob'])
    y_true.append(actual)
    y_scores.append(predicted)
    # scores at or above the threshold count as clicks
    if predicted >= 0.5:
        # (the snippet breaks off here; presumably the binarized label is
        # appended to y_pred: 1.0 in this branch and 0.0 otherwise)
        y_pred.append(1.0)
    else:
        y_pred.append(0.0)
from codecs import open
from csv import DictReader

from lxml import etree as et

with open("sanisettesparis.csv", encoding="utf8") as inp:
    data = DictReader(inp, delimiter=";")
    wcs = et.Element("toilettes")
    for item in data:
        wc = et.SubElement(wcs, "toilette", type=item["TYPE"],
                           statut=item["STATUT"])
        ad = et.SubElement(wc, "adresse")
        libelle = et.SubElement(ad, "libelle")
        libelle.text = item["ADRESSE"]
        district = et.SubElement(ad, "arrondissement")
        district.text = item["ARRONDISSEMENT"]
        openh = et.SubElement(wc, "horaire")
        openh.text = item["HORAIRE"]
        serv = et.SubElement(wc, "services")
        access = et.SubElement(serv, "acces-pmr")
        access.text = item["ACCES_PMR"]
        bebe = et.SubElement(serv, "relais-bebe")
        bebe.text = item["RELAIS_BEBE"]
        equipement = et.SubElement(wc, "equipement")
        equipement.text = item["URL_FICHE_EQUIPEMENT"]

with open("toilettes-paris.xml", "w", encoding="utf8") as outp:
    outp.write(
        et.tostring(wcs, pretty_print=True, xml_declaration=True,
                    # the original call is cut off here; serializing to bytes
                    # and decoding is one plausible completion for this
                    # text-mode write
                    encoding="utf-8").decode("utf-8"))
from math import sqrt, exp, log
from csv import DictReader, writer

import pandas as pd
import numpy as np

if __name__ == "__main__":
    adsfile = "../Data/AdsInfo.tsv"
    outfile = writer(open("../Data/AdsPreProcessed.tsv", "w"))
    outfile.writerow([
        'AdID', 'Price', 'CategoryID', 'NumParams', 'Title', 'CatLevel',
        'ParentCategoryID', 'SubCategoryID'
    ])

    # read the categories file and store it in a dict
    catfile = "../Data/Category.tsv"
    tsv_reader = DictReader(open(catfile), delimiter='\t')
    cat_dict = {}
    for row in tsv_reader:
        cat_dict[row['CategoryID']] = [
            row['Level'], row['ParentCategoryID'], row['SubcategoryID']
        ]

    count = 0
    total_count = 0
    for t, line in enumerate(DictReader(open(adsfile), delimiter='\t')):
        total_count += 1
        if line['IsContext'] == '1':
            count += 1
            try:
                num_params = len(eval(line['Params']).keys())
            except:
# -*- coding: utf-8 -*-
from csv import DictReader
from collections import Counter
from os import path
import sys

sys.path.append(path.join(path.dirname(__file__), '..', 'lib'))
from lib import *

# get docs which weren't coded
notCodedfn = path.join(path.dirname(__file__),
                       "secondStabCoding.notCoded.csv")
with open(notCodedfn) as notCodedf:
    notCoded = list(DictReader(notCodedf))
notCoded = [x['fn'] for x in notCoded]

inFn = path.join(path.dirname(__file__), "..", "data", "extracted.nice.csv")

fsC = Counter()
f500C = Counter()
bodyC = Counter()
nC = Counter()
vC = Counter()
globalC = Counter()
examples = {}
def __iter__(self):
    for line in DictReader(self._file):
        converted = dict([(key.upper(), self._convert(val))
                          for key, val in six.iteritems(line)])
        yield converted
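# Context sketch for the method above: it presumably lives on a small reader
# class holding an open file and a value converter. The class name and
# default converter below are hypothetical stand-ins, not from the source.
class NormalizingReader(object):
    def __init__(self, file, convert=str):
        self._file = file
        self._convert = convert

    # with __iter__ as defined above, this becomes usable as:
    #     for row in NormalizingReader(open('data.csv')):
    #         print(row['NAME'])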
amounts = [float(row['LoanAmount']) for row in loan_table
           if row['LoanAmount'] != '']
loan_account_mean = sum(amounts)/len(amounts)
'''

import sys


def report(name, shortd, longd):
    d = {'Name': name, 'Short': shortd, 'Long': longd}
    print(str(d))

# Mock data goes first
from csv import DictReader  # helps with handling csv formatted data
from urllib2 import urlopen  # helps with pulling data off the web

url = 'https://docs.google.com/spreadsheets/d/1_artlzgoj6pDBCBfdt9-Jmc9RT9yLsZ0vTnk3zJmt_E/pub?gid=1291197392&single=true&output=csv'
response = urlopen(url)
loan_table = [row for row in DictReader(response)]

# a mapping function using identity
xloan_table = loan_table  # in case user screws with loan_table

try:
    &&&  # paste user code here
except Exception as e:
    report('Generic error', 'On your own', e)
    sys.exit(1)

try:
    loan_account_mean  # does var exist?
except NameError as e:
    report('Name error', 'Typically a typo', e)
    sys.exit(1)
from csv import DictReader

def read_csv(csvfile):
    with open(csvfile) as f:
        reader = DictReader(f)
        for row in reader:
            yield row
"A balanced portfolio is most appropriate for those with medium time horizons and moderate risk tolerance. This portfolio is balanced between equity and fixed income" ) p4 = Portfolios( name='Aggressive', ITOT=50, VEA=15, VNQ=10, GLD=5, AGG=20, fees=.0006, desc= "A aggressive portfolio is most appropriate for those with longer time horizons, young in age, and a higher risk tolerance." ) p5 = Portfolios( name='All Equity', ITOT=60, VEA=20, VNQ=15, GLD=5, fees=.0007, desc= "A all equity portfolio is most appropriate for those with longer time horizons, young in age, higher risk tolerance and high capacity to take risk.This portfolio is 100% weighted towards equity" ) db.session.add_all([p1, p2, p3, p4, p5]) db.session.commit() with open('generator/etfs.csv') as etfs: db.session.bulk_insert_mappings(ETFs, DictReader(etfs)) db.session.commit()
"text": quote.find(class_="text").get_text(), "author": quote.find(class_="author").get_text(), "bio-link": quote.find("a")["href"] }) #automate parsing every page using Next link next_btn = soup.find(class_="next") url = next_btn.find("a")["href"] if next_btn else None # sleep(1) #wait 2 second btwn scrapping. with open("test333csvresult.csv", "r") as file: # csv_reader = reader(file) csv_reader = DictReader(file) #OrderedDictionary for quote in csv_reader: print(quote) # print(quote['text']) print("------------------------------------") # choice = choice(list(csv_reader)) # print(choice) quote = csv_reader # print(list(csv_reader)) #one list consits of lists # quote = choice(csv_reader) #choose from list or set, not dict. # print(quote) # # print(quote[0]) # print(quote["text"]) # remaining_guesses = 4
from importlib import reload
from csv import DictReader, DictWriter
import os

from matplotlib import pyplot
import numpy as np

from bioenergetics.model import Model, InterpolatedFunction
from bioenergetics.prey import DaphniaData
from bioenergetics import util

reload(util)

with open("tests/data/monthly-values-max.csv") as fid:
    reader = DictReader(fid)
    monthly_values = [r for r in reader]


def get_values(site, year, month):
    for row in monthly_values:
        if (row["site"] == site and row["year"] == year
                and row["month"] == str(month)):
            try:
                return (
                    float(row["daphnia density"]),
                    float(row["daphnia size"]),
                    float(row["light extinction"]),
                )
            except ValueError:
                return None
import os
from csv import DictReader


def _parse():
    with open(os.path.join(
        os.path.dirname(__file__),
        'data',
        'Average-prices-Property-Type-2018-09.csv',
    )) as f:
        yield from DictReader(f)
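# Usage sketch: because _parse() is a generator, rows stream lazily, so you
# can peek at the first few records without reading the whole file.
from itertools import islice

first_rows = list(islice(_parse(), 5))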
def find_probes(contigs_csv, probes_csv):
    reader = DictReader(contigs_csv)
    columns = ['sample', 'contig']
    for target_name in TARGET_SEQUENCES:
        for column_type in [
                'in_contig_start', 'in_contig_size', 'in_hxb2_start',
                'in_hxb2_size', 'merged_hxb2_start', 'merged_hxb2_size',
                'dist', 'end_dist', 'score', 'is_reversed', 'seq']:
            columns.append(target_name + '_' + column_type)
    writer = DictWriter(probes_csv, columns)
    writer.writeheader()
    # projects = ProjectConfig.loadDefault()
    # hxb2 = projects.getReference('HIV1-B-FR-K03455-seed')
    hxb2 = utils.hxb2
    gap_open_penalty = 15
    gap_extend_penalty = 3
    use_terminal_gap_penalty = 1
    for sample_name, sample_rows in groupby(reader, itemgetter('sample')):
        contig_num = 0
        for row in sample_rows:
            seed_name = row.get('genotype') or row.get('ref') or row['region']
            conseq_cutoff = row.get('consensus-percent-cutoff')
            if conseq_cutoff and conseq_cutoff != 'MAX':
                continue
            contig_num += 1
            contig_name = f'{contig_num}-{seed_name}'
            contig_seq: str = row.get('contig') or row['sequence']
            aligned_hxb2, aligned_contig_to_hxb2, _ = align_it(
                hxb2,
                contig_seq,
                gap_open_penalty,
                gap_extend_penalty,
                use_terminal_gap_penalty)
            new_row = dict(sample=sample_name, contig=contig_name)
            for target_name, target_seq in TARGET_SEQUENCES.items():
                finder = ProbeFinder(contig_seq, target_seq)
                if not finder.valid:
                    return None
                size = len(finder.contig_match)
                start_pos = finder.start + 1
                end_pos = finder.start + size
                hxb2_pos = contig_pos = 0
                merged_hxb2_start = merged_hxb2_size = None
                for hxb2_nuc, contig_nuc in zip(aligned_hxb2,
                                                aligned_contig_to_hxb2):
                    if hxb2_nuc != '-':
                        hxb2_pos += 1
                    if contig_nuc != '-':
                        contig_pos += 1
                        if contig_pos == start_pos:
                            merged_hxb2_start = hxb2_pos
                        if contig_pos == end_pos:
                            merged_hxb2_size = hxb2_pos - merged_hxb2_start + 1
                            break
                aligned_ref, aligned_match, _ = align_it(
                    hxb2,
                    finder.contig_match,
                    gap_open_penalty,
                    gap_extend_penalty,
                    use_terminal_gap_penalty)
                lstripped_match = aligned_match.lstrip('-')
                in_hxb2_start = len(aligned_match) - len(lstripped_match)
                tail_len = len(lstripped_match) - len(
                    lstripped_match.rstrip('-'))
                ref_match = aligned_ref[in_hxb2_start:-tail_len or None]
                in_hxb2_size = len(ref_match.replace('-', ''))
                prefix = target_name + '_'
                new_row[prefix + 'in_contig_start'] = start_pos
                new_row[prefix + 'in_contig_size'] = size
                new_row[prefix + 'in_hxb2_start'] = in_hxb2_start
                new_row[prefix + 'in_hxb2_size'] = in_hxb2_size
                new_row[prefix + 'merged_hxb2_start'] = merged_hxb2_start
                new_row[prefix + 'merged_hxb2_size'] = merged_hxb2_size
                new_row[prefix + 'dist'] = finder.dist
                new_row[prefix + 'end_dist'] = finder.end_dist
                new_row[prefix + 'score'] = finder.score
                new_row[prefix + 'is_reversed'] = ('Y' if finder.is_reversed
                                                   else 'N')
                new_row[prefix + 'seq'] = finder.contig_match
            writer.writerow(new_row)
from csv import DictReader

data = DictReader(
    open("globalterrorismdb_0616dist.csv", "rt", encoding="ISO-8859-1"))

country_attacks = dict()
for row in data:
    country = row["country_txt"]
    if country in country_attacks:
        country_attacks[country] += 1
    else:
        country_attacks[country] = 1

s = [(k, country_attacks[k])
     for k in sorted(country_attacks, key=country_attacks.get)]
for k, v in s:
    print(k, v)
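# Equivalent tally with collections.Counter, shown as an alternative sketch
# that re-opens the file (the DictReader above is already exhausted):
from collections import Counter

with open("globalterrorismdb_0616dist.csv", "rt",
          encoding="ISO-8859-1") as fh:
    counts = Counter(row["country_txt"] for row in DictReader(fh))
for country, n in sorted(counts.items(), key=lambda kv: kv[1]):
    print(country, n)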
    'postcode',
    'positional_quality_indicator',
    'eastings',
    'northings'
]

##
## Transform
##

# We convert OSGB 1936 (https://epsg.io/27700) to WGS 84 (https://epsg.io/4326)
transformer = Transformer.from_crs('EPSG:27700', 'EPSG:4326')

##
## CSV Output
##

out_fieldnames = ['postcode', 'lat', 'lon']
csv_writer = DictWriter(sys.stdout, fieldnames=out_fieldnames)
csv_writer.writeheader()

for filename in in_filenames:
    with open(filename) as file:
        csv_reader = DictReader(file, fieldnames=in_fieldnames)
        for row in csv_reader:
            # Starting with Proj version 6, the order of the coordinates changed
            latitude, longitude = transformer.transform(
                row['eastings'], row['northings'])
            csv_writer.writerow({
                'postcode': row['postcode'],
                'lat': '%0.5f' % latitude,
                'lon': '%0.5f' % longitude
            })
if __name__ == "__main__":
    current_year = 2001
    END_DATE = 2006
    while True:
        if current_year > END_DATE:
            break
        else:
            with open("./feat_data/" + str(current_year) + "_features_2.csv",
                      "a") as analysisfile:
                analysisfile.write(
                    "Identifier,CIK,Ticker,Co_name,IPO_year,Sector,Industry,"
                    "Historical_Volatilities,Wikipedia_1st_Sent\n"
                )
                csv = list(
                    DictReader(
                        open("./feat_data/" + str(current_year) +
                             "_features_1.csv", 'rU')))
                csv = sorted(csv)
                dirs = listdir("./org_data/" + str(current_year) + "_train_x/")
                dirs = sorted(dirs)
                print "DIRS ", len(dirs)
                print "FEATURES", len(csv)
                found_count = 0
                not_found = 0
                for name in dirs:
                    found = 0
                    for x in csv:
                        if name == str(x['Identifier']) + ".mda":
                            found_count = found_count + 1
                            found = 1
                            analysisfile.write(
from csv import DictReader

with open("fighters.csv") as file:
    csv_dictreader = DictReader(file)
    for each in csv_dictreader:
        # print(each)
        # each row is a dict-like object whose keys are the CSV headers,
        # so the header names can be used to look up the values
        print(each['Name'])
        print(each['Country'])
        print(each['Height (in cm)'])
from csv import DictReader

def read_quotes(filename):
    with open(filename, "r", encoding="utf-8") as file:
        csv_reader = DictReader(file)
        return list(csv_reader)
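# Usage sketch (the file name and column names are hypothetical): each row
# comes back as a dict keyed by the CSV header.
quotes = read_quotes("quotes.csv")
for q in quotes[:3]:
    print(q["text"], "-", q["author"])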