main_metric_reader = csv.DictReader(
    open('../data/all_treated_editor_set_sort_by_date.csv', 'r'))
all_wiki_reader = csv.DictReader(
    open('../data/history_all_wikipedia_sort_by_date.csv', 'r'))
all_wiki_line = all_wiki_reader.next()
writer = csv.DictWriter(
    open('../data/all_treated_allwikireten_currentweek.csv', 'wb'),
    fieldnames=[
        'ArticleId', 'RelWeek',
        'SumOldEditorRetenAllWiki', 'SumNewEditorRetenAllWiki',
        'SumPreShockRetenAllWiki', 'SumPostShockRetenAllWiki',
        'SumNewWikiRetenAllWiki', 'SumNewNonWikiRetenAllWiki',
        'MeanOldEditorRetenAllWiki', 'MeanNewEditorRetenAllWiki',
        'MeanPreShockRetenAllWiki', 'MeanPostShockRetenAllWiki',
        'MeanNewWikiRetenAllWiki', 'MeanNewNonWikiRetenAllWiki',
        'MedOldEditorRetenAllWiki', 'MedNewEditorRetenAllWiki',
        'MedPreShockRetenAllWiki', 'MedPostShockRetenAllWiki',
        'MedNewWikiRetenAllWiki', 'MedNewNonWikiRetenAllWiki',
        'LogMeanOldEditorRetenAllWiki', 'LogMeanNewEditorRetenAllWiki',
        'LogMeanPreShockRetenAllWiki', 'LogMeanPostShockRetenAllWiki',
        'LogMeanNewWikiRetenAllWiki', 'LogMeanNewNonWikiRetenAllWiki'
    ],
    extrasaction='ignore')
writer.writeheader()
CurrStartDate = date(2000, 1, 1)
CurrEndDate = CurrStartDate + timedelta(days=6)
CurrRetentionStartDate = CurrStartDate + timedelta(days=7)
CurrRetentionEndDate = CurrStartDate + timedelta(days=34)
AllWikiRevDict = {}
def writeHeader(file):
    if os.stat(file).st_size == 0:
        with open(file, "wb") as write:
            fieldnames = ["item_id", "item_name", "item_price"]
            writer = csv.DictWriter(write, fieldnames=fieldnames)
            writer.writeheader()
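# Minimal usage sketch (assumed, not from the original source; relies on the
# module's `import csv` and `import os`, and the file name is illustrative):
# write the header into an empty "inventory.csv" once, then append rows.
writeHeader("inventory.csv")
with open("inventory.csv", "ab") as f:
    writer = csv.DictWriter(f, fieldnames=["item_id", "item_name", "item_price"])
    writer.writerow({"item_id": "1", "item_name": "widget", "item_price": "9.99"})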
def y_cruncher(self):
    needHeader = False
    if not os.path.isfile(const.datadir+'y_cruncher.csv'):
        needHeader = True
        os.system("mkdir "+const.datadir)
    with open(const.datadir+'y_cruncher.csv', 'a') as fout:
        row = OrderedDict([('instanceID', None),
                           ('experimentID', None),
                           ('instanceType', None),
                           ('memoryInfo', None),
                           ('processorInfo', None),
                           ('sysTopology', None),
                           ('osVersion', None),
                           ('testStartTime', None),
                           ('availableMemory', None),
                           ('isMultiThread', None),
                           ('cpuUtilization', None),
                           ('multiCoreEfficiency', None),
                           ('computationTime', None),
                           ('benchmarkTime', None),
                           ('wallTime', None)])
        # benchmarkTime = computationTime + I/O operation overhead
        writer = csv.DictWriter(fout, fieldnames=row)
        if needHeader:
            writer.writeheader()
        row['instanceType'] = self.kw['instanceType']
        row['instanceID'] = self.kw['instanceID']
        row['experimentID'] = self.kw['experimentID']
        row['wallTime'] = self.kw['duration']
        # row['testOption'] = self.kw['testOption']
        for line in self.string:
            if line.find('Multi-core Efficiency') != -1:
                obj = re.search(r'(\d*\.\d* %)', line)
                row['multiCoreEfficiency'] = obj.group(1)
            if line.find('CPU Utilization') != -1:
                obj = re.search(r'(\d*\.\d* %)', line)
                row['cpuUtilization'] = obj.group(1)
            if line.find('Multi-Threading') != -1:
                obj = re.search(r'\[01;36m(\w*)', line)
                row['isMultiThread'] = obj.group(1)
            if line.find('Available Memory') != -1:
                obj = re.search(r'1;33m(.*?B)', line)
                row['availableMemory'] = obj.group(1)
            if line.find('Version') != -1:
                obj = re.search(r'(\s+)(.*)', line)
                row['osVersion'] = obj.group(2)
            if line.find('Topology') != -1:
                obj = re.search(r'(\s+)(.*)', line)
                row['sysTopology'] = obj.group(2)
            if line.find('Processor(s):') != -1:
                obj = re.search(r'(\s+)(.*)', line)
                row['processorInfo'] = obj.group(2)
            if line.find('Usable Memory') != -1:
                obj = re.search(r'\((.*?B)', line)
                row['memoryInfo'] = obj.group(1)
            if line.find('Start Time') != -1:
                obj = re.search(r'Start Time: .*?(01;33m)(.*)(\[01;37m)', line)
                row['testStartTime'] = obj.group(2)
            if line.find('Wall Time') != -1:
                obj = re.search(r'(\d*\.\d*).*seconds', line)
                row['benchmarkTime'] = obj.group(1)
            if line.find('Total Computation') != -1:
                obj = re.search(r'(\d*\.\d*).*seconds', line)
                row['computationTime'] = obj.group(1)
            # TODO more attributes
        writer.writerow(row)
def draw(self, screen, weather, updated):
    if weather is None or not updated:
        return

    current = weather["current"]
    daily = weather["daily"][0]
    short_summary = _(current["weather"][0]["main"])
    icon = current["weather"][0]["icon"]
    temperature = current["temp"]
    humidity = current["humidity"]
    feels_like = current["feels_like"]
    pressure = current["pressure"]
    uv_index = int(current["uvi"])
    try:
        rain_1h = current["rain"]["1h"]
    except KeyError:
        rain_1h = '0'
    windspeed = current["wind_speed"]
    try:
        windgust = current["wind_gust"]
    except KeyError:
        windgust = 'nan'
    print(windgust)
    long_summary = daily["weather"][0]["description"]
    temperature_high = daily["temp"]["max"]
    temperature_low = daily["temp"]["min"]
    heat_color = Utils.heat_color(temperature, humidity, self.units)
    uv_color = Utils.uv_color(uv_index)
    weather_icon = Utils.weather_icon(icon, self.icon_size)
    # temperature = Utils.temperature_text(int(temperature), self.units)
    temperature = Utils.temperature_text(round(temperature, 1), self.units)
    feels_like = Utils.temperature_text(int(feels_like), self.units)
    temperature_low = Utils.temperature_text(int(temperature_low), self.units)
    temperature_high = Utils.temperature_text(int(temperature_high), self.units)
    humidity = Utils.percentage_text(humidity)
    uv_index = str(uv_index)
    pressure = Utils.pressure_text(int(pressure))

    """ HistoryGraphLog - log data to GraphDataLog.txt """
    # TODO: Add maintenance of GraphDataLog.txt for removing old data to keep file small.
    xtemperature = temperature
    xtemperature = xtemperature[:-2]
    xpressure = pressure
    xpressure = xpressure[:-2]
    xtimestamp = time.strftime('%m-%d-%Y %H:%M:%S')
    graph = "GraphDataLog.txt"
    file = open(graph, "a", newline='')
    with file:
        myfields = ['xdate', 'temp', 'press', 'rain_1h', 'windspeed', 'windgust']
        writer = csv.DictWriter(file, fieldnames=myfields)
        # writer.writeheader()
        writer.writerow({'xdate': xtimestamp, 'temp': xtemperature,
                         'press': xpressure, 'rain_1h': rain_1h,
                         'windspeed': windspeed, 'windgust': windgust})
    # file.close()
    df = pandas.read_csv(graph)
    # convert to datetime
    df['xdate'] = pandas.to_datetime(df['xdate'])
    # calculate mask
    m1 = df['xdate'] >= (pandas.to_datetime('now') - pandas.DateOffset(days=1))
    m2 = df['xdate'] <= pandas.to_datetime('now')
    # mask = m1 & m2
    mask = m1
    # output masked dataframes
    # df[~mask].to_csv('out1.csv', index=False)
    # Remove time from datetime
    # df['xdate'] = pandas.to_datetime(df['xdate']).dt.date
    df[mask].to_csv('GraphData.csv', index=False)
    """ END GraphLog """

    text_x = weather_icon.get_size()[0]
    text_width = self.rect.width - text_x
    message1 = self.text_warp("{} {}".format(temperature, short_summary),
                              text_width, "medium", bold=True, max_lines=1)[0]
    message2 = "{} {} {} {} {} {}".format(_("Feels Like"), feels_like,
                                          _("Low"), temperature_low,
                                          _("High"), temperature_high)
    if self.text_size(message2, "small")[0] > text_width:
        message2 = "Feel {} {} - {}".format(feels_like, temperature_low,
                                            temperature_high)
    message3 = "{} {} {} {} {} {}".format(_("Humidity"), humidity,
                                          _("Pressure"), pressure,
                                          _("UVindex"), uv_index)
    if self.text_size(message3, "small")[0] > text_width:
        message3 = "{} {} UV {}".format(humidity, pressure, uv_index)
    max_lines = int((self.rect.height - 55) / 15)
    message4s = self.text_warp(long_summary, text_width, "small",
                               bold=True, max_lines=max_lines)

    self.clear_surface()
    self.draw_image(weather_icon, (0, 0))
    self.draw_text(message1, (text_x, 15), "large", heat_color, bold=True)
    self.draw_text(message2, (text_x, 52), "small", "white")
    i = message3.index("UV")
    (right, _bottom) = self.draw_text(message3[:i], (text_x, 70), "small", "white")
    self.draw_text(message3[i:], (right, 70), "small", uv_color, bold=True)
    height = 70 + (15 * (max_lines - len(message4s))) / 2
    for message in message4s:
        self.draw_text(message, (text_x, height), "small", "blue", bold=True)
        height += 15
    self.update_screen(screen)
def DictWriter(f, fieldnames):
    return csv.DictWriter(f, fieldnames, delimiter='\t', dialect='excel-tab')
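# Minimal usage sketch (assumed; the file name and field names are illustrative
# only): the wrapper above returns an ordinary csv.DictWriter configured for
# tab-separated output.
with open('report.tsv', 'w', newline='') as f:
    w = DictWriter(f, ['id', 'value'])
    w.writeheader()
    w.writerow({'id': 1, 'value': 'a'})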
def results(fname, cfg):
    with open(fname, 'w', newline='') as csvfile:
        lines = []
        fieldnames = ['ID', 'GeCo2 bytes', 'GeCo3 bytes', 'GeCo2 secs',
                      'GeCo3 secs', 'Mode', 'L.Rate', 'H.Nodes']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        totalb2 = 0
        totalb3 = 0
        totals2 = 0
        totals3 = 0
        for [l, lr, hs, f] in cfg:
            if l == '16':
                cmd = ['./GeCo2', '-lr', lr, '-hs', hs, f]
                cmd[1:1] = g2l16.split()
                out = subprocess.check_output(cmd)
            else:
                out = subprocess.check_output(['./GeCo2', '-l', l, '-lr', lr,
                                               '-hs', hs, f])
            sout = str(out)
            bytes2 = sout.split('Total bytes: ')[1].split()[0]
            bytes2 = int(bytes2)
            totalb2 = totalb2 + bytes2
            bytes2 = format(bytes2, ',d')
            secs2 = sout.split('Spent ')[1].split()[0]
            secs2 = float(secs2)
            totals2 = totals2 + secs2
            secs2 = str(round(secs2, 1))

            out = subprocess.check_output(['./GeCo3', '-l', l, '-lr', lr,
                                           '-hs', hs, f])
            sout = str(out)
            bytes3 = sout.split('Total bytes: ')[1].split()[0]
            bytes3 = int(bytes3)
            totalb3 = totalb3 + bytes3
            bytes3 = format(bytes3, ',d')
            secs3 = sout.split('Spent ')[1].split()[0]
            secs3 = float(secs3)
            totals3 = totals3 + secs3
            secs3 = str(round(secs3, 1))

            d = {'ID': os.path.basename(f), 'GeCo2 bytes': bytes2,
                 'GeCo3 bytes': bytes3, 'GeCo2 secs': secs2,
                 'GeCo3 secs': secs3, 'Mode': l, 'L.Rate': lr, 'H.Nodes': hs}
            lines.append(d)
            print(d)
        for l in reversed(lines):
            writer.writerow(l)
        totalb2 = format(totalb2, ',d')
        totalb3 = format(totalb3, ',d')
        totals2 = str(round(totals2, 1))
        totals3 = str(round(totals3, 1))
        d = {'ID': 'Total', 'GeCo2 bytes': totalb2, 'GeCo3 bytes': totalb3,
             'GeCo2 secs': totals2, 'GeCo3 secs': totals3,
             'Mode': '', 'L.Rate': '', 'H.Nodes': ''}
        writer.writerow(d)
result = re.search(r"\((\w.+)\)", user)  # takes the username as catching group
if result and result[1] not in username_list:
    username_list.append(result[1])

username_list.sort()
for name in username_list:
    per_user = {"Username": "", "INFO": 0, "ERROR": 0}
    per_user["Username"] = name
    for line in events:
        if re.search(r"INFO \w.* \(" + name + "\)", line):
            per_user["INFO"] = per_user.get("INFO", 0) + 1
        elif re.search(r"ERROR \w.* \(" + name + "\)", line):
            per_user["ERROR"] = per_user.get("ERROR", 0) + 1
    userdata_list.append(per_user)

#----------generate error_message.csv-----------#
keys = ["Error", "Count"]
with open("error_message.csv", "w") as error_message_report:
    writer = csv.DictWriter(error_message_report, fieldnames=keys)
    writer.writeheader()
    writer.writerows(error_sorted)

#----------generate user_statistics.csv-----#
keys = ["Username", "INFO", "ERROR"]
with open("user_statistics.csv", "w") as user_statistics_report:
    writer = csv.DictWriter(user_statistics_report, fieldnames=keys)
    writer.writeheader()
    writer.writerows(userdata_list)
    # plt.ylabel(column2)
    plt.title("{0} x {1}".format(column1, column2))
    for poly_order in polyDeg:
        coefs = np.polyfit(x, y, poly_order)  # we also want to do this for 2, 3
        f = np.poly1d(coefs)
        # print(np.poly1d(f))
        xs, new_line = generate_points(f, min(x), max(x))
        # plt.plot(xs, new_line)
        plt.plot(xs, new_line, color="red")  # Uncomment this line for the pairs plot
    if not debug:
        # Note: I have spent no effort making it pretty, and recommend that you do :)
        plt.show()
        plt.legend()
        # plt.tight_layout()
        # plt.show()
        plt.savefig("./my_pairs_plot.png")


plotting(our_dictionary)
# plotting(different_dictionary)
# print(our_dictionary)

# split each string in the list to get a list of lists; the function checkdelim
# determines which delimiter is used
with open("myfile.csv", "w") as myCSV:
    w = csv.DictWriter(myCSV, our_dictionary.keys())
    w.writeheader()
    w.writerow(our_dictionary)
def write_csv(data, filename):
    with open(filename, 'w+') as outf:
        writer = csv.DictWriter(outf, data[0].keys())
        writer.writeheader()
        for row in data:
            writer.writerow(row)
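# Minimal usage sketch (assumed; the rows and output path are illustrative only):
# every dict in `data` is expected to share the keys of data[0], which become the header.
rows = [{'name': 'a', 'count': 1}, {'name': 'b', 'count': 2}]
write_csv(rows, 'counts.csv')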
infields = [
    'id', 'str_resource', 'str_description', 'website', 'meta_title',
    'meta_description', 'stage_list', 'task_list'
]
outfields = infields + ['stage_list_facet', 'task_list_facet']

with open(sys.argv[1], 'r') as infile, open(sys.argv[2], 'w') as outfile:
    # skip header row in order to use own fieldnames
    next(infile)
    # instantiate the reader and writer objects
    dr = csv.DictReader(infile, fieldnames=infields)
    dw = csv.DictWriter(outfile, fieldnames=outfields)
    dw.writeheader()
    exp = re.compile(r'\d+::([^\b])')
    # loop over the input file, writing results to output file
    for row in dr:
        # remove hash marks from URL
        m = re.search('#(.+)#', row['website'])
        if m:
            row['website'] = m.group(1)
        # remove spaces from all multivalued fields
        row['stage_list_facet'] = row['stage_list'].replace('; ', ';')
        row['task_list_facet'] = row['task_list'].replace('; ', ';')
uid = int(uid)
try:
    val = float(val)
except ValueError:
    print('非数字输入,将保留文本格式')  # non-numeric input; value will be kept as text
df = pd.read_csv('rules')
df.set_value(df.set_index('用户id').index.get_loc(uid), para, val)
df.to_csv('rules', index=False)
print('已成功更改', uid, '的认领信息')  # claim info for this uid updated successfully

filename = 'rules'
with open(filename, 'a', newline='', encoding='utf-8') as f:
    fieldnames = ['用户id', '最小单种体积', '最少总认领体积',
                  '最少总认领数', '最少做种时间',
                  '认领名次合格种子数比例', '认领名次合格种子数体积比例',
                  '认领名次魔力比例', '合格认领小组', '不合格认领分类',
                  '工资比例', '工资体积系数', '工资种子寿命系数',
                  '工资做种时间系数', '工资做种人数系数', '最多允许同伴数',
                  '最少第一认领占体积比', '备注']
    thewriter = csv.DictWriter(f, fieldnames=fieldnames)
    existed_uid = set()  # collect every existing uid in a set to avoid duplicate rules
    rf = csv.DictReader(open(filename, encoding='utf-8'))
    for i in rf:
        existed_uid.add(i['用户id'])
        if domain not in href:
            if href not in link_list and href.startswith("http"):
                print "[*] Discovered external link: %s" % href
                link_list.append(href)
    return link_list


record_list = search_domain(domain)
link_list = []

for record in record_list:
    html_content = download_page(record)
    print "[*] Retrieved %d bytes for %s" % (len(html_content), record['url'])
    link_list = extract_external_links(html_content, link_list)

print "[*] Total external links discovered: %d" % len(link_list)

with codecs.open("%s-links.csv" % domain, "wb", encoding="utf-8") as output:
    fields = ["URL"]
    logger = csv.DictWriter(output, fieldnames=fields)
    logger.writeheader()
    for link in link_list:
        logger.writerow({"URL": link})
outfile = open(outfilename, 'w')
# write the file data row to the first line of the output file
outfile.write(filedata)
reader = csv.DictReader(infile)
dataheaders = reader.fieldnames
parser = multiline_parser(line_captures)
field_names = parser.get_keys(dataheaders)
# also filetype
field_names = ['filetype'] + field_names
outfile.write(",".join(field_names) + "\n")
dw = csv.DictWriter(outfile, fieldnames=field_names, restval='',
                    extrasaction='ignore')
(contracts, invoices, orders, nabs) = (0, 0, 0, 0)
total_rows = 0
for row in reader:
    total_rows += 1
    filename = "../../" + row['txt_location']
    fileid = row['fcc_id']
    intid = int(fileid)
    url_fixed = row['file_url'].upper()
    url_fixed = url_fixed.replace("%20", " ")
    url_fixed = url_fixed.replace("%2D", "/")
    ## This is a KYW-specific naming convention, apparently.
    # print "\n\n" + row['file_url']
def validate_and_process():
    fips_set = None
    with open(os.path.join(repo_root, 'data/county_3220.geojson')) as in_file:
        data = json.load(in_file)
        features = data['features']
        fips_list = [str(int(x['properties']['GEOID'])) for x in features]
        fips_set = set(fips_list)

    # note: we need to open the files with encoding `utf-8-sig` to correctly parse
    # the byte-order mark (bom) of the source files
    # https://stackoverflow.com/a/49150749
    with open(os.path.join(dir_path, '_working/testing_usafacts.csv'),
              encoding='utf-8-sig') as testing_in_file, open(
                  os.path.join(dir_path, '_working/positivity_raw.csv'),
                  encoding='utf-8-sig') as positivity_in_file, open(
                      os.path.join(repo_root, 'docs/testing_usafacts.csv'),
                      'w+') as testing_out_file, open(
                          os.path.join(repo_root, 'docs/testingpos_usafacts.csv'),
                          'w+') as positivity_out_file:

        testing_csv_reader = csv.DictReader(testing_in_file)
        testing_source_field_names = testing_csv_reader.fieldnames
        positivity_csv_reader = csv.DictReader(positivity_in_file)
        positivity_source_field_names = positivity_csv_reader.fieldnames

        ###!Not sure about the following error handling. The code worked without it.
        # VALIDATE: make sure testing contains yesterday's data
        #yesterday = datetime.now(pytz.timezone('US/Central')) - timedelta(days=1)
        #yesterday_source_field = yesterday.strftime('%-m/%-d/%y')
        #print(yesterday)
        #print(yesterday_source_field)
        #testing_last_date = testing_source_field_names[-1]
        #print(testing_last_date)
        #if testing_last_date != yesterday_source_field:
        #    raise ValueError("Testing does not contain yesterday's data; last date {}".format(testing_last_date))
        #    pass

        # VALIDATE: make sure positivity contains yesterday's data
        #positivity_last_date = positivity_source_field_names[-1]
        #print(positivity_last_date)
        #if positivity_last_date != yesterday_source_field:
        #    raise ValueError("Positivity does not contain yesterday's data; last date {}".format(positivity_last_date))
        #    pass

        testing_out_rows = []
        positivity_out_rows = []

        # VALIDATE: make sure all testing rows belong to a known county
        for testing_row in testing_csv_reader:
            fips = testing_row['countyFIPS']
            county_name = testing_row['County Name']
            state_abbr = testing_row['State']
            if fips not in fips_set:
                print(
                    'WARNING: Testing - Skipping unknown county based on FIPS ({}): {} County, {}'
                    .format(fips, county_name, state_abbr))
                continue
            testing_out_rows.append(testing_row)

        # VALIDATE: make sure all positivity rows belong to a known county
        for positivity_row in positivity_csv_reader:
            fips = positivity_row['countyFIPS']
            county_name = positivity_row['County Name']
            state_abbr = positivity_row['State']
            if fips not in fips_set:
                print(
                    'WARNING: Positivity - Skipping unknown county based on FIPS ({}): {} County, {}'
                    .format(fips, county_name, state_abbr))
                continue
            positivity_out_rows.append(positivity_row)

        ''' LOAD '''
        out_field_names = list(testing_out_rows[0].keys())
        testing_csv_writer = csv.DictWriter(testing_out_file,
                                            fieldnames=out_field_names)
        testing_csv_writer.writeheader()
        testing_csv_writer.writerows(testing_out_rows)
        positivity_csv_writer = csv.DictWriter(positivity_out_file,
                                               fieldnames=out_field_names)
        positivity_csv_writer.writeheader()
        positivity_csv_writer.writerows(positivity_out_rows)
        print('Finished.')
INPUT = sys.argv[1]
OUTPUT = 'birthdates.csv'
BASE_URL = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&props=claims'
# START_AFTER = ('de', 'Nikolai_Gergow')
START_AFTER = None


def create_url(lang, name):
    return '%s&sites=%swiki&titles=%s' % (BASE_URL, lang, name)


with open(INPUT, 'r') as input_file, \
        open(OUTPUT, 'a') as outputfile:
    reader = csv.DictReader(input_file)
    writer = csv.DictWriter(outputfile, fieldnames=['name', 'lang', 'birth_date'])
    writer.writeheader()
    if START_AFTER is not None:
        for line in reader:
            if (line['lang'], line['name']) == START_AFTER:
                break
    for line in reader:
        url = create_url(line['lang'], line['name'])
        print(url)
        r = requests.get(url)
        data = r.json()
def create(self, row, schema):
    with open(self.table_name, mode='a') as f:
        writer = csv.DictWriter(f, fieldnames=schema)
        writer.writerow(row)
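# Minimal usage sketch (assumed; `table`, its schema, and the row are illustrative
# only): appends one record to the CSV file backing the table, without a header.
table.create({'id': 1, 'name': 'alice'}, ['id', 'name'])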
    inStoreAvailability = item.get('inStoreAvailability', 'false')
    if inStoreAvailability == 'false':
        inStoreAvailability = False
    else:
        inStoreAvailability = True

    data = {
        'sku': sku,
        'name': name,
        'regularPrice': regular_price,
        'salePrice': sale_price,
        'type': typex,
        # 'upc': upc,
        'url': url,
        'image': image,
        'inStoreAvailability': inStoreAvailability
    }
    csv_writer.writerow(data)


if __name__ == '__main__':
    with open(output_filename, 'w') as fout:
        csv_writer = csv.DictWriter(
            fout,
            fieldnames=('sku', 'name', 'regularPrice', 'salePrice', 'type',
                        'url', 'image', 'inStoreAvailability'))
        xml_files = sorted(os.listdir(data_folder))
        for filename in xml_files:
            full_path = os.path.join(data_folder, filename)
            get_products_from_file(full_path, csv_writer)
outjson.close()

# Load the json to a Python object and write to csv
outputfile_csv = datapath + ddate + '/' + 'irs_index_' + str(year) + '_' + udate + '.csv'
with open(outputfile_json, 'r') as f:
    data = json.load(f)
    print(len(data.keys()))
'''
The json is a dictionary with one item:
key=filings2011, value=list of dictionaries.
'''
with open(outputfile_csv, 'w', newline='') as c:
    varnames = data['Filings' + str(year)][0].keys()
    writer = csv.DictWriter(c, varnames)
    print('---------------------')
    print(' ')
    writer.writeheader()
    writer.writerows(data['Filings' + str(year)])
year += 1
'''
# Append files together to form one dataset
# outputfile = datapath + ddate + '/' + 'irs_index_' + udate + '.json'
file1 = datapath + ddate + '/' + 'irs_index_2011_' + udate + '.json'
file2 = datapath + ddate + '/' + 'irs_index_2012_' + udate + '.json'
file3 = datapath + ddate + '/' + 'irs_index_2013_' + udate + '.json'
file4 = datapath + ddate + '/' + 'irs_index_2014_' + udate + '.json'
    # Download the page using requests
    print("Downloading %s" % url)
    r = requests.get(url, headers=headers)
    # Simple check to see if the page was blocked (usually 503)
    if r.status_code > 500:
        if "To discuss automated access to Amazon data please contact" in r.text:
            print("Page %s was blocked by Amazon. Please try using better proxies\n" % url)
        else:
            print("Page %s must have been blocked by Amazon as the status code was %d" % (url, r.status_code))
        return None
    # Pass the HTML of the page and create
    return e.extract(r.text)


# product_data = []
with open("urls.txt", 'r') as urllist, open('data.csv', 'w') as outfile:
    writer = csv.DictWriter(outfile,
                            fieldnames=["title", "content", "date", "variant",
                                        "images", "verified", "author", "rating",
                                        "product", "url"],
                            quoting=csv.QUOTE_ALL)
    writer.writeheader()
    for url in urllist.readlines():
        data = scrape(url)
        if data:
            for r in data['reviews']:
                r["product"] = data["product_title"]
                r['url'] = url
                if 'verified' in r:
                    if 'Verified Purchase' in r['verified']:
                        r['verified'] = 'Yes'
                    else:
                        # the original set 'Yes' in both branches; 'No' is the likely intent
                        r['verified'] = 'No'
                r['rating'] = r['rating'].split(' out of')[0]
                date_posted = r['date'].split('on ')[-1]
                if r['images']:
import csv  # read and write the data
import time
import matplotlib.pyplot as plt  # plotting library for python
import numpy as np  # scientific computing library which contains Fourier, Linear Algebra etc.
import ecgF as e
from decimal import getcontext  # fast correctly rounded decimal point arithmetic

# getcontext().prec = 4

with open("filter.csv", "w") as csv_file:
    # "with" provides much cleaner syntax and exception handling when working with files.
    # The csv module helps you to elegantly process data stored within a CSV file.
    csv_writer = csv.DictWriter(csv_file, fieldnames=["type", "lowf", "highf", "order"])
    csv_writer.writeheader()  # write a row with the field names (as specified in the constructor)
    info = {"type": "none", "lowf": 0.05, "highf": 30, "order": 5}
    csv_writer.writerow(info)

say = 2000

with open("Filtereddata.csv", "w") as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=["t", "f"])
    csv_writer.writeheader()
def main():
    r = np.genfromtxt('datasets/RawData_fourth.csv', delimiter=',',
                      names=True, case_sensitive=True, dtype='int')
    t = np.genfromtxt('datasets/RawData_time_fourth.csv', delimiter=',',
                      names=True, case_sensitive=True, dtype='float')
    obs1 = np.zeros((r['Behaviours__1'].size, 37), dtype='int')
    obs_time1 = np.zeros((t['Time__1'].size, 37), dtype='float')
    n = obs1[:, 0].size
    obs = np.zeros((n, 37), dtype='int')
    obs.fill(-1)
    obs_time = np.zeros((n, 37), dtype='float')
    animalID = np.zeros(n, int)
    targetID = np.zeros(n, int)

    for ro in range(obs1[:, 0].size):
        for col in range(36):
            obs[ro][col] = r[ro][col + 5]
    for row in range(obs[:, 0].size):
        for col in range(36):
            if (obs[row][col] == -1):
                obs[row][col] = 9
    for row in range(obs_time1[:, 0].size):
        for col in range(36):
            obs_time[row][col] = t[row][col + 5]
    for row in range(obs[:, 0].size):
        animalID[row] = r[row][0]
        targetID[row] = r[row][4]

    pos = 0
    count = 0
    e = 0
    group1 = np.zeros(27, float)
    group2 = np.zeros(27, float)
    g1 = 0
    g2 = 0
    plot_val = np.arange(27)
    error_matrix = np.zeros(54, dtype='float')

    with open('Results/MPS_per_AnimalID_300_70.csv', 'w') as csvfile:
        fieldnames = ['AnimalID', 'TargetID', 'PATH']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        while pos != obs[:, 0].size:
            g = 0
            obs_set = np.zeros((12, 37), dtype='int')
            obs_time_set = np.zeros((12, 37), dtype='float')
            obs_set.fill(-1)
            for i in range(pos, pos + 12):
                for j in range(36):
                    obs_set[i - pos][j] = obs[i][j]
                    obs_time_set[i - pos][j] = obs_time[i][j]
            T = obs_set[0].shape[0]
            num_states = 9
            trans_mat = hmm_train.trans_prob_matrix(obs_set)
            trans_mat = np.log(trans_mat)
            emi_mat_norm = hmm_train.emission_prob_matrix(obs_set)
            emi_mat_norm[:, 36] = 0
            emi_mat = np.log(emi_mat_norm)
            emi_mat_time = hmm_train.emission_prob_matrix_time(obs_set, obs_time_set)
            emi_mat_time[:, 36] = 0
            emi_mat_time = np.log(emi_mat_time)
            path_set = np.empty(T, dtype='int')
            path_set.fill(-1)
            for t in range(T):
                if (emi_mat_norm[8, t] > 0.8):
                    path_set[t] = -2
                elif (emi_mat_norm[8, t] > 0.7 and emi_mat_norm[8, t] < 0.8):
                    for s in range(num_states - 1):
                        path_set[t] = np.argmax(emi_mat[:, t] + trans_mat[:, s])
                elif (emi_mat_norm[8, t] > 0.5 and emi_mat_norm[8, t] < 0.7):
                    for s in range(num_states - 1):
                        path_set[t] = np.argmin(emi_mat[:, t - 1] + trans_mat[s, s])
                else:
                    for s in range(num_states - 1):
                        path_set[t] = np.argmax(emi_mat[:, t - 1] + trans_mat[s, s] + emi_mat_time[:, t])
            path_set[36] = -2
            writer.writerow({
                'AnimalID': str(animalID[pos]),
                'TargetID': str(targetID[pos]),
                'PATH': str(path_set + 1)
            })
            '''for r in range(37):
                if(path_set[r]+1==-1):
                    g = g+1
            if(targetID[pos]==1):
                val = (36-g)/36
                group1[g1] = val
                g1 = g1 + 1
            if(targetID[pos]==2):
                val = (36-g)/36
                group2[g2] = val
                g2 = g2 + 1'''
            pos = pos + 12
def output_csv(rows):
    writer = csv.DictWriter(sys.stdout, FIELDS)
    writer.writeheader()
    for row in rows:
        writer.writerow(row)
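# Minimal usage sketch (assumed; FIELDS here is illustrative, the real module
# defines its own): streams the rows straight to standard output as CSV.
FIELDS = ['id', 'status']
output_csv([{'id': 1, 'status': 'ok'}, {'id': 2, 'status': 'failed'}])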
def report_csv(self, all_ops: List[str], passed: List[Optional[str]],
               experimental: List[str]) -> None:
    for schema in _all_schemas:
        if schema.domain == '' or schema.domain == 'ai.onnx':
            all_ops.append(schema.name)
            if schema.support_level == defs.OpSchema.SupportType.EXPERIMENTAL:
                experimental.append(schema.name)
    all_ops.sort()
    nodes_path = os.path.join(
        str(os.environ.get('CSVDIR')),  # type: ignore
        'nodes.csv')  # type: ignore
    models_path = os.path.join(
        str(os.environ.get('CSVDIR')),  # type: ignore
        'models.csv')  # type: ignore
    existing_nodes: OrderedDict[str, Dict[str, str]] = OrderedDict()
    existing_models: OrderedDict[str, Dict[str, str]] = OrderedDict()
    frameworks: List[str] = []
    if os.path.isfile(nodes_path):
        with open(nodes_path) as nodes_file:
            reader = csv.DictReader(nodes_file)
            assert reader.fieldnames
            frameworks = list(reader.fieldnames)
            for row in reader:
                op = row['Op']
                del row['Op']
                existing_nodes[str(op)] = row
    if os.path.isfile(models_path):
        with open(models_path) as models_file:
            reader = csv.DictReader(models_file)
            for row in reader:
                model = row['Model']
                del row['Model']
                existing_models[str(model)] = row
    backend = os.environ.get('BACKEND')
    other_frameworks = frameworks[1:]
    with open(nodes_path, 'w') as nodes_file:
        if 'Op' not in frameworks:
            frameworks.append('Op')
        if backend not in frameworks:
            frameworks.append(str(backend))
        else:
            other_frameworks.remove(str(backend))
        node_writer = csv.DictWriter(nodes_file, fieldnames=frameworks)
        node_writer.writeheader()
        for node in all_ops:
            node_name = node
            if node in experimental:
                node_name = node + ' (Experimental)'
            if node_name not in existing_nodes:
                # Also add Skipped for other nodes
                existing_nodes[node_name] = OrderedDict()
                for other_framework in other_frameworks:
                    existing_nodes[node_name][other_framework] = "Skipped!"
            if node in passed:
                existing_nodes[node_name][str(backend)] = "Passed!"
            else:
                existing_nodes[node_name][str(backend)] = "Failed!"
        summaries: Dict[Any, Any] = dict()
        if "Summary" in existing_nodes:
            summaries = existing_nodes["Summary"]
            del existing_nodes["Summary"]
        summaries[str(backend)] = f"{len(passed)}/{len(all_ops)} node tests passed"
        summaries['Op'] = 'Summary'
        for node in existing_nodes:
            existing_nodes[node]['Op'] = str(node)
            node_writer.writerow(existing_nodes[node])
        node_writer.writerow(summaries)
    with open(models_path, 'w') as models_file:
        frameworks[0] = "Model"
        model_writer = csv.DictWriter(models_file, fieldnames=frameworks)
        model_writer.writeheader()
        # Consider both buckets
        num_models = 0
        for bucket in self.models:
            for model in self.models[bucket]:  # type: ignore
                # Both analyze and run the model on the backend
                num_covered = 0
                for node in self.models[bucket][model].node_coverages:
                    if node in passed:
                        num_covered += 1
                # TODO: Identify if there are models that are being
                # skipped/not loaded, but that are in other frameworks
                msg = "Passed!"
                if bucket == 'loaded':
                    if model in self.models['passed']:
                        continue
                    msg = "Failed!"
                num_models += 1
                if model not in existing_models:
                    # Also add Skipped for other models
                    existing_models[model] = OrderedDict()
                    for other_framework in other_frameworks:
                        existing_models[model][other_framework] = "Skipped!"
                existing_models[model][str(backend)] = str(
                    "{}/{} nodes covered: {}".format(
                        num_covered,
                        len(self.models[bucket][model].node_coverages), msg))
        summaries.clear()
        if "Summary" in existing_models:
            summaries = existing_models["Summary"]
            del existing_models["Summary"]
        if str(backend) in summaries:
            del summaries[str(backend)]
        summaries[str(backend)] = "{}/{} model tests passed".format(
            len(self.models['passed']), num_models)
        summaries['Model'] = 'Summary'
        for model in existing_models:  # type: ignore
            existing_models[model]['Model'] = model
            model_writer.writerow(existing_models[model])
        model_writer.writerow(summaries)
    with open(
            os.path.join(
                str(os.environ.get('CSVDIR')),  # type: ignore
                'metadata.csv'), 'w') as metadata_file:  # type: ignore
        metadata_writer = csv.writer(metadata_file)
        metadata_writer.writerow([
            "Latest Update",
            datetime.datetime.now().isoformat().replace('T', ' ')
        ])
def get_list_batched(entity_desc, request_uri, out_file_json, out_file_csv,
                     field_list, extra_params=None):
    result_count = 1000000  # does not matter atm, offset of 0 will always dominate
    current_offset = 0
    result_limit = 1000
    list_obtained = []

    print(f"Looping through list of {entity_desc}")
    print(f"Current Offset: {current_offset}")
    print(f"Result Limit: {result_limit}")
    print(f"Result Count: {result_count}")

    while current_offset < result_count:
        request_uri_parameterized = f"{request_uri}?limit={result_limit}&offset={current_offset}"
        if extra_params:
            request_uri_parameterized = f"{request_uri_parameterized}&{extra_params}"
        current_offset = current_offset + result_limit
        stations_resp = requests.get(request_uri_parameterized, headers=head)
        if stations_resp.status_code != 200:
            print(stations_resp)
            raise ValueError(f"Error obtaining {entity_desc} list")
        print(stations_resp.json())
        current_results = stations_resp.json()
        result_count = current_results["metadata"]["resultset"]["count"]
        current_batch_results = current_results["results"]
        list_obtained.extend(current_batch_results)

        print()
        print("------------------------")
        print(f"Completed pull via: {request_uri_parameterized}")
        print(current_results["metadata"])
        print(
            f"Successfully obtained another batch of {len(current_batch_results)} {entity_desc}"
        )
        print(f"We now have data for {len(list_obtained)} {entity_desc} total")
        print("Current Batch:")
        for cbr in current_batch_results:
            print(cbr)
        print("Looping")
        print(f"Current Offset: {current_offset}")
        print(f"Result Limit: {result_limit}")
        print(f"Result Count: {result_count}")
        print()

    print()
    print(f"Pull Complete, total stations extracted: {len(list_obtained)}")

    # Write JSON to disk
    with open(out_file_json, 'w') as f:
        json.dump(list_obtained, f)

    # Write CSV to disk
    with open(out_file_csv, 'w') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=field_list,
                                     delimiter="|")
        dict_writer.writeheader()
        dict_writer.writerows(list_obtained)

    return list_obtained
def make_bayesdb_files(exp_data, analysis_params, cm_params):
    ec_cache = {}
    expfiles = json.load(open(exp_data, 'rb'))['tasbe_experimental_data']['samples']
    input_cols = []
    output_cols = []
    aparams = json.load(open(analysis_params, 'rb'))
    channels = aparams['tasbe_analysis_parameters']['channels']
    output_dir = aparams['tasbe_analysis_parameters']['output'].get('output_folder', 'output')
    label_map = (json.load(open(cm_params, 'rb'))['tasbe_color_model_parameters']['channel_parameters'])
    label_map = {matlab_sanitize(x['name']): '{}_MEFL'.format(x['label']) for x in label_map}
    print label_map
    for c in channels:
        if c not in output_cols:
            output_cols.append('{}_MEFL'.format(c))
    big_csv = []
    for file_id, f in enumerate(expfiles):
        pointfile = os.path.join(output_dir, os.path.basename(re.sub('.fcs', '_PointCloud.csv', f['file'])))
        if f['sample'] not in ec_cache:
            ec_cache[f['sample']] = ec.ExperimentalCondition("https://hub-api.sd2e.org/sparql", f['sample']).conditions
        conditions = ec_cache[f['sample']]
        for c in conditions:
            if c not in input_cols:
                input_cols.append(c)
        if 'file_id' not in input_cols:
            input_cols.append('file_id')
        this_csv = csv.DictReader(open(pointfile, 'rb'))
        for row in this_csv:
            row.update(conditions)
            row.update({'file_id': file_id})
            big_csv.append(row)
    with open(os.path.join(output_dir, 'bayesdb_data.csv'), 'wb') as bayesdb_datafile:
        print input_cols + output_cols
        writer = csv.DictWriter(bayesdb_datafile, fieldnames=input_cols + output_cols)
        writer.writeheader()
        for row in big_csv:
            writer.writerow(row)
    with open(os.path.join(output_dir, 'bayesdb_metadata.json'), 'wb') as bayesdb_metafile:
        metadata = {}
        metadata['outcome-variables'] = []
        metadata['experimental-variables'] = []
        for i in input_cols:
            metadata['experimental-variables'].append({'name': i})
        for o in output_cols:
            metadata['outcome-variables'].append({'name': o})
        json.dump(metadata, bayesdb_metafile)
def get_timeseries_batched(entity_desc, datasetid, stn_id, year_start, year_end,
                           request_uri, out_file_json, out_file_csv, field_list,
                           extra_params=None):
    result_count = 1000000  # does not matter atm, offset of 0 will always dominate
    current_offset = 0
    result_limit = 1000
    no_results = False
    list_obtained = []

    license_key = cdo_tokens.get(block=True)
    logger.debug(f"Using License key {license_key}")
    stn_name = STATION_LOOKUP[stn_id]["stn_name"]
    logger.info(
        f"Pulling for station {stn_id}, readings from {year_start}-{year_end}, location: {stn_name}"
    )
    try:
        for year_current in range(year_start, year_end + 1):
            no_results = False
            current_offset = 0
            head = {'token': license_key}
            while not no_results and (current_offset < result_count):
                time.sleep(1)
                request_uri_parameterized = f"{request_uri}?limit={result_limit}&offset={current_offset}&startdate={year_current}-01-01&enddate={year_current}-12-31&datasetid={datasetid}&stn_id={stn_id}"
                if extra_params:
                    request_uri_parameterized = f"{request_uri_parameterized}&{extra_params}"
                current_offset = current_offset + result_limit
                logger.debug(
                    f"\tPulling for station {stn_id} for {year_current}")
                stations_resp = requests.get(request_uri_parameterized,
                                             headers=head)
                if stations_resp.status_code != 200:
                    logger.error(
                        f"Error with request: {request_uri_parameterized}")
                    logger.error(stations_resp)
                    # We DONT want to kill the whole process
                    #raise ValueError(f"Error obtaining {entity_desc} list")
                current_results = stations_resp.json()
                # Only process non-empty requests
                if "metadata" not in current_results:
                    no_results = True
                    logger.debug(
                        f"\tPulling for station {stn_id} for {year_current}: no results"
                    )
                else:
                    result_count = current_results["metadata"]["resultset"]["count"]
                    current_batch_results = current_results["results"]
                    list_obtained.extend(current_batch_results)
                    logger.debug(
                        f"\tPulling for station {stn_id} for {year_current}: {len(list_obtained)} total results"
                    )
        logger.info(
            f"Pulling for station {stn_id}, readings from {year_start}-{year_end}, location: {stn_name}, completed with {len(list_obtained)} readings"
        )
        # Write CSV to disk
        logger.debug(
            f"Writing completed pull for station {stn_id} to {out_file_csv}")
        with open(out_file_csv, 'w') as output_file:
            dict_writer = csv.DictWriter(output_file, fieldnames=field_list,
                                         delimiter="|")
            dict_writer.writeheader()
            dict_writer.writerows(list_obtained)
    except Exception as err:
        logger.error(
            f"ERROR: Exception encountered on pull for station {stn_id}")
        logger.error(err)
        traceback.print_exc()
    cdo_tokens.put(license_key)
    return list_obtained
def update():
    print "************* UPDATE ****************"
    print "Update By \n1.ID\n2.Name"
    choice = input("Enter 1 or 2")
    if choice == 1:
        print "Enter Product ID to be updated\n"
        update_id = raw_input()
        condition = check(update_id)
        if condition == "not found":
            print "Entered ID not found\n"
            return
        print "Enter Price to be Updated\n"
        price = raw_input("Enter price\n")
        with open("inventory.csv", "rb") as read:
            reader = csv.DictReader(read)
            for i in reader:
                if i == condition:
                    with open("dummy.csv", "ab") as dummy:
                        writeHeader("dummy.csv")
                        fieldnames = ["item_id", "item_name", "item_price"]
                        writer = csv.DictWriter(dummy, fieldnames=fieldnames)
                        writer.writerow({
                            "item_id": i["item_id"],
                            "item_name": i["item_name"],
                            "item_price": price
                        })
                else:
                    with open("dummy.csv", "ab") as dummy:
                        writeHeader("dummy.csv")
                        fieldnames = ["item_id", "item_name", "item_price"]
                        writer = csv.DictWriter(dummy, fieldnames=fieldnames)
                        writer.writerow(i)
        copyfile("dummy.csv", "inventory.csv")
        os.remove("dummy.csv")
    elif choice == 2:
        print "Enter Product ID to be updated\n"
        update_name = raw_input()
        condition = check_name(update_name)
        if condition == "not found":
            print "Entered Name not found"
            return
        print "Enter Price to be Updated\n"
        price = raw_input("Enter price\n")
        with open("inventory.csv", "rb") as read:
            reader = csv.DictReader(read)
            for i in reader:
                if i == condition:
                    with open("dummy.csv", "ab") as dummy:
                        writeHeader("dummy.csv")
                        fieldnames = ["item_id", "item_name", "item_price"]
                        writer = csv.DictWriter(dummy, fieldnames=fieldnames)
                        writer.writerow({
                            "item_id": i["item_id"],
                            "item_name": i["item_name"],
                            "item_price": price
                        })
                else:
                    with open("dummy.csv", "ab") as dummy:
                        writeHeader("dummy.csv")
                        fieldnames = ["item_id", "item_name", "item_price"]
                        writer = csv.DictWriter(dummy, fieldnames=fieldnames)
                        writer.writerow(i)
        copyfile("dummy.csv", "inventory.csv")
        os.remove("dummy.csv")
    else:
        print "You have entered an incorrect option\n"
print("Number of columns with duplicates: " + str(len(key_list)))
# Cut duplicates
key_list = list(dict.fromkeys(key_list))
print("Number of columns without duplicates: " + str(len(key_list)))
print("Number of rows: " + str(len(json_list) + 1))

# That's useful for my project. It moves "file_path" and "<page title>" in front of key_list.
# If you are not me, skip this one. If you are me, damn you are awesome!
key_list.insert(0, key_list.pop(key_list.index("<page title>")))
key_list.insert(0, key_list.pop(key_list.index("file_path")))

print("Writing file " + csv_file_name)
# Write a single csv file with the content of every json found
with open(csv_file_name, mode='w') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=key_list)
    csv_writer.writeheader()
    for i in range(0, len(json_list)):
        csv_writer.writerow(json_list[i])

# And that's the end of the time counter
stop_time = time.time()
print("Execution completed. Yey. It took " +
      str("%.2f" % round((stop_time - start_time), 2)) +
      " seconds or, if you prefer, " +
      str("%.2f" % round(((stop_time - start_time) / 60), 2)) + " minutes")
def gp(sr, cos):
    print "Gross profit: £", result_gp


cos(0, 0, 0)
gp(0, 0)
print " "
print "Saving data in CSV"

with open('Results.csv', 'w') as csvfile:
    fieldnames = [
        'Company', 'Sector', 'Current Liabilities', 'Non-Current Liabilities',
        'Current Assets', 'Non-Current Assests', 'Equity'
    ]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerow({
        'Company': CN1,
        'Sector': S1,
        'Current Liabilities': CL,
        'Non-Current Liabilities': NCL,
        'Current Assets': CA,
        'Non-Current Assests': NCA,
        'Equity': EQ
    })
    writer.writerow({})
    fieldnames = [
        'Company', 'Sector', 'Purchases', 'Interest Payables', 'Sales',
        'Expenses', 'Interest Receivables', 'Opening Stock', 'Closing Stock'
        keyword_location)
    return location_of_closed_keyword


with open("../../Output/" + sys.argv[1], "r") as intermediateFeaturesFile:
    intermediateFeatureReader = csv.DictReader(intermediateFeaturesFile)
    fieldnames = [
        'id', 'name', 'some_capitalized', 'atleast_one_capitalized',
        'first_letter_capitalized', 'has_suffix_salutation', 'start_position',
        'distance_to_period', 'distance_to_closest_keyword', 'frequency',
        'contains_period', 'contains_keywords', 'name_length',
        'number_of_capitals', 'distance_to_closest_eol', 'label'
    ]
    output = csv.DictWriter(open("../../Output/" + sys.argv[2], "w"),
                            fieldnames=fieldnames)
    output.writeheader()
    for row in intermediateFeatureReader:
        identifier = row['id']
        name = row['name']
        number_of_capitals = sum(1 for c in name if c.isupper())
        contains_period = 0 if name.find(".") == -1 else 1
        name_length = len(name)
        some_capitalized = 1 if number_of_capitals >= 2 else 0
        atleast_one_capitalized = 1 if number_of_capitals >= 1 else 0
        first_letter_capitalized = 1 if name[0].isupper() else 0
        has_suffix_salutations = check_suffix_salutations()
        start_position = int(row['start_position'])
        end_position = int(row['end_position'])
        label = row['label']