def save_results(results, path, header=None, mode='w'):
    """
    Write results to the specified path.

    Parameters
    ----------
    results : dict or list of dict
        The results to write.
    path : str
        The path to the save file.
    header : list, optional
        Column header; defaults to None, in which case it is inferred from the results.
    mode : str
        Defaults to 'w' (write). Can be 'a' (append).
    """
    if header is None:
        try:
            header = results.keys()
        except AttributeError:
            try:
                header = results[0].keys()
            except AttributeError:
                raise Exception('Could not get the column header from the list, '
                                'please specify the header.')
    with open(path, mode, encoding='utf8') as f:
        writer = DictWriter(f, header)
        if mode != 'a':
            writer.writeheader()
        for line in results:
            writer.writerow({k: make_safe(line[k], '/') for k in header})
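# A minimal usage sketch of save_results (the rows and file name below are
# illustrative; make_safe is the helper already used above, assumed to escape
# the given delimiter in each value):
rows = [{'word': 'cat', 'freq': 12}, {'word': 'dog', 'freq': 7}]
save_results(rows, 'counts.csv')                                      # header inferred from the first dict
save_results(rows, 'counts.csv', header=['word', 'freq'], mode='a')   # append without rewriting the header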
def writePredictions(self):
    print "In writePredictions"
    o = DictWriter(open("predictions.csv", "w"), ["id", "position"])
    o.writeheader()
    for ii, pp in zip([x["id"] for x in self.test], self.predictions):
        d = {"id": ii, "position": pp}
        o.writerow(d)
def test_01_importTab(self):
    # First check the collections; if none exist, create them directly
    # client.drop_database(DB_INFO['DB_NAME'])
    SD.importTab()
    SD.copy_table()
    self.assertIn('FACT_ATTR', db.collection_names())
    self.assertIn('FACT_SERVICE', db.collection_names())
    self.assertIn('FACT_ATTR_SET', db.collection_names())
    self.assertIn('FACT_SCENE', db.collection_names())
    # If they exist, check the id first: insert when missing, otherwise update
    L = list()
    table = 'FACT_ATTR'
    filepath = os.path.sep.join([os.path.abspath(Const.DATA_IN), table])
    with open(filepath, 'r') as f:
        dReader = DictReader(f)
        L = [i for i in dReader]
    L[-1]['attr_set_id'] = 1
    L[-1]['value'] = 'rampage'
    L.append({'_id': 4, 'name': 'attr4', 'type_id': 6, 'value': 'test', 'attr_set_id': 2})
    with open(filepath, 'w') as f:
        titles = L[-1].keys()
        dwriter = DictWriter(f, titles)
        header = dict(zip(titles, titles))
        dwriter.writerow(header)
        dwriter.writerows(L)
    SD.importTab()
    # While we are at it, test the query here as well
    match = {'_id': {'$in': [3, 4]}}
    rs = list(db[table].find(match))
    self.assertEqual(len(rs), 2)
    self.assertEqual(rs[-2]['attr_set_id'], 1)
    self.assertEqual(rs[-2]['value'], 'rampage')
def main(args):
    with open("users.csv", "w") as f:
        fieldnames = ["username", "first_name", "last_name", "email", "phone",
                      "institution", "voro_account"]
        csvFile = DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
        # write header row
        field_dict = dict([(x, x.capitalize()) for x in fieldnames])
        csvFile.writerow(field_dict)
        for user in User.objects.all():
            # look up associated profile & inst
            try:
                profile = user.get_profile()
                phone = profile.phone
                voro_account = profile.voroEAD_account
            except UserProfile.DoesNotExist:
                phone = ""
                voro_account = False
            user.__dict__["phone"] = phone
            user.__dict__["voro_account"] = voro_account
            # inst through group
            groups = user.groups.all()
            instname = ""
            if len(groups):
                firstgroup = user.groups.all()[0]
                grpprofile = firstgroup.groupprofile
                insts = grpprofile.institutions.all()
                if len(insts):
                    instname = insts[0].name
                else:
                    instname = ""
            user.__dict__["institution"] = instname.encode("utf-8")
            csvFile.writerow(user.__dict__)
def main():
    search_par_h = open("data/search_params.csv", "w")
    writer = DictWriter(search_par_h, fieldnames=["SearchID", "SearchParams"])
    writer.writeheader()
    for t, row in read_tsv("data/SearchInfo.tsv"):
        sparams = row["SearchParams"]
        if not sparams:
            continue
        sid = int(row["SearchID"])
        sparams = re.sub(r"([A-Za-z0-9]+):", r'"\1":', sparams)
        sparams = sparams.replace("'", "\"")
        sparams = sparams.replace("Минивэн\",", "\"Минивэн\",")
        sparams = sparams.replace("Микроавтобус\"]", "\"Микроавтобус\"]")
        sparams = unicode(sparams, "utf-8")
        try:
            sparams = json.loads(sparams)
            for k, v in sparams.items():
                t = type(v)
                if t not in type_set:
                    print t, k, v
                    type_set.add(t)
            sparams_str = json.dumps(sparams)
            writer.writerow({"SearchID": sid, "SearchParams": sparams_str})
        except Exception as e:
            print e
            print sparams
def customer_stats(outfile=None):
    sales = sales_grouped_by_users()
    stats = {}
    for user_id, items in sales:
        item_list = list(items)
        data = {}
        data['user_id'] = user_id
        data['n_lines'] = len(item_list)
        # all orders
        fill_items(data, item_list, suffix='')
        # online orders
        item_list_online = [i for i in item_list if i['online_order_number']]
        fill_items(data, item_list_online, suffix='_online')
        # sale items
        item_list_on_sale = [i for i in item_list if i['on_sale'] == 't']
        fill_items(data, item_list_on_sale, suffix='_on_sale')
        stats[user_id] = data
    if outfile is not None:
        fieldnames = sorted(data.keys())
        dw = DictWriter(open(outfile, 'w'), fieldnames=fieldnames)
        dw.writeheader()
        for user_id, row in stats.iteritems():
            dw.writerow(row)
    return stats.values()
def main():
    # We open the 2000 file first because it has the headers
    print("Reading from:", SRC_PATHS['2000'])
    csv2000 = DictReader(SRC_PATHS['2000'].read_text().splitlines())
    # awkward but whatever. We need to use csv2000's headers
    # and add the 'year' column to it
    destfile = DEST_PATH.open('w')
    destcsv = DictWriter(destfile, fieldnames=['year'] + csv2000.fieldnames)
    destcsv.writeheader()
    for i, row in enumerate(csv2000):
        row['year'] = 2000
        destcsv.writerow(row)
    print("Wrote {0} lines to: {1}".format(i + 1, DEST_PATH))
    # now we open the 1990 file and iterate
    print("Reading from:", SRC_PATHS['1990'])
    for i, line in enumerate(SRC_PATHS['1990'].read_text().splitlines()):
        name, freq, cumfreq, rank = re.search(RX_ROW_1990, line).groups()
        row = {
            'name': name.strip(),
            'rank': int(rank),
            'year': 1990,
            'prop100k': int(float(freq) * 1000),
            'cum_prop100k': int(float(cumfreq) * 1000),
        }
        destcsv.writerow(row)
    print("Wrote {0} lines to: {1}".format(i + 1, DEST_PATH))
    # all done
    destfile.close()
def run_queries(session, state):
    query_inputs = term_handler(state)
    combinations = cartesian_product(query_inputs)
    for query in combinations:
        PARAMS.update(query)
        logger.info('query')
        sleep(SLEEP_TIME)
        page = session.get(url=OB_BASE % SEARCH_URL, params=PARAMS)
        logger.info('got page')
        pricing_data = page.json()
        with open(WRITE_FILE_PATH, 'wb') as output_file:
            fieldnames = pricing_data['PricingRecords'][0].keys()
            fieldnames.append('Scenario')
            print 'FIELDNAMES %s' % fieldnames
            logger.info('header %s' % fieldnames)
            csv_output = DictWriter(output_file, fieldnames=fieldnames)
            csv_output.writeheader()
            for row in pricing_data['PricingRecords']:
                row['Scenario'] = '{msa}|{product}{purpose}{amount}{ltv}{fico}LD30IO0{term}'.format(
                    msa=query_inputs['MSALocation_Index'][query['MSALocation_Index']],
                    product=query_inputs["ProductType"][query["ProductType"]],
                    purpose=query_inputs["Purpose"][query["Purpose"]],
                    amount=query_inputs["LoanAmount"][query["LoanAmount"]],
                    ltv=query_inputs["LTV"][query["LTV"]],
                    fico=query_inputs["FICO"][query["FICO"]],
                    term=query_inputs["Term"][query["Term"]])
                logger.info('adding row %s' % row)
                csv_output.writerow(row)
def initialize_writer(fieldnames, buffer, months):
    flowSummaryWriter = DictWriter(buffer, fieldnames=fieldnames, delimiter="\t")
    flowSummaryWriter.writerow(dict(zip(
        fieldnames,
        ["Account"]
        + (["Start"] if "Start" in fieldnames else [])
        + [month.strftime("%B %Y") for month in months]
        + ["Net"]
        + (["End"] if "End" in fieldnames else []))))
    return flowSummaryWriter
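# A minimal sketch of how this writer might be initialized. The fieldnames and
# months below are illustrative placeholders; the real caller supplies its own,
# and the fieldnames list must line up with the header labels built above
# (Account, optional Start, one column per month, Net, optional End).
from datetime import date
from io import StringIO

buf = StringIO()
months = [date(2024, m, 1) for m in (1, 2, 3)]
fieldnames = ["account", "Start"] + [m.strftime("%B %Y") for m in months] + ["net", "End"]
writer = initialize_writer(fieldnames, buf, months)
# writer.writerow(...) can now be called once per account with its flow summary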
def write_report(report):
    f = open(report.filename, 'wb')
    print >>f, ','.join(report.columns)
    writer = DictWriter(f, report.columns)
    for row in report.rows:
        writer.writerow(_encode_row(row))
    f.close()
def write_csv(self, array, fname, delimiter=":"):
    with open(fname, "w") as f:
        fieldnames = list(array[0].keys())
        writer = DictWriter(f, delimiter=delimiter, lineterminator="\n",
                            fieldnames=fieldnames)
        writer.writerow(dict((field, field) for field in fieldnames))
        for row in array:
            writer.writerow(row)
def main():
    '''
    >>> main() # stuff happens
    '''
    args = parse_args()
    logging.basicConfig(filename=args.log, level=logging.INFO)
    input_otu_counts = defaultdict(lambda: defaultdict(lambda: 0))
    field_names = set()
    for input in args.inputs:
        with open(input) as handle:
            kraken_data = parse_kraken_file(handle)
            for row in kraken_data:
                field_names.add(row['ncbi_taxid'])
                input_otu_counts[input][row['ncbi_taxid']] += 1
    field_names = ['input'] + sorted([i for i in field_names])
    with open(args.output, 'w') as handle:
        writer = DictWriter(handle, fieldnames=field_names)
        writer.writeheader()
        for input, otu_counts in list(input_otu_counts.items()):
            otu_counts['input'] = input
            writer.writerow(otu_counts)
def csv_results(self, csv_file, histogram_size=None):
    specs = self.token_categories
    names_from_type = {
        spec.typename: spec.name.first_original().value.encode('utf-8')
        for spec in specs
    }
    spec_names = names_from_type.values()
    spec_names.sort()
    spec_names.insert(0, "idea")
    dw = DictWriter(csv_file, spec_names, dialect='excel', delimiter=';')
    dw.writeheader()
    by_idea = self._gather_results()
    values = {
        votable_id: self.results_for(voting_results)
        for (votable_id, voting_results) in by_idea.iteritems()
    }
    idea_names = dict(self.db.query(Idea.id, Idea.short_title).filter(
        Idea.id.in_(by_idea.keys())))
    idea_names = {
        id: name.encode('utf-8') for (id, name) in idea_names.iteritems()}
    ordered_idea_ids = Idea.visit_idea_ids_depth_first(
        AppendingVisitor(), self.get_discussion_id())
    ordered_idea_ids = [id for id in ordered_idea_ids if id in values]
    for idea_id in ordered_idea_ids:
        base = values[idea_id]
        sums = {names_from_type[k]: v for (k, v) in base['sums'].iteritems()}
        sums['idea'] = idea_names[idea_id]
        dw.writerow(sums)
def handle(self, *args, **options): print('Start.') sampled_comments = [] feeds = Facebook_Feed.objects.all() from_date = dateutil.parser.parse(options['from_date']) to_date = dateutil.parser.parse(options['to_date']) sample_size = options['sample_size'] batch_size = options['batch_size'] for i, feed in enumerate(feeds): print('working on feed {} of {}'.format(i + 1, feeds.count())) residual = None sampled_comments_for_feed = [] statuses_for_feed = Facebook_Status.objects.filter(feed__id=feed.id).filter( published__range=[from_date, to_date]).order_by( 'comment_count') for i, status in enumerate(statuses_for_feed): is_last = i + 1 == len(statuses_for_feed) samples_for_status, residual = self.sample(status, is_last, residual, sample_size=sample_size, batch_size=batch_size) sampled_comments_for_feed += samples_for_status sampled_comments += sampled_comments_for_feed print('total_comments:', len(sampled_comments)) with open('{}.csv'.format(args[0]), 'wb') as f: fieldnames = ['comment_id', 'status_id'] writer = DictWriter(f, fieldnames=fieldnames) writer.writerow({'comment_id': 'comment_id', 'status_id': 'status_id'}) for row in sampled_comments: writer.writerow({'comment_id': row['comment_id'], 'status_id': row['parent__status_id']}) print('Done.')
def main():
    layout = construct_layout(OFF_PROPERTY_LAYOUT)
    header = get_active_header(OFF_PROPERTY_LAYOUT)
    # Prepare CSV output to stdout
    writer = DictWriter(stdout, fieldnames=header)
    writer.writeheader()
    parse = Struct(layout).unpack_from
    struct_length = calcsize(layout)
    for line in get_stdin_bytes().readlines():
        # Ensure string length is what deconstructer expects
        if len(line) != struct_length:
            line = '{:<{}s}'.format(line.decode(), struct_length).encode()
        # Deconstruct fixed-width string
        row = parse(line)
        # Decode each value
        row = (v.decode('ascii', 'ignore') for v in row)
        # Trim whitespace in each field
        row = [field.strip() for field in row]
        # Convert to dict using header
        row = dict(zip(header, row))
        writer.writerow(row)
def _stats_data_csv(user_profile, req_input, client, ignored, stats_type, is_custom):
    n_type_keys = {
        'mean': ['start', 'stop', 'service_name', 'mean', 'mean_all_services',
                 'usage_perc_all_services', 'time_perc_all_services',
                 'all_services_usage', 'mean_trend'],
        'usage': ['start', 'stop', 'service_name', 'usage', 'rate',
                  'usage_perc_all_services', 'time_perc_all_services',
                  'all_services_usage', 'usage_trend'],
    }
    buff = StringIO()
    writer = DictWriter(buff, n_type_keys[req_input.n_type], extrasaction='ignore')
    writer.writeheader()
    for stat in _get_stats(client, req_input.utc_start, req_input.utc_stop,
                           req_input.n, req_input.n_type, stats_type):
        d = stat.to_dict()
        d['start'] = req_input.user_start
        d['stop'] = req_input.user_stop if stats_type == 'trends' or is_custom else ''
        writer.writerow(d)
    out = buff.getvalue()
    buff.close()
    response = HttpResponse(out, content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename={}'.format('zato-stats.csv')
    return response
def convert_powertracker_log_to_csv(path):
    """
    This function creates a CSV file (to ./results) from a PowerTracker log file
    (from ./data). This is inspired from
    https://github.com/sieben/makesense/blob/master/makesense/parser.py.

    :param path: path to the experiment (including [with-|without-malicious])
    """
    platforms = [p.capitalize() for p in get_available_platforms()]
    data, results = join(path, 'data'), join(path, 'results')
    with open(join(data, 'powertracker.log')) as f:
        log = f.read()
    iterables, fields = [], ['mote_id']
    for it in PT_ITEMS:
        time_field = '{}_time'.format(it)
        iterables.append(finditer(PT_REGEX.format('|'.join(platforms), it.upper(), time_field),
                                  log, MULTILINE))
        fields.append(time_field)
    with open(join(results, 'powertracker.csv'), 'w') as f:
        writer = DictWriter(f, delimiter=',', fieldnames=fields)
        writer.writeheader()
        for matches in zip(*iterables):
            row = {}
            for m in matches:
                row.update((k, int(v)) for k, v in m.groupdict().items())
            for it in PT_ITEMS:
                time_field = '{}_time'.format(it)
                row[time_field] = float(row[time_field] / 10 ** 6)
            writer.writerow(row)
def output_list(to_save, path):
    head = to_save[0].keys()
    with open(path, 'w') as f:
        csvwriter = DictWriter(f, head, delimiter='\t')
        csvwriter.writerow({x: x for x in head})
        for l in to_save:
            csvwriter.writerow(l)
def export_feature_matrix_csv(feature_matrix, path, delimiter=','):
    """
    Save a FeatureMatrix as a column-delimited text file

    Parameters
    ----------
    feature_matrix : FeatureMatrix
        FeatureMatrix to save to text file
    path : str
        Full path to write text file
    delimiter : str
        Character to mark boundaries between columns. Defaults to ','
    """
    with open(path, encoding='utf-8-sig', mode='w') as f:
        header = ['symbol'] + feature_matrix.features
        writer = DictWriter(f, header, delimiter=delimiter)
        writer.writerow({h: h for h in header})
        for seg in feature_matrix.segments:
            # If FeatureMatrix uses dictionaries
            # outdict = feature_matrix[seg]
            # outdict['symbol'] = seg
            # writer.writerow(outdict)
            if seg in ['#', '']:
                # skip the word-boundary symbol and empty segments
                continue
            featline = feature_matrix.seg_to_feat_line(seg)
            outdict = {header[i]: featline[i] for i in range(len(header))}
            writer.writerow(outdict)
def main(argv):
    mountpoint, ifname, ofname = getParms()
    if not mountpoint.endswith('/'):
        mountpoint = mountpoint + '/'
    metaname = mountpoint + 'RW_32/metadata_v7.csv'
    # try opening the files
    try:
        # scl enable python27 bash
        # to allow multiple openings on one line
        with open(metaname, "rb") as mi, open(ifname, "rb") as cl, open(ofname, "wb") as mo:
            fields = ['identifier', 'filename', 'folder', 'date_created', 'checksum',
                      'series_number', 'creating_body', 'crawl_start', 'crawl_end',
                      'filesize', 'unit']
            all_fields = fields + ['date_archivist_note', 'archivist_note']
            metareader = DictReader(mi, fieldnames=fields)
            creader = reader(cl)
            # will always be tiny wrt metadata so slurp
            corrupt = {}
            for row in creader:
                corrupt[row[0]] = [row[1], row[2]]
            writer = DictWriter(mo, delimiter=',', fieldnames=all_fields)
            writer.writerow(dict((fn, fn) for fn in all_fields))
            print "[INFO] Opened files successfully."
            modifyMetadata(metareader, corrupt, writer)
    except IOError as e:
        print "[IOERROR] " + str(e)
def handle_noargs(self, **options): print "in the command..." comment_keys = ('user_key', 'g', 'agroup', 'user_key', 'experiment_slug', 'variant', 'via') petition_headers = comment_keys + ('name', 'email') # Petition signatories from the first two experiments for filename, url_path in [ ('petition-1.csv', '/county-performance/petition'), ('petition-2.csv', '/county-performance-2/petition'), ]: with open(filename, "wb") as f: writer = DictWriter(f, petition_headers) writer.writeheader() for f in Feedback.objects.filter(url__endswith=url_path): data, comment = unpack_comment(f.comment) row_data = data.copy() row_data['name'] = comment row_data['email'] = f.email writer.writerow(row_data) senate_headers = comment_keys + ('comment',) for filename, url_path in [ ('senate-1.csv', '/county-performance/senate'), ('senate-2.csv', '/county-performance-2/senate'), ]: with open(filename, "wb") as f: writer = DictWriter(f, senate_headers) writer.writeheader() for f in Feedback.objects.filter(url__endswith=url_path): data, comment = unpack_comment(f.comment) row_data = data.copy() row_data['comment'] = comment writer.writerow(row_data)
def run():
    output = open(sys.argv[1], 'w')
    writer = DictWriter(output, fieldnames=['uid', 'data'])
    writer.writeheader()
    db = DB(dbconfig)
    for uid in fetch_users(db):
        data = fetch_user_location_logs(uid, db)
        locations = merge_locations(data)
        matrix = generate_matrix(locations)
        semantic_data = fetch_semantic_data(list(matrix.keys()))
        semantic_dict = {}
        for row in semantic_data:
            semantic_dict[row['location']] = clean_tags(row['tags'], 5)
        tag_matrix = {}
        for location, proba in list(matrix.items()):
            tag_dict = semantic_dict[location]
            tag_weight = sum(v for v in list(tag_dict.values()))
            if tag_weight == 0:
                continue
            for tag, cnt in list(tag_dict.items()):
                tag_matrix.setdefault(tag, [0] * 48)
                for i in range(48):
                    tag_matrix[tag][i] += (proba[i] * cnt + 0.001) / (tag_weight + 0.001)
        writer.writerow({'uid': uid, 'data': json.dumps(tag_matrix)})
    output.close()
def main(argv):
    uname, pwd, filelist, ifname, ofname = getParms()
    # try opening the files
    try:
        # scl enable python27 bash
        # to allow multiple openings on one line
        with open(filelist, "rb") as fhl, open(ifname, "rb") as fhi, open(ofname, "wb") as fho:
            # read in the list of filenames to insert
            d = {}
            for line in fhl:
                fname = line.split('/')[-1].rstrip()  # filename points to folder
                parts = splitFilename(fname)
                crawldir = parts.group(1)
                if crawldir in d:
                    d[crawldir].append(line.rstrip())
                else:
                    d[crawldir] = [line.rstrip()]
            fields = ['identifier', 'filename', 'folder', 'date_created', 'checksum',
                      'series_number', 'creating_body', 'crawl_start', 'crawl_end',
                      'filesize', 'unit']
            reader = DictReader(fhi, fieldnames=fields)
            writer = DictWriter(fho, delimiter=',', fieldnames=fields)
            writer.writerow(dict((fn, fn) for fn in fields))
            print "[INFO] Opened files successfully."
            insertFiles(uname, pwd, d, reader, writer)
    except IOError as e:
        print "[IOERROR] " + str(e)
def process_vf(loc_data): precinct_data = {} with open(Files.VF_CUT.format(**loc_data), "r") as r, open(Files.VF_DEDUPED.format(**loc_data), "w") as w: reader = DictReader(r, dialect='excel-tab') writer = DictWriter(w, fieldnames=Headers.VF_DEDUPED) writer.writeheader() vf_hashes = set() p_count = 0 for row in reader: if len(loc_data['county']) > 0 and not row['vf_county_name'].upper() == loc_data['county'].upper(): continue vf_hash = get_hash(row, HashFields.VF) if vf_hash in vf_hashes: continue vf_hashes.add(vf_hash) vfp_hash = get_hash(row, HashFields.VFP) row_zip = row['vf_reg_cass_zip'] if vfp_hash not in precinct_data: p_count += 1 precinct_data[vfp_hash] = get_conversion(row, Conversions.VFP) precinct_data[vfp_hash]['vf_precinct_id'] = Prefixes.PRECINCT + str(p_count) precinct_data[vfp_hash]['zips'] = {row_zip:1} precinct_data[vfp_hash]['examples'] = [] elif row_zip not in precinct_data[vfp_hash]['zips']: precinct_data[vfp_hash]['zips'][row_zip] = 1 else: precinct_data[vfp_hash]['zips'][row_zip] += 1 vf_output = get_conversion(row, Conversions.VF) if len(precinct_data[vfp_hash]['examples']) < 5: precinct_data[vfp_hash]['examples'].append(vf_output) vf_output["vf_precinct_id"] = precinct_data[vfp_hash]['vf_precinct_id'] vf_output["vf_id"] = str(Prefixes.VF + row["voterbase_id"][3:]) writer.writerow(vf_output) return precinct_data
def dump_csv(pages, options):
    """Dump in CSV format.

    ``pages`` is an iterable of rows, each a sequence of (field, value) tuples.
    It's assumed that the same fields are used in each row.
    """
    from itertools import chain
    from csv import DictWriter
    from sys import stdout
    pages = iter(pages)
    try:
        first_row = pages.next()
    except StopIteration:
        return
    fields = [item[0] for item in first_row]
    rows = chain((first_row,), pages)
    dicts = (dict(page) for page in rows)
    dicts = (process_page(row) for row in dicts)

    def validate_row_length(row_dict):
        if len(row_dict) != len(fields):
            raise DataValidationError(
                'Inconsistent number of fields in row {0}.\n'
                'Fields: {1}'.format(row_dict, fields))
        return row_dict

    dicts = (validate_row_length(row) for row in dicts)
    writer = DictWriter(stdout, fields, dialect='excel-tab')
    writer.writerow(dict((v, v) for v in fields))
    writer.writerows(dicts)
def write_data(outfile, data, fields):
    with open(outfile, 'w') as outfile:
        writer = DictWriter(outfile, fieldnames=fields)
        writer.writeheader()
        for d in data:
            writer.writerow(d)
def get_vf_precincts(loc_data, precinct_data): with open(Files.VF_PRECINCTS.format(**loc_data), "w") as vfp_w, open(Files.VF_EX_PRECINCTS.format(**loc_data), "w") as vfep_w: vfp_writer = DictWriter(vfp_w, fieldnames=Headers.VFP) vfp_writer.writeheader() vfep_writer = DictWriter(vfep_w, fieldnames=Headers.VFEP) vfep_writer.writeheader() for key, vfp_dict in precinct_data.iteritems(): zips = vfp_dict.pop('zips') max_count = 0 max_zip = 0 total_count = 0 for zip_val, zip_count in zips.iteritems(): total_count += zip_count if zip_count > max_count: max_count = zip_count max_zip = zip_val vfp_dict['vf_precinct_zip'] = max_zip vfp_dict['vf_precinct_count'] = total_count examples = vfp_dict.pop('examples') vfp_writer.writerow(vfp_dict) ex_count = 1 for ex in examples: for key in Conversions.VF_EX: vfp_dict[Prefixes.VFP_EX.format(ex_count)+key] = ex[key] ex_count += 1 vfep_writer.writerow(vfp_dict)
def plot_file(filename1): base_name = os.path.basename(filename1) name_parts = base_name.split('_') work_path = os.path.dirname(__file__) scores_filename = os.path.join( work_path, '_'.join(name_parts[:2] + ['v3loop_scores.csv'])) if os.path.exists(scores_filename): with open(scores_filename) as f: reader = DictReader(f) score_rows = [list(map(int, row)) for row in map(itemgetter('score', 'count'), reader)] else: source1 = os.path.join('micall/tests/working/v3loop_alignment_scores/', filename1) source2 = source1.replace('_R1_', '_R2_') start = datetime.now() with open(source1) as fastq1, open(source2) as fastq2: score_counts = align_reads(fastq1, fastq2) print('{}: {}'.format(datetime.now() - start, filename1)) score_rows = sorted(score_counts.items()) with open(scores_filename, 'w') as scores_csv: writer = DictWriter(scores_csv, ('score', 'count'), lineterminator=os.linesep) writer.writeheader() for score, count in score_rows: writer.writerow(dict(score=score, count=count)) scores = [row[0] for row in score_rows] counts = [row[1] for row in score_rows] total_count = float(sum(counts)) fractions = [count/total_count for count in counts] plt.plot(scores, fractions, label=base_name.split('_')[0], alpha=0.7)
def append_dict_as_row(file_name, dict_of_elem, field_names):
    with open(file_name, 'a+', newline='') as write_obj:
        dict_writer = DictWriter(write_obj, fieldnames=field_names)
        dict_writer.writerow(dict_of_elem)
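# A minimal usage sketch (file name and fields are illustrative): appends one
# row to an existing CSV without rewriting the header.
field_names = ['id', 'name', 'score']
append_dict_as_row('scores.csv', {'id': 3, 'name': 'Ada', 'score': 97}, field_names)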
def summarize_junctions( sample_dirs: Dict[str, Path], # sample_names: List[str], gff_filename: Union[str, Path], output_prefix: Union[str, Path], genome_d: Optional[Union[str, Path]] = None, junction_known: Optional[Union[str, Path]] = None, ) -> defaultdict: """ 1. for each sample, read all the GFF, store the junction information (both 0-based) """ junc_by_chr_strand = defaultdict( lambda: defaultdict(list) ) # (seqname,strand) --> (donor,acceptor) --> samples it show up in (more than once possible) for sample_name, d in sample_dirs.items(): for r in GFF.collapseGFFReader(Path(d, gff_filename)): n = len(r.ref_exons) if n == 1: continue # ignore single exon transcripts for i in range(n - 1): donor = r.ref_exons[i].end - 1 # make it 0-based accep = r.ref_exons[i + 1].start # start is already 0-based junc_by_chr_strand[r.seqname, r.strand][donor, accep].append(sample_name) # write junction report with open(f"{output_prefix}.junction.bed", "w") as f1, open(f"{output_prefix}.junction_detail.txt", "w") as f: f1.write( f'track name=junctions description="{output_prefix}" useScore=1\n') JUNC_DETAIL_FIELDS = [ "seqname", "left", "right", "strand", "num_transcript", "num_sample", "genome", "annotation", "label", ] writer = DictWriter(f, JUNC_DETAIL_FIELDS, delimiter="\t") writer.writeheader() keys = list(junc_by_chr_strand) keys.sort() for _seqname, _strand in keys: v = junc_by_chr_strand[_seqname, _strand] v_keys = list(v) v_keys.sort() labels = cluster_junctions(v_keys) for i, (_donor, _accep) in enumerate(v_keys): rec = { "seqname": _seqname, "left": _donor, "right": _accep, "strand": _strand, "num_transcript": len(v[_donor, _accep]), "num_sample": len(set(v[_donor, _accep])), } # f.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t".format(_chr, _donor, _accep, _strand, len(v[_donor,_accep]), len(set(v[_donor,_accep])))) f1.write( f"{_seqname}\t{_donor}\t{_accep + 1}\t{output_prefix}\t{len(v[_donor, _accep])}\t{_strand}\n" ) # if genome is given, write acceptor-donor site if genome_d is None or _seqname not in genome_d: rec["genome"] = "NA" # f.write("NA\t") else: up, down = ( genome_d[_seqname][(_donor + 1):(_donor + 3)], genome_d[_seqname][(_accep - 2):_accep], ) if _strand == "+": rec["genome"] = f"{str(up.seq).upper()}-{str(down.seq).upper()}" # f.write("{0}-{1}\t".format(str(up.seq).upper(), str(down.seq).upper())) else: rec["genome"] = f"{str(down.reverse_complement().seq).upper()}-{str(up.reverse_complement().seq).upper()}" # f.write("{0}-{1}\t".format(str(down.reverse_complement().seq).upper(), str(up.reverse_complement().seq).upper())) # if annotation is given, check if matches with annotation if junction_known is None: rec["annotation"] = "NA" # f.write("NA\n") else: if (_seqname, _strand) in junction_known and ( _donor, _accep, ) in junction_known[_seqname, _strand]: rec["annotation"] = "Y" # f.write("Y\t") else: rec["annotation"] = "N" # f.write("N\t") rec["label"] = f"{_seqname}_{_strand}_{labels[i]}" writer.writerow(rec) # f.write("{c}_{s}_{lab}\n".format(c=_seqname, s=_strand, lab=labels[i])) return junc_by_chr_strand
type=argparse.FileType('r')) parser.add_argument('input2_csv', help='CSV2 to compare', type=argparse.FileType('r')) parser.add_argument('output_csv', help='CSV to output diff', type=argparse.FileType('w')) args = parser.parse_args() r1 = DictReader(args.input1_csv, dialect="excel") r2 = DictReader(args.input2_csv, dialect="excel") w = DictWriter(args.output_csv, fieldnames=["filename"] + sorted(r1.fieldnames), dialect="excel") w.writeheader() for l1, l2 in zip(r1, r2): if l1 != l2: l1.update({"filename": args.input1_csv.name}) l2.update({"filename": args.input2_csv.name}) w.writerow(l1) w.writerow(l2) w.writerow({k: "" for k in l1}) args.input1_csv.close() args.input2_csv.close() args.output_csv.close()
def create_table(raw_data): from operator import itemgetter import matplotlib.pyplot as plt import pylab from csv import DictWriter req_fields = [('Time stepper orig name', 0), ('Stencil Kernel semi-bandwidth', 1), ('Stencil Kernel coefficients', 0), ('Precision', 0), ('Number of time steps', 1), ('Number of tests', 1), ('Global NX', 1), ('Global NY', 1), ('Global NZ', 1), ('Thread group size', 1), ('Intra-diamond prologue/epilogue MStencils', 1), ('Energy', 2), ('Energy DRAM', 2), ('Power', 2), ('Power DRAM', 2), ('WD main-loop RANK0 MStencil/s MAX', 2), ('MStencil/s MAX', 2), ('OpenMP Threads', 1)] data = [] for k in raw_data: tup = dict() # defaults if k['Intra-diamond prologue/epilogue MStencils'] == '': k['Intra-diamond prologue/epilogue MStencils'] = 0 # add the general fileds for f in req_fields: try: v = k[f[0]] if f[1] == 1: v = int(k[f[0]]) if f[1] == 2: v = float(k[f[0]]) except: print f[0] tup[f[0]] = v # add the stencil operator tup['Stencil'] = get_stencil_num(k) data.append(tup) # for i in data: print i data2 = [] for tup in data: glups = (tup['Number of time steps'] * tup['Global NX'] * tup['Global NY'] * tup['Global NZ'] - tup['Intra-diamond prologue/epilogue MStencils'] * 10**6) * tup['Number of tests'] / 10**9 tup['Total pJoul/LUP'] = (tup['Energy'] + tup['Energy DRAM']) / glups tup['DRAM pJoul/LUP'] = (tup['Energy DRAM']) / glups tup['CPU pJoul/LUP'] = (tup['Energy']) / glups if 'Dynamic' in tup['Time stepper orig name']: tup['Time stepper orig name'] = 'MWD' if 'Dynamic' in tup['Time stepper orig name']: tup['Performance'] = tup['WD main-loop RANK0 MStencil/s MAX'] else: tup['Performance'] = tup['MStencil/s MAX'] tup['Threads'] = tup['OpenMP Threads'] tup['Method'] = tup['Time stepper orig name'] data2.append(tup) #for i in data2: print i from operator import itemgetter data2 = sorted(data2, key=itemgetter('Stencil', 'Thread group size', 'Time stepper orig name', 'Global NX', 'Global NY', 'Global NZ')) fields = [ 'Method', 'Stencil', 'Threads', 'Thread group size', 'Global NX', 'Global NY', 'Global NZ', 'Precision', 'Power', 'Power DRAM', 'CPU pJoul/LUP', 'DRAM pJoul/LUP', 'Total pJoul/LUP', 'Performance' ] with open('energy_consumption.csv', 'w') as output_file: r = DictWriter(output_file, fieldnames=fields) r.writeheader() for k in data2: k2 = dict() for f in k.keys(): for f2 in fields: if f == f2: k2[f] = k[f] r.writerow(k2)
if __name__ == "__main__":
    # Cast to list to keep it all in memory
    train = list(DictReader(open("train.csv", 'r')))
    test = list(DictReader(open("test.csv", 'r')))

    feat = Featurizer()

    labels = []
    for line in train:
        if not line['cat'] in labels:
            labels.append(line['cat'])

    x_train = feat.train_feature(x['text'] for x in train)
    x_test = feat.test_feature(x['text'] for x in test)
    y_train = array(list(labels.index(x['cat']) for x in train))

    # Train classifier
    lr = SGDClassifier(loss='log', penalty='l2', shuffle=True)
    lr.fit(x_train, y_train)

    feat.show_top10(lr, labels)
    predictions = lr.predict(x_test)
    o = DictWriter(open("predictions.csv", 'w'), ["id", "cat"])
    o.writeheader()
    for ii, pp in zip([x['id'] for x in test], predictions):
        d = {'id': ii, 'cat': labels[pp]}
        o.writerow(d)
def write_objects_csv(pad, objects, headers):
    with open(pad, 'w', newline="") as outfile:
        writer = DictWriter(outfile, delimiter=";", fieldnames=headers)
        writer.writeheader()
        for obj in objects:
            writer.writerow(obj.write_to_dict())
def PhishSimCSV(field_names, f_obj, user_d):
    """Writes results to a CSV file."""
    f_names = field_names
    writer = DictWriter(f_obj, fieldnames=f_names)
    writer.writerow(user_d)
""" import os import pickle import sys from csv import DictWriter if not os.path.exists("rt2freshdesk.cache"): print("Missing RT data") sys.exit(2) # Load RT from cache with open("rt2freshdesk.cache", "rb") as handle: data = pickle.load(handle) users = data["users"] with open("freshdesk-users.csv", "w") as handle: writer = DictWriter(handle, ["Name", "Email"]) for user in users.values(): if "EmailAddress" not in user: print("Skipping user without email: {}".format(user)) continue if user.get("Privileged"): print("Skipping privileged user {}".format(user["EmailAddress"])) continue email = user["EmailAddress"] name = user.get("RealName", user.get("Name", "")) if name == email: name = "" writer.writerow({"Name": name, "Email": email})
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn import metrics
from matplotlib import pyplot as plt
import numpy as np

from .data import X_t, target, target_test, X, X_full, dataset, X_f_t

clf = GradientBoostingClassifier(n_estimators=4000, learning_rate=0.001)
# clf.fit(X, target)
# print(metrics.classification_report(target_test, clf.predict(X_t)))
# clf = RandomForestClassifier(n_jobs=4, n_estimators=10000, min_samples_leaf=3)
# clf.fit(X, target)
# print(metrics.classification_report(target_test, clf.predict(X_t)))
clf.fit(X_full, dataset.Survived)

from csv import DictWriter

w = DictWriter(open('solve.csv', 'w'), fieldnames=['SUB_ID', 'AGE_GROUP1'])
w.writeheader()
for i, x in enumerate(clf.predict(X_f_t)):
    w.writerow({'SUB_ID': i + 892, 'AGE_GROUP1': x})

width = 0.35
plt.bar(np.arange(len(clf.feature_importances_)), clf.feature_importances_, width=0.35)
use_field = ['Pclass']
plt.xticks(np.arange(len(clf.feature_importances_)) + width / 2., use_field)
plt.show()
def cleanup_scrubbed_files_redundancy(gff_filename, group_filename, count_filename, fastq_filename, output_prefix): junction_seen = defaultdict(lambda: defaultdict(lambda: [ ])) # key (chr,strand) -> dict of (series of junctions) -> record for r in GFF.collapseGFFReader(gff_filename): n = len(r.ref_exons) if n == 1: junc_str = str(r.start) + ',' + str(r.end) junction_seen[r.chr, r.strand][junc_str] = [r] else: junc_str = ",".join( str(r.ref_exons[i].end) + ',' + str(r.ref_exons[i + 1].start) for i in xrange(n - 1)) junction_seen[r.chr, r.strand][junc_str].append(r) # write out cleaned GFF outf = open(output_prefix + '.gff', 'w') outf2 = open(output_prefix + '.merged_ids.txt', 'w') merged = {} keys = junction_seen.keys() keys.sort() for k in keys: for bunch in junction_seen[k].itervalues(): if len(bunch) == 1: # just one record, write it out r = bunch[0] GFF.write_collapseGFF_format(outf, r) merged[r.seqid] = [r.seqid] else: # find the representative r = bunch[0] for r2 in bunch[1:]: if r2.end - r2.start > r.end - r.start: r = r2 GFF.write_collapseGFF_format(outf, r) merged[r.seqid] = [x.seqid for x in bunch] outf2.write("{0}\t{1}\n".format(r.seqid, ",".join(merged[r.seqid]))) outf.close() outf2.close() count_d, count_header = read_count_file(count_filename) # write out count file outf = open(output_prefix + '.abundance.txt', 'w') outf.write(count_header) writer = DictWriter(outf, fieldnames=['pbid','count_fl','count_nfl','count_nfl_amb','norm_fl','norm_nfl','norm_nfl_amb'], \ delimiter='\t', lineterminator='\n') writer.writeheader() for pbid, bunch in merged.iteritems(): # combine the counts r = count_d[bunch[0]] r['pbid'] = pbid for field in fields_to_add: r[field] = float(r[field]) for _id in bunch[1:]: for field in fields_to_add: r[field] += float(count_d[_id][field]) writer.writerow(r) outf.close() group_info = read_group_file(group_filename) # write out group file outf = open(output_prefix + '.group.txt', 'w') for pbid, bunch in merged.iteritems(): # combine the groups g = [group_info[bunch[0]]] for _id in bunch[1:]: g.append(group_info[_id]) outf.write("{0}\t{1}\n".format(pbid, ",".join(g))) outf.close() # write out fastq file if present if fastq_filename is not None: outf = open(output_prefix + '.rep.fq', 'w') for r in SeqIO.parse(open(fastq_filename), 'fastq'): if r.id.split('|')[0] in merged or r.id in merged: SeqIO.write(r, outf, 'fastq') outf.close() print >> sys.stderr, "scrubbed files written: {0}.gff, {0}.group.txt, {0}.abundance.txt, {0}.merged_ids.txt".format( output_prefix)
object_units_details += ' (default {default})'.format( default=default_value) object_units_details = object_units_details.strip() #writable = 'TRUE' except TypeError: pass except ValueError: pass _log.debug(' object units = ' + str(object_units)) _log.debug(' object units details = ' + str(object_units_details)) _log.debug(' object notes = ' + object_notes) results = {} results['Reference Point Name'] = results[ 'Volttron Point Name'] = object_name results['Units'] = object_units results['Unit Details'] = object_units_details results['BACnet Object Type'] = obj_type results['Property'] = 'presentValue' results['Writable'] = writable results['Index'] = index results['Notes'] = object_notes config_writer.writerow(results) except Exception as e: _log.exception("an error has occurred: %s", e) finally: _log.debug("finally")
from csv import DictWriter

with open("cats_2.csv", "w") as file:
    headers = ["Name", "Breed", "Age"]
    csv_writer = DictWriter(file, fieldnames=headers)
    csv_writer.writeheader()
    csv_writer.writerow({"Name": "Shoe", "Breed": "Tuxie", "Age": 1})
    csv_writer.writerow({"Name": "Agatha", "Breed": "Tuxie", "Age": 3})
from csv import DictReader, DictWriter


def cm_to_in(cm):
    # 1 cm = 0.393701 in
    return round(float(cm) * 0.393701, 2)


with open('fighters.csv') as file:
    csv_reader = DictReader(file)
    fighters = list(csv_reader)

with open('inches_fighters.csv', "w") as file:
    headers = ["Name", "Country", "Height (in inches)"]
    csv_writer = DictWriter(file, fieldnames=headers)
    csv_writer.writeheader()
    for fighter in fighters:
        csv_writer.writerow({
            "Name": fighter["Name"],
            "Country": fighter["Country"],
            "Height (in inches)": cm_to_in(fighter["Height (in cm)"]),
        })
nsamples, nx, ny = VX_train.shape
VX_train = VX_train.reshape((nsamples, nx * ny))
VX_train = drmodel.transform(VX_train)
VX_train = np.append(VX_train, X_train[svclassifier.support_], axis=0)
Vy_train = np.append(Vy_train, y_train[svclassifier.support_])
print(VX_train.shape)
print(Vy_train.shape)

svclassifier2 = SVC()
svclassifier2.fit(VX_train, Vy_train)
y_pred = svclassifier2.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
ac_svm = accuracy_score(y_test, y_pred)
print("Number of support points " + str(svclassifier2.support_.shape[0]))

filename = 'svm_base_%s_%s.pckl' % (str(dim), str(vecinos))
elapsed_time = perf_counter() - t0
print("Time " + str(elapsed_time))
pickle.dump(svclassifier2, open(filename, 'wb'), protocol=4)

result = {
    "Dimension": dim,
    "Accuracy_standard": ac_st,
    "Accuracy_vsvm": ac_svm
}
with open("svmiso.csv", "a+", newline='') as file:
    dict_writer = DictWriter(file, fieldnames=field_names)
    dict_writer.writerow(result)
def add_user(first_name, last_name):
    with open("users.csv", "a") as file:
        csv_writer = DictWriter(file, fieldnames=["First Name", "Last Name"])
        # csv_writer.writeheader()  # not needed when appending to a file that already has a header row
        csv_writer.writerow({"First Name": first_name, "Last Name": last_name})
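# A minimal usage sketch (assumes users.csv already exists with the
# "First Name","Last Name" header row); each call appends one row:
add_user("Grace", "Hopper")
add_user("Alan", "Turing")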
if (Enable_Sort == "y"): if not os.path.exists(Error_Destination_DIR): os.makedirs(Error_Destination_DIR) if not os.path.exists(Match_Destination_DIR): os.makedirs(Match_Destination_DIR) if not os.path.exists(NoMatch_Destination_DIR): os.makedirs(NoMatch_Destination_DIR) if (error): os.replace(file, f'{Error_Destination_DIR}{file_name[-1]}') elif (match): os.replace(file, f'{Match_Destination_DIR}{file_name[-1]}') else: os.replace(file, f'{NoMatch_Destination_DIR}{file_name[-1]}') # Open your CSV file in append mode # Create a file object for this file with open('list.csv', 'a', newline='') as f_object: # Pass the file object and a list # of column names to DictWriter() # You will get a object of DictWriter dictwriter_object = DictWriter(f_object, fieldnames=field_names) #Pass the dictionary as an argument to the Writerow() dictwriter_object.writerow(element) #Close the file object f_object.close()
class SummaryGenerator(Generator): generatorname = os.path.basename(__file__) generatorversion = "0.1.0" valid_formats = ['tsv'] def __init__(self, schema: Union[str, TextIO, SchemaDefinition], fmt: str = 'tsv') -> None: super().__init__(schema, fmt) self.dirname = None self.classtab: DictWriter = None self.slottab: DictWriter = None self.dialect = 'excel-tab' def visit_schema(self, **kwargs) -> None: self.classtab = DictWriter(sys.stdout, [ 'Class Name', 'Parent Class', 'YAML Class Name', 'Description', 'Flags', 'Slot Name', 'YAML Slot Name', 'Range', 'Card', 'Slot Description', 'URI' ], dialect=self.dialect) self.classtab.writeheader() def visit_class(self, cls: ClassDefinition) -> bool: self.classtab.writerow({ 'Class Name': camelcase(cls.name), 'Parent Class': camelcase(cls.is_a) if cls.is_a else '', 'YAML Class Name': cls.name, 'Description': cls.description }) return True def visit_class_slot(self, cls: ClassDefinition, aliased_slot_name: str, slot: SlotDefinition) -> None: min_card = 1 if slot.required else 0 max_card = "*" if slot.multivalued else 1 abstract = 'A' if slot.abstract else '' key = 'K' if slot.key else '' identifier = 'I' if slot.identifier else '' readonly = 'R' if slot.readonly else '' ref = '*' if slot.range in self.schema.classes and not slot.inlined else '' self.classtab.writerow({ 'Slot Name': aliased_slot_name, 'Flags': abstract + key + identifier + readonly, 'Card': f"{min_card}..{max_card}", 'YAML Slot Name': slot.name if slot.name != aliased_slot_name else '', 'Range': ref + self.class_or_type_name(slot.range), 'Slot Description': slot.description, 'URI': slot.slot_uri })
next(csv_reader)
with open(filename, "w") as file:
    csv_writer = writer(file)
    csv_writer.writerow(["name", "country", "height"])
    for row in csv_reader:
        csv_writer.writerow([fighter.upper() for fighter in row])

with open(filename) as file:
    csv_reader = DictReader(file)
    with open("fighthers_inches.csv", "w") as file:
        csv_writer = DictWriter(file, fieldnames=csv_reader.fieldnames)
        csv_writer.writeheader()
        for row in list(csv_reader):
            csv_writer.writerow({
                csv_writer.fieldnames[0]: row[csv_writer.fieldnames[0]],
                csv_writer.fieldnames[1]: row[csv_writer.fieldnames[1]],
                csv_writer.fieldnames[2]: cm_to_in(row[csv_writer.fieldnames[2]]),
            })

with open("cats.csv", "w") as file:
    headers = ["name", "breed", "age"]
    csv_writer = DictWriter(file, fieldnames=headers)
    csv_writer.writeheader()
    csv_writer.writerow({
        "name": "Garfield",
        "breed": "Orange Tabby",
        "age": 10,
    })
# If this parameter is omitted, the library assumes ',' as the delimiter
leitor_csv = DictReader(arquivo, delimiter=',')
for linha in leitor_csv:
    print(f"{linha['Nome']}, {linha['País']}, {linha['Altura (em cm)']}")

with open('filmes.csv', 'a') as arquivo:
    escritor_csv = writer(arquivo)
    filme = None
    escritor_csv.writerow(['Titulo', 'Genero', 'Duracao'])
    while filme != 'sair':
        filme = input("Titulo: ")
        if filme != 'sair':
            genero = input("Genero: ")
            duracao = input("Duracao: ")
            escritor_csv.writerow([filme, genero, duracao])

with open('filmes.csv', 'a') as arquivo:
    cabecalho = ['Titulo', 'Genero', 'Duracao']
    escritor_csv = DictWriter(arquivo, fieldnames=cabecalho)
    escritor_csv.writeheader()
    filme = None
    while filme != 'sair':
        filme = input("Titulo: ")
        if filme != 'sair':
            genero = input("Genero: ")
            duracao = input("Duracao: ")
            escritor_csv.writerow({"Titulo": filme, "Genero": genero, "Duracao": duracao})
# from csv import writer
# with open("cats.csv", "w") as file:
#     csv_writer = writer(file)
#     csv_writer.writerow(["Name", "Age"])
#     csv_writer.writerow(["Blue", 3])
#     csv_writer.writerow(["Kitty", 1])

from csv import writer, DictWriter

with open("cats.csv", "w") as file:
    headers = ["Name", "Breed", "Age"]
    csv_writer = DictWriter(file, fieldnames=headers)
    csv_writer.writeheader()
    csv_writer.writerow({
        "Name": "Garfield",
        "Breed": "Orange Tabby",
        "Age": 10,
    })

# Another example: convert fighters' heights from cm to inches.
# We read from fighters.csv, pass each cm value to the cm_to_in function,
# and write the converted rows into a new file.
from csv import DictReader, DictWriter


def cm_to_in(cm):
    # Better would be to validate the argument
    return round(float(cm) * 0.393701, 2)


with open("fighters.csv") as file:
# and then strip the extension by removing the ".png" wherever it appears.
namee = i.split("_")
name = namee[-1]
name = name.replace(".png", "")
# here only the extension is removed, because the KAPILAR_0026_NORM-157 part is needed.
sku = i.replace(".png", "")
# here the image path is built.
uzanti = "erenodoor.com/wp-content/upload/images/" + i
# above we already checked and counted the keywords contained in the name;
# here, whichever count is positive determines the category.
if cat1 > 0:
    categori = "AYKA"
elif cat2 > 0:
    categori = "NORM"
else:
    categori = "YOF"
# either writerow or writerows could be used here.
# Here we build a dictionary and write its contents to the file
csv_writer.writerow({
    'id': id,
    'name': name,
    'sku': sku,
    'cat': categori,
    'uzanti': uzanti
})
pid = question["_id"]["$oid"] ans = question["answer"] category = map_protobowl(question['category'], question.get('subcategory', '')) page = pa(ans, tk(question["question"]), pb=pid) fold = assign_fold(question["tournament"], question["year"]) sents = add_question(conn, last_id, question["tournament"], category, page, question["question"], ans, protobowl=pid, fold=fold) for ii, ss in sents: o.writerow({"id": pid, "sent": ii, "text": ss, "ans": ans, "page": page, "fold": fold}) if page == "": norm = QuestionDatabase.normalize_answer(ans) if pa.is_ambiguous(norm): ambiguous[norm][pid] = question["question"] else: unmapped[norm] += 1 else: folds[fold] += 1 last_id += 1 if last_id % 1000 == 0: progress = pa.get_counts()
def summarize_junctions(sample_dirs, sample_names, gff_filename, output_prefix, genome_d=None, junction_known=None): """ 1. for each sample, read all the GFF, store the junction information (both 0-based) """ junc_by_chr_strand = defaultdict(lambda: defaultdict(lambda: [])) # (chr,strand) --> (donor,acceptor) --> samples it show up in (more than once possible) for sample_name, d in sample_dirs.items(): for r in GFF.collapseGFFReader(os.path.join(d, gff_filename)): n = len(r.ref_exons) if n == 1: continue # ignore single exon transcripts for i in range(n-1): donor = r.ref_exons[i].end-1 # make it 0-based accep = r.ref_exons[i+1].start # start is already 0-based junc_by_chr_strand[r.chr, r.strand][donor, accep].append(sample_name) # write junction report f1 = open(output_prefix+'.junction.bed', 'w') f1.write("track name=junctions description=\"{0}\" useScore=1\n".format(output_prefix)) JUNC_DETAIL_FIELDS = ['chr', 'left', 'right', 'strand', 'num_transcript', 'num_sample', 'genome', 'annotation', 'label'] with open(output_prefix+'.junction_detail.txt', 'w') as f: writer = DictWriter(f, JUNC_DETAIL_FIELDS, delimiter='\t') writer.writeheader() keys = list(junc_by_chr_strand.keys()) keys.sort() for _chr, _strand in keys: v = junc_by_chr_strand[_chr, _strand] v_keys = list(v.keys()) v_keys.sort() labels = cluster_junctions(v_keys) for i,(_donor, _accep) in enumerate(v_keys): rec = {'chr': _chr, 'left': _donor, 'right': _accep, 'strand': _strand, 'num_transcript': len(v[_donor,_accep]), 'num_sample': len(set(v[_donor,_accep]))} #f.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t".format(_chr, _donor, _accep, _strand, len(v[_donor,_accep]), len(set(v[_donor,_accep])))) f1.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n".format(_chr, _donor, _accep+1, output_prefix, len(v[_donor,_accep]), _strand)) # if genome is given, write acceptor-donor site if genome_d is None or _chr not in genome_d: rec['genome'] = 'NA' #f.write("NA\t") else: up, down = genome_d[_chr][_donor+1:_donor+3], genome_d[_chr][_accep-2:_accep] if _strand == '+': rec['genome'] = "{0}-{1}".format(str(up.seq).upper(), str(down.seq).upper()) #f.write("{0}-{1}\t".format(str(up.seq).upper(), str(down.seq).upper())) else: rec['genome'] = "{0}-{1}".format(str(down.reverse_complement().seq).upper(), str(up.reverse_complement().seq).upper()) #f.write("{0}-{1}\t".format(str(down.reverse_complement().seq).upper(), str(up.reverse_complement().seq).upper())) # if annotation is given, check if matches with annotation if junction_known is None: rec['annotation'] = 'NA' #f.write("NA\n") else: if (_chr, _strand) in junction_known and (_donor, _accep) in junction_known[_chr, _strand]: rec['annotation'] = 'Y' #f.write("Y\t") else: rec['annotation'] = 'N' #f.write("N\t") rec['label'] = "{c}_{s}_{lab}".format(c=_chr, s=_strand, lab=labels[i]) writer.writerow(rec) #f.write("{c}_{s}_{lab}\n".format(c=_chr, s=_strand, lab=labels[i])) f1.close() return junc_by_chr_strand
class CsvWriter(CsvData, DataWriter): """ A CSV writer to create a typical CSV file with head. It is very easy to use, only need to .. code-block:: python fieldnames = ['id', 'name', 'surname', 'address'] with ConflictsWriter('data.csv', fieldnames) as writer: writer.write_row(id=1, name='John', surname='Smith', address='Oxford street') Also, if the file ends with .gz, the file will be compressed with gzip automatically. """ @property def fieldnames(self) -> List[str]: """ :return: The sequence of field names to use as CSV head. """ return self._fieldnames def __init__(self, file_or_io: Union[str, TextIO, BinaryIO], fieldnames: Union[List[str], type, object] = None, mode: Mode = Mode.WRITE, encoding: str = 'utf-8') -> None: """ Constructor of this CSV writer. :param file_or_io: The file path or an opened stream to use. If it is a file path and it ends in .gz, then a compressed file is created using gzip. :param fieldnames: The field names of this CSV. :param mode: The writing mode: Mode.APPEND or Mode.WRITE. By default Mode.WRITE. :param encoding: The encoding (it is only used if the parameter file_or_io is a file path). :raises ValueError: If mode is not Mode.WRITE or Mode.APPEND or if file_or_io is a file stream with write or append modes but this modes does not correspond to the mode parameter. """ CsvData.__init__(self, file_or_io, mode, encoding) DataWriter.__init__(self, file_or_io, mode) self._fieldnames = self._parse_fieldnames(fieldnames) self._writer = DictWriter(self._file, fieldnames=self.fieldnames) if mode == Mode.WRITE: self._writer.writeheader() self.__num_row = 0 else: self.__num_row = None def write_row(self, **row) -> None: """ Write a row. :param row: The dictionary or parameters to write. """ self._writer.writerow(row) if self.__num_row is not None: self.__num_row += 1 def __len__(self) -> int: """ Calculate the number of rows in the file. :return: The number of rows in the data source. :raises DataFileError: If with this data source is not possible to calculate the number of rows. It is not possible to calculate if this comes from a file stream and it is opened as APPEND mode. """ if self.__num_row is None: if self.file_name: with CsvReader(self.file_name, encoding=self.encoding) as reader: self.__num_row = len(reader) else: raise DataSourceError( f'The length of the data source cannot be computed if it is defined as a file stream, ' f'instead of a file path and this writer is opened in APPEND mode.') return self.__num_row
    try:
        return [_ for _ in session][0]
    except:
        pass

def write(self, attempts, queue):
    if not attempts:
        return
    with open(self.file, 'w') as csvfile:
        fieldnames = ['attempts', 'queue']
        writer = DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerow({'attempts': attempts, 'queue': queue})

def delete(self):
    if path(self.file):
        try:
            remove(self.file)
        except:
            pass
from csv import DictReader, DictWriter


def cm_to_in(cm):
    return int(cm) * 0.393701


with open("fighters.csv") as file:
    csv_reader = DictReader(file)
    fighters = list(csv_reader)

with open("inchesFighters.py", "w") as file:
    headers = ('Name', 'Country', 'Height')
    csv_writer = DictWriter(file, fieldnames=headers)
    csv_writer.writeheader()
    for f in fighters:
        csv_writer.writerow({
            'Name': f['Name'],
            'Country': f['Country'],
            'Height': cm_to_in(f['Height (in cm)']),
        })
from csv import DictReader
from csv import DictWriter
from os.path import isfile
from os import environ
from os.path import join

inpath = environ['PATH_TO_UNUM']
baseoutpath = environ['UNUM_OUT_PATH']

count = 0
with open(inpath) as f:
    reader = DictReader(f, delimiter="\t")
    for line in reader:
        count += 1
        country_code = line["country_code"].lower() or "unknown"
        outpath = join(baseoutpath, country_code + ".tsv")
        if isfile(outpath):
            f = open(outpath, "a")
            writer = DictWriter(f, fieldnames=reader.fieldnames)
        else:
            f = open(outpath, "w")
            writer = DictWriter(f, fieldnames=reader.fieldnames)
            writer.writeheader()
        writer.writerow(line)
        f.close()
        if count % 100000 == 0:
            print("processed ", float(count) / 38000000)
from csv import DictWriter

with open('agents.csv', 'w', newline='') as file:
    fieldnames = ['Agent Index']
    writer = DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    for i in range(1000):
        writer.writerow({'Agent Index': i})
def process_eps(self, episodes): """ Export episodes metadata. """ if self.options.basename: basename = self.options.basename else: # name the file after the client_show of the first episode # normaly a file will not span client or show ep = episodes[0] show = ep.show client = show.client self.set_dirs(show) basename = "%s_%s" % (client.slug, show.slug) json_pathname = os.path.join(self.show_dir, "txt", basename + ".json") csv_pathname = os.path.join(self.show_dir, "txt", basename + ".csv") txt_pathname = os.path.join(self.show_dir, "txt", basename + ".txt") wget_pathname = os.path.join(self.show_dir, "txt", basename + ".wget") sh_pathname = os.path.join(self.show_dir, "txt", basename + ".sh") curl_pathname = os.path.join(self.show_dir, "txt", basename + "_test.sh") html_pathname = os.path.join(self.show_dir, "txt", basename + ".html") # blip_pathname = os.path.join( self.show_dir, "txt", basename+"_blip.xml" ) if self.options.verbose: print("filenames:") for n in ( json_pathname, csv_pathname, txt_pathname, wget_pathname, html_pathname, ): print(n) # fields to export: fields = "id conf_key conf_url state name slug primary host_url public_url source archive_mp4_url".split( ) # setup csv csv = DictWriter(open(csv_pathname, "w"), fields) # write out field names csv.writerow(dict(list(zip(fields, fields)))) # setup txt txt = open(txt_pathname, "w") wget = open(wget_pathname, "w") sh = open(sh_pathname, "w") curl = open(curl_pathname, "w") # xml=open(blip_pathname, "w") # setup html (not full html, just some snippits) html = open(html_pathname, "w") # setup json (list written to file at end.) json_data = [] # file headers sh.writelines("#! /bin/bash -ex\n\n") curl.writelines("#! /bin/bash -ex\n\n") # write out episode data for ep in episodes: if not ep.rax_mp4_url: # skip episodes that have not been uploaded yet. continue # fields includes output fields that are derived below # so fill them with None for now. 
row = dict([(f, getattr(ep, f, None)) for f in fields]) if self.options.verbose: print(row) # blip_cli=blip_uploader.Blip_CLI() # blip_cli.debug = self.options.verbose # xml_code = blip_cli.Get_VideoMeta(ep.host_url) # if self.options.verbose: print xml_code # blip_meta = blip_cli.Parse_VideoMeta(xml_code) # if self.options.verbose: print blip_meta # if self.options.verbose: print pprint.pprint(blip_meta) # blip_xml=self.blip_meta(ep.host_url) # show_page = self.get_showpage(blip_xml) # row['blip'] = "%sfile/%s"%(show_page,ep.host_url) # row['blip'] = "http://blip.tv/file/%s"%(ep.host_url) # xml.write(blip_xml) # if self.options.verbose: print blip_xml # row['embed']=self.get_embed(blip_xml) # row['source']=self.get_media(blip_xml) # row['embed']=blip_meta['embed_code'] # oggs = [i for i in blip_meta['contents'] if i['type']=='video/ogg'] # if self.options.verbose: print pprint.pprint(oggs) # row['source']=oggs[0] row['name'] = row['name'].encode('utf-8') if self.options.verbose: print(row) json_data.append(row) csv.writerow(row) # txt.write("%s %s\n" % (row['blip'],row['name'])) # html.write('<a href="%(blip)s">%(name)s</a>\n%(blip)s\n'%row) # wget.writelines(["%s\n" % c['url'] for c in blip_meta['contents']]) wget.writelines(ep.rax_mp4_url + "\n") sh.writelines("wget -N '%s' -O %s.mp4\n" % (ep.rax_mp4_url, ep.slug)) curl.writelines("echo Checking %s ...\n" % (ep.slug)) curl.writelines("curl -s --head '%s' |grep -q '200 OK'\n" % (ep.archive_mp4_url, )) curl.writelines("echo Passed.\n") if self.options.verbose: json.dump(json_data, open(json_pathname, "w"), indent=2) else: json.dump(json_data, open(json_pathname, "w")) pprint.pprint(json_data)
# check whether at least one object was detected
if len(results) > 0:
    for i in results.flatten():
        x_min, y_min = bounding_boxes[i][0], bounding_boxes[i][1]
        box_width, box_height = bounding_boxes[i][2], bounding_boxes[i][3]
        colours_box_current = colours[class_numbers[i]].tolist()
        cv2.rectangle(frame, (x_min, y_min),
                      (x_min + box_width, y_min + box_height),
                      colours_box_current, 2)
        # prepare the text with label and confidence for the detected object
        text_box_current = "{}: {:.4f}".format(labels[int(class_numbers[i])], confidences[i])
        # draw the text on the detected objects
        cv2.putText(frame, text_box_current, (x_min, y_min - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, colours_box_current, 2)
        escritor_csv.writerow({"Detectado": text_box_current.split(":")[0],
                               "Acuracia": text_box_current.split(":")[1]})
        print(text_box_current.split(":")[0] + " - " + text_box_current.split(":")[1])

cv2.namedWindow('Yolo v3 WebCamera', cv2.WINDOW_NORMAL)
cv2.imshow("Yolo v3 Cam", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
    break

camera.release()
cv2.destroyAllWindows()
def csv_results(self, csv_file, histogram_size=None):
    histogram_size = histogram_size or 10
    bins = range(histogram_size)
    bins.insert(0, "idea")
    bins.extend(["avg", "std_dev"])
    dw = DictWriter(csv_file, bins, dialect='excel', delimiter=';')
    dw.writeheader()
    by_idea = self._gather_results()
    values = {
        votable_id: self.results_for(voting_results, histogram_size)
        for (votable_id, voting_results) in by_idea.iteritems()
    }
    idea_names = dict(self.db.query(Idea.id, Idea.short_title).filter(
        Idea.id.in_(by_idea.keys())))
    idea_names = {
        id: name.encode('utf-8') for (id, name) in idea_names.iteritems()}
    ordered_idea_ids = Idea.visit_idea_ids_depth_first(
        AppendingVisitor(), self.get_discussion_id())
    ordered_idea_ids = [id for id in ordered_idea_ids if id in values]
    for idea_id in ordered_idea_ids:
        base = values[idea_id]
        r = dict(enumerate(base['histogram']))
        r['idea'] = idea_names[idea_id]
        r['avg'] = base['avg']
        r['std_dev'] = base['std_dev']
        dw.writerow(r)