Example No. 1
def save_results(results, path, header = None, mode = 'w'):
    """
    Writes results to the specified path

    Parameters
    ----------
    results : dict or list of dict
        the results to write
    path : str
        the path to the output file
    header : list, optional
        the column names; defaults to None, in which case they are inferred from the results
    mode : str
        defaults to 'w' (write); can be 'a' (append)
    """
    if header is None:
        try:
            header = results.keys()
        except AttributeError:
            try:
                header = results[0].keys()
            except AttributeError:
                raise Exception('Could not get the column header from the list, please specify the header.')
    with open(path, mode, encoding = 'utf8') as f:
        writer = DictWriter(f, header)
        if mode != 'a':
            writer.writeheader()
        for line in results:
            writer.writerow({k: make_safe(line[k], '/') for k in header})
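A minimal usage sketch (not part of the original example); it supplies the csv import and a trivial stand-in for the make_safe helper that save_results expects:

from csv import DictWriter

def make_safe(value, delimiter):
    # hypothetical stand-in for the real make_safe helper; just stringify the value
    return str(value)

rows = [{'word': 'cat', 'freq': 12}, {'word': 'dog', 'freq': 7}]
save_results(rows, 'results.csv')            # header inferred from the first row
save_results(rows, 'results.csv', mode='a')  # append more rows without rewriting the header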
 def writePredictions(self):
     print "In writePredictions"
     o = DictWriter(open("predictions.csv", "w"), ["id", "position"])
     o.writeheader()
     for ii, pp in zip([x["id"] for x in self.test], self.predictions):
         d = {"id": ii, "position": pp}
         o.writerow(d)
Example No. 3
    def test_01_importTab(self):
        # First check the tables; if none exist, create them directly
        #client.drop_database(DB_INFO['DB_NAME'])
        SD.importTab()
        SD.copy_table()
        self.assertIn('FACT_ATTR', db.collection_names())
        self.assertIn('FACT_SERVICE', db.collection_names())
        self.assertIn('FACT_ATTR_SET', db.collection_names())
        self.assertIn('FACT_SCENE', db.collection_names())

        # If they exist, check the id first: insert it if it does not exist, otherwise update
        L = list()
        table = 'FACT_ATTR'
        filepath = os.path.sep.join([os.path.abspath(Const.DATA_IN), table])
        with open(filepath, 'r') as f:
            dReader = DictReader(f)
            L = [i for i in dReader]
        L[-1]['attr_set_id'] = 1
        L[-1]['value'] = 'rampage'
        L.append({'_id': 4, 'name': 'attr4', 'type_id':6, 'value': 'test', 'attr_set_id': 2})
        with open(filepath, 'w') as f:
            titles = L[-1].keys()
            dwriter = DictWriter(f, titles)
            header = dict(zip(titles, titles))
            dwriter.writerow(header)
            dwriter.writerows(L)

        SD.importTab()
        # This also tests the query along the way
        match = {'_id': {'$in': [3,4]}}
        rs = list(db[table].find(match))
        self.assertEqual(len(rs), 2)
        self.assertEqual(rs[-2]['attr_set_id'], 1)
        self.assertEqual(rs[-2]['value'], 'rampage')
Example No. 4
def main(args):
    with open("users.csv", "w") as f:
        fieldnames = ["username", "first_name", "last_name", "email", "phone", "institution", "voro_account"]
        csvFile = DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
        # write header row
        field_dict = dict([(x, x.capitalize()) for x in fieldnames])
        csvFile.writerow(field_dict)
        for user in User.objects.all():
            # look up associated profile & inst
            try:
                profile = user.get_profile()
                phone = profile.phone
                voro_account = profile.voroEAD_account
            except UserProfile.DoesNotExist:
                phone = ""
                voro_account = False
            user.__dict__["phone"] = phone
            user.__dict__["voro_account"] = voro_account
            # inst through group
            groups = user.groups.all()
            instname = ""
            if len(groups):
                firstgroup = user.groups.all()[0]
                grpprofile = firstgroup.groupprofile
                insts = grpprofile.institutions.all()
                if len(insts):
                    instname = insts[0].name
                else:
                    instname = ""
            user.__dict__["institution"] = instname.encode("utf-8")
            csvFile.writerow(user.__dict__)
Example No. 5
def main():
    search_par_h = open("data/search_params.csv", "w")
    writer = DictWriter(search_par_h, fieldnames=["SearchID", "SearchParams"])
    writer.writeheader()
    for t, row in read_tsv("data/SearchInfo.tsv"):
        sparams = row["SearchParams"]
        if not sparams:
            continue
        sid = int(row["SearchID"])
        sparams = re.sub(r"([A-Za-z0-9]+):", r'"\1":', sparams)
        sparams = sparams.replace("'", "\"")
        sparams = sparams.replace("Минивэн\",", "\"Минивэн\",")
        sparams = sparams.replace("Микроавтобус\"]", "\"Микроавтобус\"]")
        sparams = unicode(sparams, "utf-8")
        try:
            sparams = json.loads(sparams)
            for k, v in sparams.items():
                t = type(v)
                if t not in type_set:
                    print t, k, v
                    type_set.add(t)
            sparams_str = json.dumps(sparams)
            writer.writerow({"SearchID": sid, "SearchParams": sparams_str})
        except Exception as e:
            print e
            print sparams
Example No. 6
def customer_stats(outfile=None):
    sales = sales_grouped_by_users()

    stats = {}
    for user_id, items in sales:
        item_list = list(items)
        data = {}
        data['user_id'] = user_id
        data['n_lines'] = len(item_list)
        #all orders
        fill_items(data, item_list, suffix='')
        #online orders
        item_list_online = [i for i in item_list if i['online_order_number']]
        fill_items(data, item_list_online, suffix='_online')
        # sale items
        item_list_on_sale = [i for i in item_list if i['on_sale'] == 't']
        fill_items(data, item_list_on_sale, suffix='_on_sale')

        stats[user_id] = data

    if outfile is not None:
        fieldnames = sorted(data.keys())
        dw = DictWriter(open(outfile, 'w'), fieldnames=fieldnames)
        dw.writeheader()
        for user_id, row in stats.iteritems():
            dw.writerow(row)

    return stats.values()
def main():
    # We open the 2000 file first because it has the headers
    print("Reading from:", SRC_PATHS['2000'])
    csv2000 = DictReader(SRC_PATHS['2000'].read_text().splitlines())
    # awkward but whatever. We need to use csv2000's headers
    # and add the 'year' column to it
    destfile = DEST_PATH.open('w')
    destcsv = DictWriter(destfile, fieldnames=['year'] + csv2000.fieldnames)
    destcsv.writeheader()
    for i, row in enumerate(csv2000):
        row['year'] = 2000
        destcsv.writerow(row)
    print("Wrote {0} lines to: {1}".format(i+1, DEST_PATH))

    # now we open 1990 file and iterate
    print("Reading from:", SRC_PATHS['1990'])
    for i, line in enumerate(SRC_PATHS['1990'].read_text().splitlines()):
        name, freq, cumfreq, rank = re.search(RX_ROW_1990, line).groups()
        row = { 'name': name.strip(),
                'rank': int(rank),
                'year': 1990,
                'prop100k': int(float(freq) * 1000),
                'cum_prop100k': int(float(cumfreq) * 1000),
              }
        destcsv.writerow(row)
    print("Wrote {0} lines to: {1}".format(i+1, DEST_PATH))
    # all done
    destfile.close()
Example No. 8
def run_queries(session, state):

                           
    query_inputs = term_handler(state)
    
    combinations = cartesian_product(query_inputs)
    
    for query in combinations:
        PARAMS.update(query)
        logger.info('query')
        sleep(SLEEP_TIME)
        page = session.get(url = OB_BASE % SEARCH_URL,
                                params = PARAMS)
        logger.info('got page')
        pricing_data = page.json()
        with open(WRITE_FILE_PATH, 'wb') as output_file:
            fieldnames = pricing_data['PricingRecords'][0].keys()
            fieldnames.append('Scenario')
            print 'FIELDNAMES %s' % fieldnames
            logger.info('header %s' % fieldnames)
            csv_output = DictWriter(output_file, fieldnames=fieldnames)
            csv_output.writeheader()
            for row in pricing_data['PricingRecords']:
                row['Scenario'] = '{msa}|{product}{purpose}{amount}{ltv}{fico}LD30IO0{term}'.format(msa=query_inputs['MSALocation_Index'][query['MSALocation_Index']],
                                                                                                    product=query_inputs["ProductType"][query["ProductType"]],
                                                                                                    purpose=query_inputs["Purpose"][query["Purpose"]],
                                                                                                    amount=query_inputs["LoanAmount"][query["LoanAmount"]],
                                                                                                    ltv=query_inputs["LTV"][query["LTV"]],
                                                                                                    fico=query_inputs["FICO"][query["FICO"]],
                                                                                                    term=query_inputs["Term"][query["Term"]])
                
        
                logger.info('adding row %s' % row)
                csv_output.writerow(row)
Example No. 9
def initialize_writer(fieldnames, buffer, months):
    flowSummaryWriter = DictWriter(buffer, fieldnames=fieldnames, delimiter="\t")
    flowSummaryWriter.writerow(dict(
        zip(fieldnames, ["Account"] + (["Start"] if "Start" in fieldnames else []) +\
                        [month.strftime("%B %Y") for month in months] + ["Net"] +\
                        (["End"] if "End" in fieldnames else []))))
    return flowSummaryWriter
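A hedged sketch of how initialize_writer might be called, with made-up account columns and months:

from csv import DictWriter  # needed by initialize_writer above
from datetime import date
from io import StringIO

months = [date(2021, m, 1) for m in (1, 2, 3)]
fieldnames = ["Account", "Start"] + [m.strftime("%B %Y") for m in months] + ["Net", "End"]
out = StringIO()
writer = initialize_writer(fieldnames, out, months)  # writes the human-readable header row
writer.writerow({"Account": "Checking", "Start": 100, "Net": 25, "End": 125})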
Example No. 10
def write_report(report):
    f = open(report.filename, 'wb')
    print >>f, ','.join(report.columns)
    writer = DictWriter(f, report.columns)
    for row in report.rows:
        writer.writerow(_encode_row(row))
    f.close()
Example No. 11
 def write_csv(self, array, fname, delimiter=":"):
     with open(fname, "w") as f:
         fieldnames = list(array[0].keys())
         writer = DictWriter(f, delimiter=delimiter, lineterminator="\n", fieldnames=fieldnames)
         writer.writerow(dict((field, field) for field in fieldnames))
         for row in array:
             writer.writerow(row)
Example No. 12
def main():
    '''
        >>> main() # stuff happens
    '''

    args = parse_args()
    logging.basicConfig(filename=args.log, level=logging.INFO)

    input_otu_counts = defaultdict(lambda: defaultdict(lambda: 0))
    field_names = set()

    for input in args.inputs:
        with open(input) as handle:
            kraken_data = parse_kraken_file(handle)

            for row in kraken_data:
                field_names.add(row['ncbi_taxid'])
                input_otu_counts[input][row['ncbi_taxid']] += 1

    field_names = ['input'] + sorted([ i for i in field_names ])

    with open(args.output, 'w') as handle:
        writer = DictWriter(handle,
                            fieldnames=field_names)

        writer.writeheader()

        for input, otu_counts in list(input_otu_counts.items()):
            otu_counts['input'] = input
            writer.writerow(otu_counts)
Example No. 13
 def writePredictions(self):
     print "In writePredictions"
     o = DictWriter(open("predictions.csv", 'w'), ["id", "position"])
     o.writeheader()
     for ii, pp in zip([x['id'] for x in self.test], self.predictions):
         d = {'id': ii, 'position': pp}
         o.writerow(d)
Example No. 14
 def csv_results(self, csv_file, histogram_size=None):
     specs = self.token_categories
     names_from_type = {
         spec.typename: spec.name.first_original().value.encode('utf-8') for spec in specs
     }
     spec_names = names_from_type.values()
     spec_names.sort()
     spec_names.insert(0, "idea")
     dw = DictWriter(csv_file, spec_names, dialect='excel', delimiter=';')
     dw.writeheader()
     by_idea = self._gather_results()
     values = {
         votable_id: self.results_for(voting_results)
         for (votable_id, voting_results) in by_idea.iteritems()
     }
     idea_names = dict(self.db.query(Idea.id, Idea.short_title).filter(
         Idea.id.in_(by_idea.keys())))
     idea_names = {
         id: name.encode('utf-8') for (id, name) in idea_names.iteritems()}
     ordered_idea_ids = Idea.visit_idea_ids_depth_first(
         AppendingVisitor(), self.get_discussion_id())
     ordered_idea_ids = [id for id in ordered_idea_ids if id in values]
     for idea_id in ordered_idea_ids:
         base = values[idea_id]
         sums = {names_from_type[k]: v for (k, v) in base['sums'].iteritems()}
         sums['idea'] = idea_names[idea_id]
         dw.writerow(sums)
Example No. 15
    def handle(self, *args, **options):
        print('Start.')
        sampled_comments = []
        feeds = Facebook_Feed.objects.all()
        from_date = dateutil.parser.parse(options['from_date'])
        to_date = dateutil.parser.parse(options['to_date'])
        sample_size = options['sample_size']
        batch_size = options['batch_size']

        for i, feed in enumerate(feeds):
            print('working on feed {} of {}'.format(i + 1, feeds.count()))
            residual = None
            sampled_comments_for_feed = []
            statuses_for_feed = Facebook_Status.objects.filter(feed__id=feed.id).filter(
                published__range=[from_date, to_date]).order_by(
                'comment_count')

            for i, status in enumerate(statuses_for_feed):
                is_last = i + 1 == len(statuses_for_feed)
                samples_for_status, residual = self.sample(status, is_last, residual, sample_size=sample_size,
                                                           batch_size=batch_size)
                sampled_comments_for_feed += samples_for_status

            sampled_comments += sampled_comments_for_feed
        print('total_comments:', len(sampled_comments))
        with open('{}.csv'.format(args[0]), 'wb') as f:
            fieldnames = ['comment_id', 'status_id']
            writer = DictWriter(f, fieldnames=fieldnames)
            writer.writerow({'comment_id': 'comment_id', 'status_id': 'status_id'})
            for row in sampled_comments:
                writer.writerow({'comment_id': row['comment_id'], 'status_id': row['parent__status_id']})
        print('Done.')
def main():
    layout = construct_layout(OFF_PROPERTY_LAYOUT)
    header = get_active_header(OFF_PROPERTY_LAYOUT)

    # Prepare CSV output to stdout
    writer = DictWriter(stdout, fieldnames=header)
    writer.writeheader()

    parse = Struct(layout).unpack_from
    struct_length = calcsize(layout)

    for line in get_stdin_bytes().readlines():
        # Ensure string length is what the unpacker expects
        if len(line) != struct_length:
            line = '{:<{}s}'.format(line.decode(), struct_length).encode()

        # Deconstruct fixed-width string
        row = parse(line)

        # Decode each value
        row = (v.decode('ascii', 'ignore') for v in row)

        # Trim whitespace in each field
        row = [field.strip() for field in row]

        # Convert to dict using header
        row = dict(zip(header, row))

        writer.writerow(row)
Example No. 17
def _stats_data_csv(user_profile, req_input, client, ignored, stats_type, is_custom):

    n_type_keys = {
        'mean': ['start', 'stop', 'service_name', 'mean', 'mean_all_services',
                  'usage_perc_all_services', 'time_perc_all_services', 'all_services_usage', 'mean_trend'],
        'usage': ['start', 'stop', 'service_name', 'usage', 'rate', 'usage_perc_all_services',
                  'time_perc_all_services', 'all_services_usage', 'usage_trend'],
        }

    buff = StringIO()
    writer = DictWriter(buff, n_type_keys[req_input.n_type], extrasaction='ignore')
    writer.writeheader()

    for stat in _get_stats(client, req_input.utc_start, req_input.utc_stop, req_input.n, req_input.n_type, stats_type):
        d = stat.to_dict()
        d['start'] = req_input.user_start
        d['stop'] = req_input.user_stop if stats_type == 'trends' or is_custom else ''
        writer.writerow(d)

    out = buff.getvalue()
    buff.close()

    response = HttpResponse(out, content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename={}'.format('zato-stats.csv')

    return response
Example No. 18
def convert_powertracker_log_to_csv(path):
    """
    This function creates a CSV file (to ./results) from a PowerTracker log file (from ./data).
    This is inspired from https://github.com/sieben/makesense/blob/master/makesense/parser.py.

    :param path: path to the experiment (including [with-|without-malicious])
    """
    platforms = [p.capitalize() for p in get_available_platforms()]
    data, results = join(path, 'data'), join(path, 'results')
    with open(join(data, 'powertracker.log')) as f:
        log = f.read()
    iterables, fields = [], ['mote_id']
    for it in PT_ITEMS:
        time_field = '{}_time'.format(it)
        iterables.append(finditer(PT_REGEX.format('|'.join(platforms), it.upper(), time_field), log, MULTILINE))
        fields.append(time_field)
    with open(join(results, 'powertracker.csv'), 'w') as f:
        writer = DictWriter(f, delimiter=',', fieldnames=fields)
        writer.writeheader()
        for matches in zip(*iterables):
            row = {}
            for m in matches:
                row.update((k, int(v)) for k, v in m.groupdict().items())
            for it in PT_ITEMS:
                time_field = '{}_time'.format(it)
                row[time_field] = float(row[time_field] / 10 ** 6)
            writer.writerow(row)
def output_list(to_save,path):
    head = to_save[0].keys()
    with open(path,'w') as f:
        csvwriter = DictWriter(f,head,delimiter='\t')
        csvwriter.writerow({x: x for x in head})
        for l in to_save:
            csvwriter.writerow(l)
Example No. 20
def export_feature_matrix_csv(feature_matrix, path, delimiter = ','):
    """
    Save a FeatureMatrix as a column-delimited text file

    Parameters
    ----------
    feature_matrix : FeatureMatrix
        FeatureMatrix to save to text file
    path : str
        Full path to write text file
    delimiter : str
        Character to mark boundaries between columns.  Defaults to ','
    """
    with open(path, encoding='utf-8-sig', mode='w') as f:
        header = ['symbol'] + feature_matrix.features
        writer = DictWriter(f, header,delimiter=delimiter)
        writer.writerow({h: h for h in header})
        for seg in feature_matrix.segments:
            #If FeatureMatrix uses dictionaries
            #outdict = feature_matrix[seg]
            #outdict['symbol'] = seg
            #writer.writerow(outdict)
            if seg in ['#','']: #wtf
                continue
            featline = feature_matrix.seg_to_feat_line(seg)
            outdict = {header[i]: featline[i] for i in range(len(header))}
            writer.writerow(outdict)
Example No. 21
def main(argv):
    mountpoint, ifname, ofname = getParms()
    if not mountpoint.endswith('/'):
        mountpoint = mountpoint  + '/'
    metaname = mountpoint + 'RW_32/metadata_v7.csv'
    # try opening the files    
    try:
        #scl enable python27 bash
        # to allow multiple openings on one line
        with open(metaname, "rb") as mi, open(ifname, "rb") as cl, open(ofname, "wb") as mo:
            fields = ['identifier','filename','folder','date_created','checksum', \
                      'series_number','creating_body','crawl_start', 'crawl_end', \
                      'filesize', 'unit']
            all_fields = fields + ['date_archivist_note', 'archivist_note']
            metareader = DictReader(mi, fieldnames=fields)
            creader = reader(cl)
            # will always be tiny wrt metadata so slurp
            corrupt = {}
            for row in creader:
                corrupt[row[0]] = [row[1], row[2]]
            writer = DictWriter(mo, delimiter=',', fieldnames=all_fields)
            writer.writerow(dict((fn,fn) for fn in all_fields))
            print "[INFO] Opened files successfully."
            modifyMetadata(metareader, corrupt, writer)
    except IOError as e:
        print "[IOERROR] " + e
    def handle_noargs(self, **options):
        print "in the command..."

        comment_keys = ('user_key', 'g', 'agroup', 'user_key', 'experiment_slug', 'variant', 'via')

        petition_headers = comment_keys + ('name', 'email')
        # Petition signatories from the first two experiments
        for filename, url_path in [
            ('petition-1.csv', '/county-performance/petition'),
            ('petition-2.csv', '/county-performance-2/petition'),
        ]:
            with open(filename, "wb") as f:
                writer = DictWriter(f, petition_headers)
                writer.writeheader()
                for f in Feedback.objects.filter(url__endswith=url_path):
                    data, comment = unpack_comment(f.comment)
                    row_data = data.copy()
                    row_data['name'] = comment
                    row_data['email'] = f.email
                    writer.writerow(row_data)

        senate_headers = comment_keys + ('comment',)
        for filename, url_path in [
            ('senate-1.csv', '/county-performance/senate'),
            ('senate-2.csv', '/county-performance-2/senate'),
        ]:
            with open(filename, "wb") as f:
                writer = DictWriter(f, senate_headers)
                writer.writeheader()
                for f in Feedback.objects.filter(url__endswith=url_path):
                    data, comment = unpack_comment(f.comment)
                    row_data = data.copy()
                    row_data['comment'] = comment
                    writer.writerow(row_data)
def run():
    output = open(sys.argv[1], 'w')
    writer = DictWriter(output, fieldnames=['uid', 'data'])
    writer.writeheader()
    db = DB(dbconfig)

    for uid in fetch_users(db):
        data = fetch_user_location_logs(uid, db)
        locations = merge_locations(data)
        matrix = generate_matrix(locations)
        semantic_data = fetch_semantic_data(list(matrix.keys()))
        semantic_dict = {}
        for row in semantic_data:
            semantic_dict[row['location']] = clean_tags(row['tags'], 5)
        tag_matrix = {}
        for location, proba in list(matrix.items()):
            tag_dict = semantic_dict[location]
            tag_weight = sum(v for v in list(tag_dict.values()))
            if tag_weight == 0:
                continue
            for tag, cnt in list(tag_dict.items()):
                tag_matrix.setdefault(tag, [0] * 48)
                for i in range(48):
                    tag_matrix[tag][i] += (proba[i] * cnt + 0.001) / (tag_weight + 0.001)
        writer.writerow({
            'uid': uid,
            'data': json.dumps(tag_matrix)
        })
    output.close()
Example No. 24
def main(argv):
    uname, pwd, filelist, ifname, ofname = getParms()
    # try opening the files    
    try:
        #scl enable python27 bash
        # to allow multiple openings on one line
        with open(filelist, "rb") as fhl, open(ifname, "rb") as fhi, open(ofname, "wb") as fho:
            # read in the list of filenames to insert
            d = {}
            for line in fhl:
                fname = line.split('/')[-1].rstrip()
                #filename points to folder
                parts = splitFilename(fname)
                crawldir = parts.group(1)
                if crawldir in d:
                    d[crawldir].append(line.rstrip())
                else:
                    d[crawldir] = [line.rstrip()]

            fields = ['identifier','filename','folder','date_created','checksum', \
                      'series_number','creating_body','crawl_start', 'crawl_end', \
                      'filesize', 'unit']
            reader = DictReader(fhi, fieldnames=fields)
            writer = DictWriter(fho, delimiter=',', fieldnames=fields)
            writer.writerow(dict((fn,fn) for fn in fields))
            print "[INFO] Opened files successfully."
            insertFiles(uname, pwd, d, reader, writer)
    except IOError as e:
        print "[IOERROR] " + e
Example No. 25
def process_vf(loc_data):
	precinct_data = {}
	with open(Files.VF_CUT.format(**loc_data), "r") as r, open(Files.VF_DEDUPED.format(**loc_data), "w") as w:
		reader = DictReader(r, dialect='excel-tab')
		writer = DictWriter(w, fieldnames=Headers.VF_DEDUPED)
		writer.writeheader()
		vf_hashes = set()
		p_count = 0
		for row in reader:
			if len(loc_data['county']) > 0 and not row['vf_county_name'].upper() == loc_data['county'].upper():
				continue
			vf_hash = get_hash(row, HashFields.VF)
			if vf_hash in vf_hashes:
				continue
			vf_hashes.add(vf_hash)
			vfp_hash = get_hash(row, HashFields.VFP)
			row_zip = row['vf_reg_cass_zip']
			if vfp_hash not in precinct_data:
				p_count += 1
				precinct_data[vfp_hash] = get_conversion(row, Conversions.VFP)
				precinct_data[vfp_hash]['vf_precinct_id'] = Prefixes.PRECINCT + str(p_count)
				precinct_data[vfp_hash]['zips'] = {row_zip:1}
				precinct_data[vfp_hash]['examples'] = []
			elif row_zip not in precinct_data[vfp_hash]['zips']:
				precinct_data[vfp_hash]['zips'][row_zip] = 1
			else:
				precinct_data[vfp_hash]['zips'][row_zip] += 1
			vf_output = get_conversion(row, Conversions.VF)
			if len(precinct_data[vfp_hash]['examples']) < 5:
				precinct_data[vfp_hash]['examples'].append(vf_output)
			vf_output["vf_precinct_id"] = precinct_data[vfp_hash]['vf_precinct_id'] 
			vf_output["vf_id"] = str(Prefixes.VF + row["voterbase_id"][3:])
			writer.writerow(vf_output)
	return precinct_data
Example No. 26
def dump_csv(pages, options):
    """Dump in CSV format.

    ``pages`` is an iterable of (field, value) tuples.

    It's assumed that the same fields are used in each tuple.
    """
    from itertools import chain
    from csv import DictWriter
    from sys import stdout
    pages = iter(pages)
    try:
        first_row = pages.next()
    except StopIteration:
        return
    fields = [item[0] for item in first_row]
    rows = chain((first_row,), pages)
    dicts = (dict(page) for page in rows)
    dicts = (process_page(row) for row in dicts)

    def validate_row_length(row_dict):
        if len(row_dict) != len(fields):
            raise DataValidationError(
                'Inconsistent number of fields in row {0}.\n'
                'Fields: {1}'.format(row_dict, fields))
        return row_dict
    dicts = (validate_row_length(row) for row in dicts)

    writer = DictWriter(stdout, fields, dialect='excel-tab')
    writer.writerow(dict((v, v) for v in fields))
    writer.writerows(dicts)
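A hedged sketch of the expected input shape; process_page here is a hypothetical pass-through stand-in for whatever the surrounding module actually provides:

def process_page(row):
    # hypothetical stand-in: return each row dict unchanged
    return row

pages = iter([
    (('title', 'Home'), ('url', '/')),
    (('title', 'About'), ('url', '/about')),
])
dump_csv(pages, options=None)  # prints a tab-separated header row plus two data rows to stdout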
Example No. 27
def write_data(outfile, data, fields):

    with open(outfile, 'w') as outfile:
        writer = DictWriter(outfile, fieldnames=fields)
        writer.writeheader()
        for d in data:
            writer.writerow(d)
Example No. 28
def get_vf_precincts(loc_data, precinct_data):
	with open(Files.VF_PRECINCTS.format(**loc_data), "w") as vfp_w, open(Files.VF_EX_PRECINCTS.format(**loc_data), "w") as vfep_w:
		vfp_writer = DictWriter(vfp_w, fieldnames=Headers.VFP)
		vfp_writer.writeheader()
		vfep_writer = DictWriter(vfep_w, fieldnames=Headers.VFEP)
		vfep_writer.writeheader()
		for key, vfp_dict in precinct_data.iteritems():
			zips = vfp_dict.pop('zips')
			max_count = 0
			max_zip = 0
			total_count = 0
			for zip_val, zip_count in zips.iteritems():
				total_count += zip_count
				if zip_count > max_count:
					max_count = zip_count
					max_zip = zip_val
			vfp_dict['vf_precinct_zip'] = max_zip
			vfp_dict['vf_precinct_count'] = total_count
			examples = vfp_dict.pop('examples')
			vfp_writer.writerow(vfp_dict)
			ex_count = 1
			for ex in examples:
				for key in Conversions.VF_EX:
					vfp_dict[Prefixes.VFP_EX.format(ex_count)+key] = ex[key]
				ex_count += 1
			vfep_writer.writerow(vfp_dict)
Example No. 29
def plot_file(filename1):
    base_name = os.path.basename(filename1)
    name_parts = base_name.split('_')
    work_path = os.path.dirname(__file__)
    scores_filename = os.path.join(
        work_path,
        '_'.join(name_parts[:2] + ['v3loop_scores.csv']))
    if os.path.exists(scores_filename):
        with open(scores_filename) as f:
            reader = DictReader(f)
            score_rows = [list(map(int, row))
                          for row in map(itemgetter('score', 'count'), reader)]
    else:
        source1 = os.path.join('micall/tests/working/v3loop_alignment_scores/',
                               filename1)
        source2 = source1.replace('_R1_', '_R2_')
        start = datetime.now()
        with open(source1) as fastq1, open(source2) as fastq2:
            score_counts = align_reads(fastq1, fastq2)
        print('{}: {}'.format(datetime.now() - start, filename1))
        score_rows = sorted(score_counts.items())
        with open(scores_filename, 'w') as scores_csv:
            writer = DictWriter(scores_csv,
                                ('score', 'count'),
                                lineterminator=os.linesep)
            writer.writeheader()
            for score, count in score_rows:
                writer.writerow(dict(score=score, count=count))
    scores = [row[0] for row in score_rows]
    counts = [row[1] for row in score_rows]
    total_count = float(sum(counts))
    fractions = [count/total_count for count in counts]
    plt.plot(scores, fractions, label=base_name.split('_')[0], alpha=0.7)
Example No. 30
def append_dict_as_row(file_name, dict_of_elem, field_names):
    with open(file_name, 'a+', newline='') as write_obj:
        dict_writer = DictWriter(write_obj, fieldnames=field_names)
        dict_writer.writerow(dict_of_elem)
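A hedged usage sketch with made-up columns (assuming from csv import DictWriter is already in scope): append one result row per run without touching any existing header.

field_names = ['run_id', 'accuracy', 'loss']                   # hypothetical columns
result = {'run_id': 7, 'accuracy': 0.91, 'loss': 0.23}
append_dict_as_row('experiment_log.csv', result, field_names)  # appends a single row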
def summarize_junctions(
    sample_dirs: Dict[str, Path],
    # sample_names: List[str],
    gff_filename: Union[str, Path],
    output_prefix: Union[str, Path],
    genome_d: Optional[Union[str, Path]] = None,
    junction_known: Optional[Union[str, Path]] = None,
) -> defaultdict:
    """
    1. for each sample, read all the GFF, store the junction information (both 0-based)

    """
    junc_by_chr_strand = defaultdict(
        lambda: defaultdict(list)
    )  # (seqname,strand) --> (donor,acceptor) --> samples it shows up in (more than once possible)

    for sample_name, d in sample_dirs.items():
        for r in GFF.collapseGFFReader(Path(d, gff_filename)):
            n = len(r.ref_exons)
            if n == 1:
                continue  # ignore single exon transcripts
            for i in range(n - 1):
                donor = r.ref_exons[i].end - 1  # make it 0-based
                accep = r.ref_exons[i + 1].start  # start is already 0-based
                junc_by_chr_strand[r.seqname,
                                   r.strand][donor, accep].append(sample_name)

    # write junction report
    with open(f"{output_prefix}.junction.bed",
              "w") as f1, open(f"{output_prefix}.junction_detail.txt",
                               "w") as f:
        f1.write(
            f'track name=junctions description="{output_prefix}" useScore=1\n')

        JUNC_DETAIL_FIELDS = [
            "seqname",
            "left",
            "right",
            "strand",
            "num_transcript",
            "num_sample",
            "genome",
            "annotation",
            "label",
        ]

        writer = DictWriter(f, JUNC_DETAIL_FIELDS, delimiter="\t")
        writer.writeheader()
        keys = list(junc_by_chr_strand)
        keys.sort()
        for _seqname, _strand in keys:
            v = junc_by_chr_strand[_seqname, _strand]
            v_keys = list(v)
            v_keys.sort()
            labels = cluster_junctions(v_keys)
            for i, (_donor, _accep) in enumerate(v_keys):
                rec = {
                    "seqname": _seqname,
                    "left": _donor,
                    "right": _accep,
                    "strand": _strand,
                    "num_transcript": len(v[_donor, _accep]),
                    "num_sample": len(set(v[_donor, _accep])),
                }
                # f.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t".format(_chr, _donor, _accep, _strand, len(v[_donor,_accep]), len(set(v[_donor,_accep]))))
                f1.write(
                    f"{_seqname}\t{_donor}\t{_accep + 1}\t{output_prefix}\t{len(v[_donor, _accep])}\t{_strand}\n"
                )
                # if genome is given, write acceptor-donor site
                if genome_d is None or _seqname not in genome_d:
                    rec["genome"] = "NA"
                    # f.write("NA\t")
                else:
                    up, down = (
                        genome_d[_seqname][(_donor + 1):(_donor + 3)],
                        genome_d[_seqname][(_accep - 2):_accep],
                    )
                    if _strand == "+":
                        rec["genome"] = f"{str(up.seq).upper()}-{str(down.seq).upper()}"
                        # f.write("{0}-{1}\t".format(str(up.seq).upper(), str(down.seq).upper()))
                    else:
                        rec["genome"] = f"{str(down.reverse_complement().seq).upper()}-{str(up.reverse_complement().seq).upper()}"
                        # f.write("{0}-{1}\t".format(str(down.reverse_complement().seq).upper(), str(up.reverse_complement().seq).upper()))
                # if annotation is given, check if matches with annotation
                if junction_known is None:
                    rec["annotation"] = "NA"
                    # f.write("NA\n")
                else:
                    if (_seqname, _strand) in junction_known and (
                            _donor,
                            _accep,
                    ) in junction_known[_seqname, _strand]:
                        rec["annotation"] = "Y"
                        # f.write("Y\t")
                    else:
                        rec["annotation"] = "N"
                        # f.write("N\t")
                rec["label"] = f"{_seqname}_{_strand}_{labels[i]}"
                writer.writerow(rec)
            # f.write("{c}_{s}_{lab}\n".format(c=_seqname, s=_strand, lab=labels[i]))

    return junc_by_chr_strand
Example No. 32
                        type=argparse.FileType('r'))
    parser.add_argument('input2_csv',
                        help='CSV2 to compare',
                        type=argparse.FileType('r'))
    parser.add_argument('output_csv',
                        help='CSV to output diff',
                        type=argparse.FileType('w'))

    args = parser.parse_args()

    r1 = DictReader(args.input1_csv, dialect="excel")
    r2 = DictReader(args.input2_csv, dialect="excel")

    w = DictWriter(args.output_csv,
                   fieldnames=["filename"] + sorted(r1.fieldnames),
                   dialect="excel")
    w.writeheader()

    for l1, l2 in zip(r1, r2):
        if l1 != l2:
            l1.update({"filename": args.input1_csv.name})
            l2.update({"filename": args.input2_csv.name})

            w.writerow(l1)
            w.writerow(l2)
            w.writerow({k: "" for k in l1})

    args.input1_csv.close()
    args.input2_csv.close()
    args.output_csv.close()
Example No. 33
def create_table(raw_data):
    from operator import itemgetter
    import matplotlib.pyplot as plt
    import pylab
    from csv import DictWriter

    req_fields = [('Time stepper orig name', 0),
                  ('Stencil Kernel semi-bandwidth', 1),
                  ('Stencil Kernel coefficients', 0), ('Precision', 0),
                  ('Number of time steps', 1), ('Number of tests', 1),
                  ('Global NX', 1), ('Global NY', 1), ('Global NZ', 1),
                  ('Thread group size', 1),
                  ('Intra-diamond prologue/epilogue MStencils', 1),
                  ('Energy', 2), ('Energy DRAM', 2), ('Power', 2),
                  ('Power DRAM', 2), ('WD main-loop RANK0 MStencil/s  MAX', 2),
                  ('MStencil/s  MAX', 2), ('OpenMP Threads', 1)]
    data = []
    for k in raw_data:
        tup = dict()
        # defaults
        if k['Intra-diamond prologue/epilogue MStencils'] == '':
            k['Intra-diamond prologue/epilogue MStencils'] = 0
        # add the general fields
        for f in req_fields:
            try:
                v = k[f[0]]
                if f[1] == 1: v = int(k[f[0]])
                if f[1] == 2: v = float(k[f[0]])
            except:
                print f[0]

            tup[f[0]] = v

        # add the stencil operator
        tup['Stencil'] = get_stencil_num(k)
        data.append(tup)


#    for i in data: print i

    data2 = []
    for tup in data:
        glups = (tup['Number of time steps'] * tup['Global NX'] *
                 tup['Global NY'] * tup['Global NZ'] -
                 tup['Intra-diamond prologue/epilogue MStencils'] *
                 10**6) * tup['Number of tests'] / 10**9
        tup['Total pJoul/LUP'] = (tup['Energy'] + tup['Energy DRAM']) / glups
        tup['DRAM pJoul/LUP'] = (tup['Energy DRAM']) / glups
        tup['CPU pJoul/LUP'] = (tup['Energy']) / glups
        if 'Dynamic' in tup['Time stepper orig name']:
            tup['Time stepper orig name'] = 'MWD'

        if 'Dynamic' in tup['Time stepper orig name']:
            tup['Performance'] = tup['WD main-loop RANK0 MStencil/s  MAX']
        else:
            tup['Performance'] = tup['MStencil/s  MAX']
        tup['Threads'] = tup['OpenMP Threads']
        tup['Method'] = tup['Time stepper orig name']
        data2.append(tup)

    #for i in data2: print i
    from operator import itemgetter
    data2 = sorted(data2,
                   key=itemgetter('Stencil', 'Thread group size',
                                  'Time stepper orig name', 'Global NX',
                                  'Global NY', 'Global NZ'))

    fields = [
        'Method', 'Stencil', 'Threads', 'Thread group size', 'Global NX',
        'Global NY', 'Global NZ', 'Precision', 'Power', 'Power DRAM',
        'CPU pJoul/LUP', 'DRAM pJoul/LUP', 'Total pJoul/LUP', 'Performance'
    ]

    with open('energy_consumption.csv', 'w') as output_file:
        r = DictWriter(output_file, fieldnames=fields)
        r.writeheader()
        for k in data2:
            k2 = dict()
            for f in k.keys():
                for f2 in fields:
                    if f == f2:
                        k2[f] = k[f]
            r.writerow(k2)
Example No. 34
if __name__ == "__main__":

    # Cast to list to keep it all in memory
    train = list(DictReader(open("train.csv", 'r')))
    test = list(DictReader(open("test.csv", 'r')))

    feat = Featurizer()

    labels = []
    for line in train:
        if not line['cat'] in labels:
            labels.append(line['cat'])

    x_train = feat.train_feature(x['text'] for x in train)
    x_test = feat.test_feature(x['text'] for x in test)

    y_train = array(list(labels.index(x['cat']) for x in train))

    # Train classifier
    lr = SGDClassifier(loss='log', penalty='l2', shuffle=True)
    lr.fit(x_train, y_train)

    feat.show_top10(lr, labels)

    predictions = lr.predict(x_test)
    o = DictWriter(open("predictions.csv", 'w'), ["id", "cat"])
    o.writeheader()
    for ii, pp in zip([x['id'] for x in test], predictions):
        d = {'id': ii, 'cat': labels[pp]}
        o.writerow(d)
Example No. 35
 def write_objects_csv(pad, objects, headers):
     with open(pad, 'w', newline = "") as outfile:
         writer = DictWriter(outfile, delimiter=";", fieldnames=headers)
         writer.writeheader()
         for obj in objects:
             writer.writerow(obj.write_to_dict())
Example No. 36
def PhishSimCSV(field_names, f_obj, user_d):
    """Writes results to a CSV file."""
    f_names = field_names
    writer = DictWriter(f_obj, fieldnames=f_names)
    writer.writerow(user_d)
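A hedged usage sketch with made-up field names and user data; the header is written once before handing the open file to PhishSimCSV:

from csv import DictWriter

fields = ['email', 'clicked_link', 'submitted_credentials']  # hypothetical columns
results = [
    {'email': 'alice@example.com', 'clicked_link': 'yes', 'submitted_credentials': 'no'},
    {'email': 'bob@example.com', 'clicked_link': 'no', 'submitted_credentials': 'no'},
]
with open('phishsim_results.csv', 'w', newline='') as f_obj:
    DictWriter(f_obj, fieldnames=fields).writeheader()
    for user_d in results:
        PhishSimCSV(fields, f_obj, user_d)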
Example No. 37
"""

import os
import pickle
import sys
from csv import DictWriter

if not os.path.exists("rt2freshdesk.cache"):
    print("Missing RT data")
    sys.exit(2)

# Load RT from cache
with open("rt2freshdesk.cache", "rb") as handle:
    data = pickle.load(handle)
    users = data["users"]

with open("freshdesk-users.csv", "w") as handle:
    writer = DictWriter(handle, ["Name", "Email"])
    for user in users.values():
        if "EmailAddress" not in user:
            print("Skipping user without email: {}".format(user))
            continue
        if user.get("Privileged"):
            print("Skipping privileged user {}".format(user["EmailAddress"]))
            continue
        email = user["EmailAddress"]
        name = user.get("RealName", user.get("Name", ""))
        if name == email:
            name = ""
        writer.writerow({"Name": name, "Email": email})
Example No. 38
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn import metrics
from matplotlib import pyplot as plt
import numpy as np
from .data import X_t, target, target_test, X \
    ,X_full, dataset, X_f_t

clf = GradientBoostingClassifier(n_estimators=4000, learning_rate=0.001)
# clf.fit(X, target)
# print(metrics.classification_report(target_test, clf.predict(X_t)))

# clf = RandomForestClassifier(n_jobs=4, n_estimators=10000, min_samples_leaf=3)
# clf.fit(X, target)
# print(metrics.classification_report(target_test, clf.predict(X_t)))

clf.fit(X_full, dataset.Survived)
from csv import DictWriter
w = DictWriter(open('solve.csv', 'w'), fieldnames=['SUB_ID', 'AGE_GROUP1'])
w.writeheader()
for i, x in enumerate(clf.predict(X_f_t)):
    w.writerow({'SUB_ID': i + 892, 'AGE_GROUP1': x})

width = 0.35
plt.bar(np.arange(len(clf.feature_importances_)),
        clf.feature_importances_,
        width=0.35)
use_field = ['Pclass']
plt.xticks(np.arange(len(clf.feature_importances_)) + width / 2., use_field)

plt.show()
def cleanup_scrubbed_files_redundancy(gff_filename, group_filename,
                                      count_filename, fastq_filename,
                                      output_prefix):

    junction_seen = defaultdict(lambda: defaultdict(lambda: [
    ]))  # key (chr,strand) -> dict of (series of junctions) -> record
    for r in GFF.collapseGFFReader(gff_filename):
        n = len(r.ref_exons)
        if n == 1:
            junc_str = str(r.start) + ',' + str(r.end)
            junction_seen[r.chr, r.strand][junc_str] = [r]
        else:
            junc_str = ",".join(
                str(r.ref_exons[i].end) + ',' + str(r.ref_exons[i + 1].start)
                for i in xrange(n - 1))
            junction_seen[r.chr, r.strand][junc_str].append(r)

    # write out cleaned GFF
    outf = open(output_prefix + '.gff', 'w')
    outf2 = open(output_prefix + '.merged_ids.txt', 'w')
    merged = {}
    keys = junction_seen.keys()
    keys.sort()
    for k in keys:
        for bunch in junction_seen[k].itervalues():
            if len(bunch) == 1:  # just one record, write it out
                r = bunch[0]
                GFF.write_collapseGFF_format(outf, r)
                merged[r.seqid] = [r.seqid]
            else:
                # find the representative
                r = bunch[0]
                for r2 in bunch[1:]:
                    if r2.end - r2.start > r.end - r.start:
                        r = r2
                GFF.write_collapseGFF_format(outf, r)
                merged[r.seqid] = [x.seqid for x in bunch]
            outf2.write("{0}\t{1}\n".format(r.seqid,
                                            ",".join(merged[r.seqid])))
    outf.close()
    outf2.close()

    count_d, count_header = read_count_file(count_filename)
    # write out count file
    outf = open(output_prefix + '.abundance.txt', 'w')
    outf.write(count_header)
    writer = DictWriter(outf, fieldnames=['pbid','count_fl','count_nfl','count_nfl_amb','norm_fl','norm_nfl','norm_nfl_amb'], \
                        delimiter='\t', lineterminator='\n')
    writer.writeheader()
    for pbid, bunch in merged.iteritems():
        # combine the counts
        r = count_d[bunch[0]]
        r['pbid'] = pbid
        for field in fields_to_add:
            r[field] = float(r[field])
        for _id in bunch[1:]:
            for field in fields_to_add:
                r[field] += float(count_d[_id][field])
        writer.writerow(r)
    outf.close()

    group_info = read_group_file(group_filename)
    # write out group file
    outf = open(output_prefix + '.group.txt', 'w')
    for pbid, bunch in merged.iteritems():
        # combine the groups
        g = [group_info[bunch[0]]]
        for _id in bunch[1:]:
            g.append(group_info[_id])
        outf.write("{0}\t{1}\n".format(pbid, ",".join(g)))
    outf.close()

    # write out fastq file if present
    if fastq_filename is not None:
        outf = open(output_prefix + '.rep.fq', 'w')
        for r in SeqIO.parse(open(fastq_filename), 'fastq'):
            if r.id.split('|')[0] in merged or r.id in merged:
                SeqIO.write(r, outf, 'fastq')
        outf.close()

    print >> sys.stderr, "scrubbed files written: {0}.gff, {0}.group.txt, {0}.abundance.txt, {0}.merged_ids.txt".format(
        output_prefix)
Example No. 40
                        object_units_details += ' (default {default})'.format(
                            default=default_value)
                        object_units_details = object_units_details.strip()
                        #writable = 'TRUE'
                    except TypeError:
                        pass
                    except ValueError:
                        pass

        _log.debug('  object units = ' + str(object_units))
        _log.debug('  object units details = ' + str(object_units_details))
        _log.debug('  object notes = ' + object_notes)

        results = {}
        results['Reference Point Name'] = results[
            'Volttron Point Name'] = object_name
        results['Units'] = object_units
        results['Unit Details'] = object_units_details
        results['BACnet Object Type'] = obj_type
        results['Property'] = 'presentValue'
        results['Writable'] = writable
        results['Index'] = index
        results['Notes'] = object_notes

        config_writer.writerow(results)

except Exception as e:
    _log.exception("an error has occurred: %s", e)
finally:
    _log.debug("finally")
Example No. 41
from csv import DictWriter

with open("cats_2.csv", "w") as file:
    headers = ["Name", "Breed", "Age"]
    csv_writer = DictWriter(file, fieldnames=headers)
    csv_writer.writeheader()
    csv_writer.writerow({"Name": "Shoe", "Breed": "Tuxie", "Age": 1})
    csv_writer.writerow({"Name": "Agatha", "Breed": "Tuxie", "Age": 3})
Example No. 42
from csv import DictReader, DictWriter

def cm_to_in(cm):
    return round(float(cm) * 0.393701, 2)

with open('fighters.csv') as file:
    csv_reader = DictReader(file)
    fighters = list(csv_reader)

with open ('inches_fighters.csv', "w") as file:
    headers = ["Name", "Country", "Height (in inches)"]
    csv_writer = DictWriter(file, fieldnames=headers)
    csv_writer.writeheader()
    for fighter in fighters:
        csv_writer.writerow({
            "Name": fighter["Name"],
            "Country": fighter["Country"],
            "Height (in inches)": cm_to_in(fighter["Height (in cm)"])
        })
Example No. 43
    nsamples, nx, ny = VX_train.shape
    VX_train = VX_train.reshape((nsamples, nx * ny))
    VX_train = drmodel.transform(VX_train)

    VX_train = np.append(VX_train, X_train[svclassifier.support_], axis=0)
    Vy_train = np.append(Vy_train, y_train[svclassifier.support_])

    print(VX_train.shape)
    print(Vy_train.shape)
    svclassifier2 = SVC()
    svclassifier2.fit(VX_train, Vy_train)
    y_pred = svclassifier2.predict(X_test)
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    ac_svm = accuracy_score(y_test, y_pred)

    print("Number of support points " + str(svclassifier2.support_.shape[0]))
    filename = 'svm_base_%s_%s.pckl' % (str(dim), str(vecinos))
    elapsed_time = perf_counter() - t0
    print("Time " + str(elapsed_time))

    pickle.dump(svclassifier2, open(filename, 'wb'), protocol=4)
    result = {
        "Dimension": dim,
        "Accuracy_standard": ac_st,
        "Accuracy_vsvm": ac_svm
    }
    with open("svmiso.csv", "a+", newline='') as file:
        dict_writer = DictWriter(file, fieldnames=field_names)
        dict_writer.writerow(result)
Example No. 44
def add_user(first_name, last_name):
    with open("users.csv", "a") as file:
        csv_writer = DictWriter(file, fieldnames=["First Name", "Last Name"])
        #csv_writer.writeheader()  # necessary if existing file w/ headers? NOPE!
        csv_writer.writerow({"First Name": first_name, "Last Name": last_name})
Example No. 45
    if (Enable_Sort == "y"):
        if not os.path.exists(Error_Destination_DIR):
            os.makedirs(Error_Destination_DIR)
        if not os.path.exists(Match_Destination_DIR):
            os.makedirs(Match_Destination_DIR)
        if not os.path.exists(NoMatch_Destination_DIR):
            os.makedirs(NoMatch_Destination_DIR)

        if (error):
            os.replace(file, f'{Error_Destination_DIR}{file_name[-1]}')
        elif (match):
            os.replace(file, f'{Match_Destination_DIR}{file_name[-1]}')
        else:
            os.replace(file, f'{NoMatch_Destination_DIR}{file_name[-1]}')

    # Open your CSV file in append mode
    # Create a file object for this file
    with open('list.csv', 'a', newline='') as f_object:

        # Pass the file object and a list
        # of column names to DictWriter()
        # You will get a DictWriter object
        dictwriter_object = DictWriter(f_object, fieldnames=field_names)

        # Pass the dictionary as an argument to writerow()
        dictwriter_object.writerow(element)

        # No explicit close is needed; the with block closes the file
Example No. 46
class SummaryGenerator(Generator):
    generatorname = os.path.basename(__file__)
    generatorversion = "0.1.0"
    valid_formats = ['tsv']

    def __init__(self,
                 schema: Union[str, TextIO, SchemaDefinition],
                 fmt: str = 'tsv') -> None:
        super().__init__(schema, fmt)
        self.dirname = None
        self.classtab: DictWriter = None
        self.slottab: DictWriter = None
        self.dialect = 'excel-tab'

    def visit_schema(self, **kwargs) -> None:
        self.classtab = DictWriter(sys.stdout, [
            'Class Name', 'Parent Class', 'YAML Class Name', 'Description',
            'Flags', 'Slot Name', 'YAML Slot Name', 'Range', 'Card',
            'Slot Description', 'URI'
        ],
                                   dialect=self.dialect)
        self.classtab.writeheader()

    def visit_class(self, cls: ClassDefinition) -> bool:
        self.classtab.writerow({
            'Class Name':
            camelcase(cls.name),
            'Parent Class':
            camelcase(cls.is_a) if cls.is_a else '',
            'YAML Class Name':
            cls.name,
            'Description':
            cls.description
        })
        return True

    def visit_class_slot(self, cls: ClassDefinition, aliased_slot_name: str,
                         slot: SlotDefinition) -> None:
        min_card = 1 if slot.required else 0
        max_card = "*" if slot.multivalued else 1
        abstract = 'A' if slot.abstract else ''
        key = 'K' if slot.key else ''
        identifier = 'I' if slot.identifier else ''
        readonly = 'R' if slot.readonly else ''
        ref = '*' if slot.range in self.schema.classes and not slot.inlined else ''
        self.classtab.writerow({
            'Slot Name':
            aliased_slot_name,
            'Flags':
            abstract + key + identifier + readonly,
            'Card':
            f"{min_card}..{max_card}",
            'YAML Slot Name':
            slot.name if slot.name != aliased_slot_name else '',
            'Range':
            ref + self.class_or_type_name(slot.range),
            'Slot Description':
            slot.description,
            'URI':
            slot.slot_uri
        })
Example No. 47
    next(csv_reader)
    with open(filename, "w") as file:
        csv_writer = writer(file)
        csv_writer.writerow(["name", "country","height"])
        for row in csv_reader:
            csv_writer.writerow([fighter.upper() for fighter in row])

with open(filename) as file:
    csv_reader = DictReader(file)
    with open("fighthers_inches.csv", "w") as file:
        csv_writer = DictWriter(file, fieldnames=csv_reader.fieldnames)
        csv_writer.writeheader()
        for row in list(csv_reader):
            csv_writer.writerow({
                csv_writer.fieldnames[0]: row[csv_writer.fieldnames[0]],
                csv_writer.fieldnames[1]: row[csv_writer.fieldnames[1]],
                csv_writer.fieldnames[2]: cm_to_in(row[csv_writer.fieldnames[2]])
            })


with open("cats.csv", "w") as file:
    headers = ["name", "breed", "age"]
    csv_writer = DictWriter(file, fieldnames=headers)
    csv_writer.writeheader()

    csv_writer.writerow({
       "name":"Garfield",
       "breed": "Orange Tabby",
       "age": 10
    })
Example No. 48
    # If this parameter is omitted, the library assumes ',' as the delimiter
    leitor_csv = DictReader(arquivo, delimiter=',')
    for linha in leitor_csv:
        print(f"{linha['Nome']}, {linha['País']}, {linha['Altura (em cm)']}")
        
        
with open('filmes.csv', 'a') as arquivo:
    escritor_csv = writer(arquivo)
    filme = None
    escritor_csv.writerow(['Titulo', 'Genero', 'Duracao'])
    while filme != 'sair':
        filme = input("Titulo: ")
        if filme != 'sair':
            
            genero = input("Genrero: ")
            duracao = input("Duracao: ")
            escritor_csv.writerow([filme, genero, duracao])
            
with open('filmes.csv', 'a') as arquivo:
    cabecalho = ['Titulo', 'Genero', 'Duracao']
    escritor_csv = DictWriter(arquivo, fieldnames=cabecalho)
    escritor_csv.writeheader()
    filme = None
    while filme != 'sair':
        filme = input("Titulo: ")
        if filme != 'sair':
            
            genero = input("Genrero: ")
            duracao = input("Duracao: ")
            escritor_csv.writerow({"Titulo": filme, "Genero": genero, "Duracao": duracao})
Example No. 49
# from csv import writer
# with open("cats.csv", "w") as file:
#     csv_writer = writer(file)
#     csv_writer.writerow(["Name", "Age"])
#     csv_writer.writerow(["Blue", 3])
#     csv_writer.writerow(["Kitty", 1])

from csv import writer, DictWriter
with open("cats.csv", "w") as file:
    headers = ["Name", "Breed", "Age"]
    csv_writer = DictWriter(file, fieldnames=headers)
    csv_writer.writeheader()
    csv_writer.writerow({
        "Name": "Garfield",
        "Breed": "Orange Tabby",
        "Age": 10
    })

# Another example: we want to convert fighter heights from cm to inches
# Want to read from fighters and then create a new file with inches
# Need to pass the cm value to the cm_to_in function and write that
# into the new file
from csv import DictReader, DictWriter


def cm_to_in(cm):  # Better would be to validate the argument
    return round(float(cm) * 0.393701, 2)


with open("fighters.csv") as file:
Example No. 50
    # and then, wherever I see .png, I remove the extension by replacing it with nothing.
    namee = i.split("_")
    name = namee[-1]
    name = name.replace(".png", "")

    # here, since the KAPILAR_0026_NORM-157 part is what is needed, I only strip the extension.
    sku = i.replace(".png", "")
    # and here the path of the image is given.
    uzanti = "erenodoor.com/wp-content/upload/images/" + i

    # above we checked for the keywords contained in the name and counted them.
    # here, whichever of those counts increased determines the category.
    if cat1 > 0:
        categori = "AYKA"

    elif cat2 > 0:
        categori = "NORM"

    else:
        categori = "YOF"

# either writerow or writerows can be used here
# Here we build a dictionary and write its contents to the file
    csv_writer.writerow({
        'id': id,
        'name': name,
        'sku': sku,
        'cat': categori,
        'uzanti': uzanti
    })
Example No. 51
                pid = question["_id"]["$oid"]
                ans = question["answer"]
                category = map_protobowl(question['category'],
                                         question.get('subcategory', ''))
                page = pa(ans, tk(question["question"]), pb=pid)
                fold = assign_fold(question["tournament"],
                                   question["year"])
                sents = add_question(conn, last_id, question["tournament"], category,
                             page, question["question"], ans, protobowl=pid,
                             fold=fold)

                for ii, ss in sents:
                    o.writerow({"id": pid,
                                 "sent": ii,
                                 "text": ss,
                                 "ans": ans,
                                 "page": page,
                                 "fold": fold})

                if page == "":
                    norm = QuestionDatabase.normalize_answer(ans)
                    if pa.is_ambiguous(norm):
                        ambiguous[norm][pid] = question["question"]
                    else:
                        unmapped[norm] += 1
                else:
                    folds[fold] += 1
                last_id += 1

                if last_id % 1000 == 0:
                    progress = pa.get_counts()
Exemplo n.º 52
def summarize_junctions(sample_dirs, sample_names, gff_filename, output_prefix, genome_d=None, junction_known=None):
    """
    1. For each sample, read all the GFF records and store the junction information (donor and acceptor, both 0-based).

    """
    junc_by_chr_strand = defaultdict(lambda: defaultdict(lambda: [])) # (chr,strand) --> (donor,acceptor) --> samples it show up in (more than once possible)

    for sample_name, d in sample_dirs.items():
        for r in GFF.collapseGFFReader(os.path.join(d, gff_filename)):
            n = len(r.ref_exons)
            if n == 1: continue # ignore single exon transcripts
            for i in range(n-1):
                donor = r.ref_exons[i].end-1 # make it 0-based
                accep = r.ref_exons[i+1].start # start is already 0-based
                junc_by_chr_strand[r.chr, r.strand][donor, accep].append(sample_name)

    # write junction report
    f1 = open(output_prefix+'.junction.bed', 'w')
    f1.write("track name=junctions description=\"{0}\" useScore=1\n".format(output_prefix))

    JUNC_DETAIL_FIELDS = ['chr', 'left', 'right', 'strand', 'num_transcript', 'num_sample', 'genome', 'annotation', 'label']


    with open(output_prefix+'.junction_detail.txt', 'w') as f:
        writer = DictWriter(f, JUNC_DETAIL_FIELDS, delimiter='\t')
        writer.writeheader()
        keys = list(junc_by_chr_strand.keys())
        keys.sort()
        for _chr, _strand in keys:
            v = junc_by_chr_strand[_chr, _strand]
            v_keys = list(v.keys())
            v_keys.sort()
            labels = cluster_junctions(v_keys)
            for i,(_donor, _accep) in enumerate(v_keys):
                rec = {'chr': _chr,
                       'left': _donor,
                       'right': _accep,
                       'strand': _strand,
                       'num_transcript': len(v[_donor,_accep]),
                       'num_sample': len(set(v[_donor,_accep]))}
                #f.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t".format(_chr, _donor, _accep, _strand, len(v[_donor,_accep]), len(set(v[_donor,_accep]))))
                f1.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n".format(_chr, _donor, _accep+1, output_prefix, len(v[_donor,_accep]), _strand))
                # if genome is given, write acceptor-donor site
                if genome_d is None or _chr not in genome_d:
                    rec['genome'] = 'NA'
                    #f.write("NA\t")
                else:
                    up, down = genome_d[_chr][_donor+1:_donor+3], genome_d[_chr][_accep-2:_accep]
                    if _strand == '+':
                        rec['genome'] = "{0}-{1}".format(str(up.seq).upper(), str(down.seq).upper())
                        #f.write("{0}-{1}\t".format(str(up.seq).upper(), str(down.seq).upper()))
                    else:
                        rec['genome'] = "{0}-{1}".format(str(down.reverse_complement().seq).upper(), str(up.reverse_complement().seq).upper())
                        #f.write("{0}-{1}\t".format(str(down.reverse_complement().seq).upper(), str(up.reverse_complement().seq).upper()))
                # if annotation is given, check if matches with annotation
                if junction_known is None:
                    rec['annotation'] = 'NA'
                    #f.write("NA\n")
                else:
                    if (_chr, _strand) in junction_known and (_donor, _accep) in junction_known[_chr, _strand]:
                        rec['annotation'] = 'Y'
                        #f.write("Y\t")
                    else:
                        rec['annotation'] = 'N'
                        #f.write("N\t")
                rec['label'] = "{c}_{s}_{lab}".format(c=_chr, s=_strand, lab=labels[i])
                writer.writerow(rec)
                #f.write("{c}_{s}_{lab}\n".format(c=_chr, s=_strand, lab=labels[i]))
    f1.close()

    return junc_by_chr_strand
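
# A minimal call sketch (the sample directories and the GFF file name
# 'touse.gff' are illustrative assumptions, not part of the original code):
if __name__ == '__main__':
    sample_dirs = {'sampleA': 'sampleA_dir', 'sampleB': 'sampleB_dir'}
    summarize_junctions(sample_dirs,
                        sample_names=['sampleA', 'sampleB'],
                        gff_filename='touse.gff',
                        output_prefix='junctions_out')
    # this writes junctions_out.junction.bed and junctions_out.junction_detail.txt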
Exemplo n.º 53
class CsvWriter(CsvData, DataWriter):
    """
    A CSV writer that creates a typical CSV file with a header row. It is very easy to use:

    .. code-block:: python

        fieldnames = ['id', 'name', 'surname', 'address']
        with CsvWriter('data.csv', fieldnames) as writer:
            writer.write_row(id=1, name='John', surname='Smith', address='Oxford street')

    Also, if the file name ends with .gz, the output is compressed with gzip automatically.
    """
    @property
    def fieldnames(self) -> List[str]:
        """
        :return: The sequence of field names to use as CSV head.
        """
        return self._fieldnames

    def __init__(self, file_or_io: Union[str, TextIO, BinaryIO], fieldnames: Union[List[str], type, object] = None,
                 mode: Mode = Mode.WRITE, encoding: str = 'utf-8') -> None:
        """ Constructor of this CSV writer.

        :param file_or_io: The file path or an opened stream to use. If it is a file path and it ends in .gz, then
        a compressed file is created using gzip.
        :param fieldnames: The field names of this CSV.
        :param mode: The writing mode: Mode.APPEND or Mode.WRITE. By default Mode.WRITE.
        :param encoding: The encoding (it is only used if the parameter file_or_io is a file path).
        :raises ValueError: If mode is not Mode.WRITE or Mode.APPEND, or if file_or_io is a file stream opened in
          write or append mode but that mode does not match the mode parameter.
        """
        CsvData.__init__(self, file_or_io, mode, encoding)
        DataWriter.__init__(self, file_or_io, mode)
        self._fieldnames = self._parse_fieldnames(fieldnames)

        self._writer = DictWriter(self._file, fieldnames=self.fieldnames)
        if mode == Mode.WRITE:
            self._writer.writeheader()
            self.__num_row = 0
        else:
            self.__num_row = None

    def write_row(self, **row) -> None:
        """ Write a row.

        :param row: The dictionary or parameters to write.
        """
        self._writer.writerow(row)
        if self.__num_row is not None:
            self.__num_row += 1

    def __len__(self) -> int:
        """
        Calculate the number of rows in the file.
        :return: The number of rows in the data source.
        :raises DataSourceError: If it is not possible to calculate the number of rows for this data source.
          This happens when the data comes from a file stream and the writer is opened in APPEND mode.
        """
        if self.__num_row is None:
            if self.file_name:
                with CsvReader(self.file_name, encoding=self.encoding) as reader:
                    self.__num_row = len(reader)
            else:
                raise DataSourceError(
                    'The length of the data source cannot be computed when it is defined as a file stream '
                    'instead of a file path and this writer is opened in APPEND mode.')
        return self.__num_row
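
# A usage sketch (assumes the surrounding module exposes Mode as used above;
# the file name 'data.csv' is illustrative):
if __name__ == '__main__':
    fieldnames = ['id', 'name', 'surname', 'address']
    with CsvWriter('data.csv', fieldnames) as writer:
        writer.write_row(id=1, name='John', surname='Smith', address='Oxford street')
    with CsvWriter('data.csv', fieldnames, mode=Mode.APPEND) as writer:
        writer.write_row(id=2, name='Jane', surname='Doe', address='Baker street')
        print(len(writer))  # in APPEND mode the row count is recomputed with CsvReader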
Exemplo n.º 54
        try:
            return [_ for _ in session][0]
        except:
            pass

    def write(self, attempts, queue):
        if not attempts:
            return

        with open(self.file, 'w') as csvfile:
            fieldnames = ['attempts', 'queue']
            writer = DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerow({'attempts': attempts, 'queue': queue})

    def delete(self):
        if path(self.file):
            try:
                remove(self.file)
            except:
                pass


Exemplo n.º 55
from csv import DictReader, DictWriter


def cm_to_in(cm):
    return int(cm) * 0.393701


with open("fighters.csv") as file:
    csv_reader = DictReader(file)
    fighters = list(csv_reader)

with open("inchesFighters.py", "w") as file:
    headers = ('Name', 'Country', 'Height')
    csv_writer = DictWriter(file, fieldnames=headers)
    csv_writer.writeheader()
    for f in fighters:
        csv_writer.writerow({
            'Name': f['Name'],
            'Country': f['Country'],
            'Height': cm_to_in(f['Height (in cm)'])
        })
Exemplo n.º 56
from csv import DictReader
from csv import DictWriter
from os.path import isfile
from os import environ
from os.path import join

inpath = environ['PATH_TO_UNUM']

baseoutpath = environ['UNUM_OUT_PATH']

count = 0

with open(inpath) as f:
    reader = DictReader(f, delimiter="\t")
    for line in reader:
        count += 1
        country_code = line["country_code"].lower() or "unknown"
        outpath = join(baseoutpath, country_code + ".tsv")
        if isfile(outpath):
            f = open(outpath, "a")
            writer = DictWriter(f, fieldnames=reader.fieldnames)
        else:
            f = open(outpath, "w")
            writer = DictWriter(f, fieldnames=reader.fieldnames)
            writer.writeheader()
        writer.writerow(line)
        f.close()

        if count % 100000 == 0:
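            # progress report; 38,000,000 is presumably the approximate total number of input rows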
            print("processed ", float(count) / 38000000)
Exemplo n.º 57
from csv import DictWriter
with open('agents.csv', 'w', newline='') as file:
    fieldnames = ['Agent Index']
    writer = DictWriter(file, fieldnames=fieldnames)

    writer.writeheader()
    for i in range(1000):
        writer.writerow({'Agent Index': i})
Exemplo n.º 58
    def process_eps(self, episodes):
        """ Export episodes metadata. """

        if self.options.basename:
            basename = self.options.basename
        else:
            # name the file after the client_show of the first episode
            # normaly a file will not span client or show
            ep = episodes[0]
            show = ep.show
            client = show.client
            self.set_dirs(show)
            basename = "%s_%s" % (client.slug, show.slug)

        json_pathname = os.path.join(self.show_dir, "txt", basename + ".json")
        csv_pathname = os.path.join(self.show_dir, "txt", basename + ".csv")
        txt_pathname = os.path.join(self.show_dir, "txt", basename + ".txt")
        wget_pathname = os.path.join(self.show_dir, "txt", basename + ".wget")
        sh_pathname = os.path.join(self.show_dir, "txt", basename + ".sh")
        curl_pathname = os.path.join(self.show_dir, "txt",
                                     basename + "_test.sh")
        html_pathname = os.path.join(self.show_dir, "txt", basename + ".html")
        # blip_pathname = os.path.join( self.show_dir, "txt", basename+"_blip.xml" )

        if self.options.verbose:
            print("filenames:")
            for n in (
                    json_pathname,
                    csv_pathname,
                    txt_pathname,
                    wget_pathname,
                    html_pathname,
            ):
                print(n)


        # fields to export:
        fields = ("id conf_key conf_url state name slug primary host_url "
                  "public_url source archive_mp4_url").split()

        # setup csv
        csv = DictWriter(open(csv_pathname, "w"), fields)
        # write out field names
        csv.writerow(dict(list(zip(fields, fields))))

        # setup txt
        txt = open(txt_pathname, "w")
        wget = open(wget_pathname, "w")
        sh = open(sh_pathname, "w")
        curl = open(curl_pathname, "w")
        # xml=open(blip_pathname, "w")

        # setup html (not full html, just some snippits)
        html = open(html_pathname, "w")

        # setup json (list written to file at end.)
        json_data = []

        # file headers
        sh.writelines("#! /bin/bash -ex\n\n")
        curl.writelines("#! /bin/bash -ex\n\n")

        # write out episode data
        for ep in episodes:
            if not ep.rax_mp4_url:
                # skip episodes that have not been uploaded yet.
                continue

            # fields includes output fields that are derived below
            # so fill them with None for now.
            row = dict([(f, getattr(ep, f, None)) for f in fields])
            if self.options.verbose: print(row)

            # blip_cli=blip_uploader.Blip_CLI()
            # blip_cli.debug = self.options.verbose

            # xml_code = blip_cli.Get_VideoMeta(ep.host_url)
            # if self.options.verbose: print xml_code
            # blip_meta = blip_cli.Parse_VideoMeta(xml_code)
            # if self.options.verbose: print blip_meta
            # if self.options.verbose: print pprint.pprint(blip_meta)

            # blip_xml=self.blip_meta(ep.host_url)
            # show_page = self.get_showpage(blip_xml)
            # row['blip'] = "%sfile/%s"%(show_page,ep.host_url)
            # row['blip'] = "http://blip.tv/file/%s"%(ep.host_url)

            # xml.write(blip_xml)
            # if self.options.verbose: print blip_xml

            # row['embed']=self.get_embed(blip_xml)
            # row['source']=self.get_media(blip_xml)

            # row['embed']=blip_meta['embed_code']
            # oggs = [i for i in blip_meta['contents'] if i['type']=='video/ogg']
            # if self.options.verbose: print pprint.pprint(oggs)
            # row['source']=oggs[0]

            row['name'] = row['name'].encode('utf-8')

            if self.options.verbose: print(row)
            json_data.append(row)
            csv.writerow(row)
            # txt.write("%s %s\n" % (row['blip'],row['name']))
            # html.write('<a href="%(blip)s">%(name)s</a>\n%(blip)s\n'%row)
            # wget.writelines(["%s\n" % c['url'] for c in blip_meta['contents']])
            wget.writelines(ep.rax_mp4_url + "\n")

            sh.writelines("wget -N '%s' -O %s.mp4\n" %
                          (ep.rax_mp4_url, ep.slug))

            curl.writelines("echo Checking %s ...\n" % (ep.slug))
            curl.writelines("curl -s --head  '%s' |grep -q '200 OK'\n" %
                            (ep.archive_mp4_url, ))
            curl.writelines("echo Passed.\n")

            if self.options.verbose:
                json.dump(json_data, open(json_pathname, "w"), indent=2)
            else:
                json.dump(json_data, open(json_pathname, "w"))
            pprint.pprint(json_data)
Exemplo n.º 59
		# check whether at least one object was detected
		if len(results) > 0:
			for i in results.flatten():
				x_min, y_min = bounding_boxes[i][0], bounding_boxes[i][1]
				box_width, box_height = bounding_boxes[i][2], bounding_boxes[i][3]
				colours_box_current = colours[class_numbers[i]].tolist()
				cv2.rectangle(frame, (x_min, y_min), (x_min + box_width, y_min + box_height), colours_box_current, 2)


				# Prepare the text with the label and confidence for the detected object.
				text_box_current = "{}: {:.4f}".format(labels[int(class_numbers[i])], confidences[i])

				# Put the label text on the detected objects
				cv2.putText(frame, text_box_current, (x_min, y_min - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colours_box_current, 2)

				escritor_csv.writerow( {"Detectado": text_box_current.split(":")[0], "Acuracia": text_box_current.split(":")[1]})
				print(text_box_current.split(":")[0] +" - "+ text_box_current.split(":")[1])


		cv2.namedWindow('Yolo v3 WebCamera', cv2.WINDOW_NORMAL)
		cv2.imshow("Yolo v3 Cam", frame)

		if cv2.waitKey(1) & 0xFF == ord('q'):
			break


camera.release()
cv2.destroyAllWindows()


Exemplo n.º 60
 def csv_results(self, csv_file, histogram_size=None):
     histogram_size = histogram_size or 10
     bins = range(histogram_size)
     bins.insert(0, "idea")
     bins.extend(["avg", "std_dev"])
     dw = DictWriter(csv_file, bins, dialect='excel', delimiter=';')
     dw.writeheader()
     by_idea = self._gather_results()
     values = {
         votable_id: self.results_for(voting_results, histogram_size)
         for (votable_id, voting_results) in by_idea.iteritems()
     }
     idea_names = dict(self.db.query(Idea.id, Idea.short_title).filter(
         Idea.id.in_(by_idea.keys())))
     idea_names = {
         id: name.encode('utf-8') for (id, name) in idea_names.iteritems()}
     ordered_idea_ids = Idea.visit_idea_ids_depth_first(
         AppendingVisitor(), self.get_discussion_id())
     ordered_idea_ids = [id for id in ordered_idea_ids if id in values]
     for idea_id in ordered_idea_ids:
         base = values[idea_id]
         r = dict(enumerate(base['histogram']))
         r['idea'] = idea_names[idea_id]
         r['avg'] = base['avg']
         r['std_dev'] = base['std_dev']
         dw.writerow(r)