def test_quote_none_quotechar_undefined(self):
    """A QUOTE_NONE dialect should not error if quotechar is undefined."""
    class CustomDialect(csv.Dialect):
        delimiter = '\t'
        skipinitialspace = False
        lineterminator = '\n'
        quoting = csv.QUOTE_NONE
    csv.writer(io.StringIO(), CustomDialect)
def fetch(self):
    fd, tmp_file = tempfile.mkstemp()
    pip = PointInPolygon(self.polygon_id, 60)

    traffic_signs = []
    reader = json.loads(open(self.mapping, 'r').read())
    try:
        for row in reader:
            traffic_signs += row['object']
    except Exception:
        self.logger.err(row)
        raise

    with open(tmp_file, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['accuracy', 'direction', 'image_key', 'first_seen_at', 'last_seen_at', 'value', 'X', 'Y'])

    # Query the requested sign values in batches of 10 per API call
    slice = lambda A, n: [A[i:i+n] for i in range(0, len(A), n)]
    bboxes = pip.bboxes()
    start_time = (datetime.today() - timedelta(days=365*2)).isoformat()[0:10]
    b = 0
    for traffic_signs_ in slice(traffic_signs, 10):
        b = b + 1
        self.logger.log('Batch {0}/{1}: {2}'.format(b, round(len(traffic_signs) / 10 + 0.5), ','.join(traffic_signs_)))
        for bbox in bboxes:
            url = 'https://a.mapillary.com/v3/map_features?bbox={bbox}&client_id={client_id}&layers={layer}&per_page=1000&start_time={start_time}&values={values}'.format(bbox=','.join(map(str, bbox)), layer=self.layer, client_id='MEpmMTFQclBTUWlacjV6RTUxWWMtZzo5OTc2NjY2MmRiMDUwYmMw', start_time=start_time, values=','.join(traffic_signs_))
            print(url)
            with open(tmp_file, 'a') as csvfile:
                writer = csv.writer(csvfile)
                r = None
                page = 0
                # Follow the paginated "next" links until exhausted
                while url:
                    page = page + 1
                    self.logger.log("Page {0}".format(page))
                    r = downloader.get(url)
                    url = r.links['next']['url'] if 'next' in r.links else None
                    features = r.json()['features']
                    filtered = 0
                    self.logger.log('{0} features fetched'.format(len(features)))
                    for j in features:
                        p = j['properties']
                        image_key = p['detections'][0]['image_key']
                        gc = j['geometry']['coordinates']
                        row = [p['accuracy'], p['direction'] if 'direction' in p else None, image_key, p['first_seen_at'], p['last_seen_at'], p['value']] + gc
                        if row[0] > 0.01 and pip.point_inside_polygon(gc[0], gc[1]):
                            writer.writerow(row)
                            filtered = filtered + 1
                    self.logger.log('{0} kept'.format(filtered))

    return tmp_file
def fetch(self, url, tmp_file, date_string=None):
    pip = PointInPolygon(self.polygon_id, 60)

    traffic_signs = []
    reader = json.loads(open(self.mapping, 'r').read())
    try:
        for row in reader:
            traffic_signs += row['object']
    except Exception:
        self.logger.err(row)
        raise

    with open(tmp_file, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['accuracy', 'direction', 'image_key', 'first_seen_at', 'last_seen_at', 'value', 'X', 'Y'])

    # Query the requested sign values in batches of 10 per API call
    slice = lambda A, n: [A[i:i+n] for i in range(0, len(A), n)]
    bboxes = pip.bboxes()
    start_time = (datetime.today() - timedelta(days=365*2)).isoformat()[0:10]
    b = 0
    for traffic_signs_ in slice(traffic_signs, 10):
        b = b + 1
        self.logger.log('Batch {0}/{1}: {2}'.format(b, ceil(len(traffic_signs) / 10.0), ','.join(traffic_signs_)))
        for bbox in bboxes:
            url = 'https://a.mapillary.com/v3/map_features?bbox={bbox}&client_id={client_id}&layers={layer}&per_page=1000&start_time={start_time}&values={values}'.format(bbox=','.join(map(str, bbox)), layer=self.layer, client_id='MEpmMTFQclBTUWlacjV6RTUxWWMtZzo5OTc2NjY2MmRiMDUwYmMw', start_time=start_time, values=','.join(traffic_signs_))
            self.logger.log(url)
            with open(tmp_file, 'a') as csvfile:
                writer = csv.writer(csvfile)
                r = None
                page = 0
                # Follow the paginated "next" links until exhausted
                while url:
                    page = page + 1
                    self.logger.log("Page {0}".format(page))
                    r = downloader.get(url)
                    url = r.links['next']['url'] if 'next' in r.links else None
                    features = r.json()['features']
                    filtered = 0
                    self.logger.log('{0} features fetched'.format(len(features)))
                    for j in features:
                        p = j['properties']
                        image_key = p['detections'][0]['image_key']
                        gc = j['geometry']['coordinates']
                        row = [p['accuracy'], p['direction'] if 'direction' in p else None, image_key, p['first_seen_at'], p['last_seen_at'], p['value']] + gc
                        if row[0] > 0.01 and pip.point_inside_polygon(gc[0], gc[1]):
                            writer.writerow(row)
                            filtered = filtered + 1
                    self.logger.log('{0} kept'.format(filtered))

    return True
def test_quote_all_quotechar_unset(self):
    """A QUOTE_ALL dialect should error if quotechar is unset."""
    class CustomDialect(csv.Dialect):
        delimiter = '\t'
        skipinitialspace = False
        lineterminator = '\n'
        quoting = csv.QUOTE_ALL
    with self.assertRaises(TypeError) as cx:
        csv.writer(io.StringIO(), CustomDialect)
    assert cx.exception.args[0] == 'quotechar must be set if quoting enabled'
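# A minimal standalone sketch of what the two dialect tests above assert:
# quotechar may stay unset only when quoting is disabled. Assumes the same
# csv module the tests exercise; the dialect names here are illustrative.
import csv
import io

class _NoQuoting(csv.Dialect):
    delimiter = '\t'
    skipinitialspace = False
    lineterminator = '\n'
    quoting = csv.QUOTE_NONE

class _AllQuoting(_NoQuoting):
    quoting = csv.QUOTE_ALL

csv.writer(io.StringIO(), _NoQuoting)  # accepted: QUOTE_NONE needs no quotechar
try:
    csv.writer(io.StringIO(), _AllQuoting)
except TypeError as exc:
    print(exc)  # quotechar must be set if quoting enabled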
def test_writerows(self):
    class BrokenFile:
        def write(self, buf):
            raise OSError
    writer = csv.writer(BrokenFile())
    self.assertRaises(OSError, writer.writerows, [['a']])

    with TemporaryFile("w+", newline='') as fileobj:
        writer = csv.writer(fileobj)
        self.assertRaises(TypeError, writer.writerows, None)
        writer.writerows([['a', 'b'], ['c', 'd']])
        fileobj.seek(0)
        self.assertEqual(fileobj.read(), "a,b\r\nc,d\r\n")
def get(self, request, *args, **kwargs):
    object_list = self.get_queryset()[:2000]

    # Do a reasonable ACL check for the global view
    acl_obj = self.translation or self.component or self.project
    if not acl_obj:
        for change in object_list:
            if change.component:
                acl_obj = change.component
                break

    if not request.user.has_perm('change.download', acl_obj):
        raise PermissionDenied()

    # Always output in English
    activate('en')

    response = HttpResponse(content_type='text/csv; charset=utf-8')
    response['Content-Disposition'] = 'attachment; filename=changes.csv'

    writer = csv.writer(response)

    # Add header
    writer.writerow(('timestamp', 'action', 'user', 'url', 'target'))

    for change in object_list:
        writer.writerow((
            change.timestamp.isoformat(),
            change.get_action_display(),
            change.user.username if change.user else '',
            get_site_url(change.get_absolute_url()),
            change.target,
        ))

    return response
def dumpCSV(self, sql, ext, head, callback):
    self.giscurs.execute(sql)
    row = []
    column = {}
    while True:
        many = self.giscurs.fetchmany(1000)
        if not many:
            break
        for res in many:
            row.append(res)
            # Count tag keys so columns can be ordered by frequency
            for k in res['tags'].keys():
                if k not in column:
                    column[k] = 1
                else:
                    column[k] += 1
    column = sorted(column, key=column.get, reverse=True)
    column = list(filter(lambda a: a != self.mapping.osmRef and a not in self.mapping.select.tags[0], column))
    column = [self.mapping.osmRef] + list(self.mapping.select.tags[0].keys()) + column

    buffer = io.StringIO()
    writer = csv.writer(buffer, lineterminator=u'\n')
    writer.writerow(head + column)
    for r in row:
        cc = []
        for c in column:
            tags = r['tags']
            if c in tags:
                cc.append(tags[c])
            else:
                cc.append(None)
        writer.writerow(callback(r, cc))

    with bz2.BZ2File(u"%s/%s-%s%s.csv.bz2" % (self.config.dst_dir, self.name, self.__class__.__name__, ext), mode='w') as csv_bz2_file:
        csv_bz2_file.write(buffer.getvalue().encode('utf-8'))
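# A minimal sketch of the buffer-then-compress pattern dumpCSV uses above:
# build the CSV in a text buffer, then write the encoded bytes through bz2.
# The output path and rows here are illustrative, not from the original code.
import bz2
import csv
import io

buf = io.StringIO()
w = csv.writer(buf, lineterminator=u'\n')
w.writerow(['ref', 'name'])
w.writerow(['A1', 'example'])
with bz2.BZ2File('/tmp/example.csv.bz2', mode='w') as f:
    f.write(buf.getvalue().encode('utf-8'))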
def _write_test(self, fields, expect, **kwargs):
    with TemporaryFile("w+", newline='') as fileobj:
        writer = csv.writer(fileobj, **kwargs)
        writer.writerow(fields)
        fileobj.seek(0)
        self.assertEqual(fileobj.read(), expect + writer.dialect.lineterminator)
def store_to_csv(store_infos):
    f = open('./store.csv', 'a', encoding='euc-kr')
    csvWriter = csv.writer(f)
    for e in store_infos:
        # Decode any euc-kr byte strings to text before writing
        if type(e.getName()) is not str:
            temp_name = e.getName().decode('euc-kr')
        else:
            temp_name = e.getName()
        if type(e.getBranch()) is not str:
            temp_branch = e.getBranch().decode('euc-kr')
        else:
            temp_branch = e.getBranch()
        if type(e.getAddress()) is not str:
            temp_address = e.getAddress().decode('euc-kr')
        else:
            temp_address = e.getAddress()
        if type(e.getPhoneNum()) is not str:
            temp_phone_num = e.getPhoneNum().decode('euc-kr')
        else:
            temp_phone_num = e.getPhoneNum()
        csvWriter.writerow([temp_name, temp_branch, temp_address, temp_phone_num])
    f.close()
def save_file_set(output_filename, delimiter, header_row, set_data_rows):
    mime_type = 'text/tsv' if delimiter == '\t' else 'text/csv'
    with open_file(output_filename, 'w', mime_type=mime_type) as f:
        writer = csv.writer(f, delimiter=text_type(delimiter))
        if header_row:
            write_csv_rows(writer, [header_row])
        write_csv_rows(writer, set_data_rows)
def process_item(self, item, spider):
    isin = spider.isin
    reader = csv.reader(io.StringIO(item['csv']), delimiter=';')
    result = []
    next(reader)  # Skip header
    for line in reader:
        if not line:
            continue
        day = line[0]
        value = line[4]
        result.append((day, value))

    min_time = result[0][0]
    max_time = result[-1][0]
    output_filename = self.save_file.format(isin, min_time, max_time)
    with io.open(output_filename, 'w', newline='', encoding='UTF-8') as output:
        writer = csv.writer(output, delimiter=';')
        writer.writerow(['Datum', 'Kurs'])
        for t in result:
            writer.writerow([t[0], t[1]])
def run(self):
    with io.open(self.output_file, 'w', newline='', encoding='utf8') as fh:
        w = csv.writer(fh)
        self._get_column_names()
        w.writerow(self.columns)
        with click.progressbar(self.data['days'],
                               label='Exporting to CSV file...',
                               fill_char=click.style('#', fg='blue')) as days:
            for day in days:
                data = [
                    day['date'],
                    day['grand_total']['total_seconds'],
                ]
                self._add_data_for_columns(data, self.projects, day['projects'])
                self._add_data_for_columns(data, self.entities, day['entities'])
                self._add_data_for_columns(data, self.languages, day['languages'])
                self._add_data_for_columns(data, self.editors, day['editors'])
                self._add_data_for_columns(data, self.operating_systems, day['operating_systems'])
                w.writerow(data)
def write_csv(self, f):
    headings = [
        'comment',
        'is_helpful',
        'page',
        'referrer',
        'submitted_on',
        'language',
    ]

    writer = csv.writer(f, quoting=csv.QUOTE_ALL)
    writer.writerow(headings)

    for feedback in self:
        # For legacy compatibility purposes, generated CSVs should contain
        # only the date feedback was submitted, and not the complete
        # timestamp. Timestamps are stored in the database as UTC, but
        # we want them to be exported in the Django default timezone
        # specified in settings.TIME_ZONE, which is America/New_York.
        feedback.submitted_on = localdate(feedback.submitted_on).strftime('%Y-%m-%d')

        writer.writerow([
            force_str(getattr(feedback, heading), strings_only=True)
            for heading in headings
        ])
def dontruntest_big_csvs():
    cols = [
        Column(name='name', columnType='STRING', maximumSize=1000),
        Column(name='foo', columnType='STRING', enumValues=['foo', 'bar', 'bat']),
        Column(name='x', columnType='DOUBLE'),
        Column(name='n', columnType='INTEGER'),
        Column(name='is_bogus', columnType='BOOLEAN'),
    ]
    schema1 = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    # write rows to CSV file
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        schedule_for_cleanup(temp.name)
        filename = temp.name

    with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
        writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
        writer.writerow([col.name for col in cols])
        for i in range(10):
            for j in range(100):
                foo = cols[1].enumValues[random.randint(0, 2)]
                writer.writerow(('Robot ' + str(i * 100 + j),
                                 foo,
                                 random.random() * 200.0,
                                 random.randint(0, 100),
                                 random.random() >= 0.5))

    # upload CSV
    syn._uploadCsv(filepath=temp.name, schema=schema1)

    from synapseclient.table import CsvFileTable
    CsvFileTable.from_table_query(syn, "select * from %s" % schema1.id)
def _write_error_test(self, exc, fields, **kwargs): with TemporaryFile("w+", newline='') as fileobj: writer = csv.writer(fileobj, **kwargs) with self.assertRaises(exc): writer.writerow(fields) fileobj.seek(0) self.assertEqual(fileobj.read(), '')
def writeUniqueResults(clustered_dupes, input_file, output_file):
    # Write our original data back out to a CSV with a new column called
    # 'Cluster ID' which indicates which records refer to each other.
    logging.info('saving unique results to: %s' % output_file)

    cluster_membership = {}
    for cluster_id, (cluster, score) in enumerate(clustered_dupes):
        for record_id in cluster:
            cluster_membership[record_id] = cluster_id

    unique_record_id = cluster_id + 1

    writer = csv.writer(output_file)
    reader = csv.reader(StringIO(input_file))

    heading_row = next(reader)
    heading_row.insert(0, u'Cluster ID')
    writer.writerow(heading_row)

    seen_clusters = set()
    for row_id, row in enumerate(reader):
        if row_id in cluster_membership:
            cluster_id = cluster_membership[row_id]
            if cluster_id not in seen_clusters:
                row.insert(0, cluster_id)
                writer.writerow(row)
                seen_clusters.add(cluster_id)
        else:
            cluster_id = unique_record_id
            unique_record_id += 1
            row.insert(0, cluster_id)
            writer.writerow(row)
def writeResults(clustered_dupes, input_file, output_file):
    # Write our original data back out to a CSV with a new column called
    # 'Cluster ID' which indicates which records refer to each other.
    logging.info('saving results to: %s' % output_file)

    cluster_membership = {}
    for cluster_id, (cluster, score) in enumerate(clustered_dupes):
        for record_id in cluster:
            cluster_membership[record_id] = cluster_id

    unique_record_id = cluster_id + 1

    writer = csv.writer(output_file)
    reader = csv.reader(StringIO(input_file))

    heading_row = next(reader)
    heading_row.insert(0, u'Cluster ID')
    writer.writerow(heading_row)

    for row_id, row in enumerate(reader):
        if row_id in cluster_membership:
            cluster_id = cluster_membership[row_id]
        else:
            cluster_id = unique_record_id
            unique_record_id += 1
        row.insert(0, cluster_id)
        writer.writerow(row)
def open(self):
    # Cheat the parent open
    self.encoding = 'UTF-8'
    f = Source.open(self)
    csvreader = csv.reader(f, delimiter=u';')
    structureet = [
        u'nofinesset,nofinessej,rs,rslongue,complrs,compldistrib,numvoie,typvoie,voie,compvoie,lieuditbp,commune,departement,libdepartement,ligneacheminement,telephone,telecopie,categetab,libcategetab,categagretab,libcategagretab,siret,codeape,codemft,libmft,codesph,libsph,dateouv,dateautor,datemaj,numuai,coordxet,coordyet,sourcecoordet,datemajcoord'.split(',')
    ]
    geolocalisation = {}
    for row in csvreader:
        if row[0] == 'structureet':
            structureet.append(row[1:])
        elif row[0] == 'geolocalisation':
            geolocalisation[row[1]] = row[2:]
    for row in structureet:
        row += geolocalisation.get(row[0], [])
    csvfile = io.StringIO()
    writer = csv.writer(csvfile)
    for row in structureet:
        writer.writerow(row)
    csvfile.seek(0)
    return csvfile
def export_to_csv(self, result_list, export_filename="ACRCloud_ScanFile_Results.csv", export_dir="./"):
    try:
        results = []
        for item in result_list:
            filename = item["file"]
            timestamp = item["timestamp"]
            jsoninfo = item["result"]
            if "status" in jsoninfo and jsoninfo["status"]["code"] == 0:
                row = self.parse_data(jsoninfo)
                row = [filename, timestamp] + list(row)
                results.append(row)
        results = sorted(results, key=lambda x: x[1])

        export_filepath = os.path.join(export_dir, export_filename)
        with codecs.open(export_filepath, 'w', 'utf-8-sig') as f:
            head_row = ['filename', 'timestamp', 'custom_files_title', 'custom_acrid',
                        'title', 'artists', 'album', 'acrid', 'played_duration',
                        'label', 'isrc', 'upc', 'dezzer', 'spotify', 'itunes', 'youtube']
            dw = csv.writer(f)
            dw.writerow(head_row)
            dw.writerows(results)
        if self.debug:
            self.log.info("export_to_csv.Save Data to csv: {0}".format(export_filename))
    except Exception as e:
        self.log.error("Error@export_to_csv", exc_info=True)
def write_csv(filename, rows):
    with io.open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["latitude", "longitude", "datetime"])
        for row in rows:
            writer.writerow([row["latitude"], row["longitude"], row["datetime"]])
def test_unicode_write(self): import io with TemporaryFile("w+", newline='', encoding="utf-8") as fileobj: writer = csv.writer(fileobj) writer.writerow(self.names) expected = ",".join(self.names) + "\r\n" fileobj.seek(0) self.assertEqual(fileobj.read(), expected)
def to_csv(table):
    with self.lock, self.conn.begin():
        result = self.conn.execute(select([table]))
        filepath = os.path.join(dirpath, table.name + ".csv")
        with io.open(filepath, mode="w", encoding="utf-8") as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(table.columns.keys())
            csv_writer.writerows(result)
def test_roundtrip_escaped_unquoted_newlines(self): with TemporaryFile("w+", newline="") as fileobj: writer = csv.writer(fileobj, quoting=csv.QUOTE_NONE, escapechar="\\") rows = [["a\nb", "b"], ["c", "x\r\nd"]] writer.writerows(rows) fileobj.seek(0) for i, row in enumerate(csv.reader(fileobj, quoting=csv.QUOTE_NONE, escapechar="\\")): self.assertEqual(row, rows[i])
def compare_dialect_123(self, expected, *writeargs, **kwwriteargs): with TemporaryFile("w+", newline="", encoding="utf-8") as fileobj: writer = csv.writer(fileobj, *writeargs, **kwwriteargs) writer.writerow([1, 2, 3]) fileobj.seek(0) self.assertEqual(fileobj.read(), expected)
def write_data(self, buf, rows=None, **kwargs):
    if rows is None:
        rows = self.get_rows()
    csv_writer = csv.writer(
        codecs.getwriter(self.get_encoding())(buf, errors='replace'),
        delimiter=self.get_delimiter())
    for row in rows:
        csv_writer.writerow(row)
def format_csv_rows(rows, delimiter=','):
    get_logger().debug('format_csv_rows, rows: %s', rows)
    out = StringIO()
    writer = csv.writer(out, delimiter=text_type(delimiter))
    writer.writerows([[_to_text(x) for x in row] for row in rows])
    result = out.getvalue().rstrip('\r\n')
    get_logger().debug('format_csv_rows, result: %s', result)
    return result
def test_roundtrip_quoteed_newlines(self): with TemporaryFile("w+", newline="") as fileobj: writer = csv.writer(fileobj) self.assertRaises(TypeError, writer.writerows, None) rows = [["a\nb", "b"], ["c", "x\r\nd"]] writer.writerows(rows) fileobj.seek(0) for i, row in enumerate(csv.reader(fileobj)): self.assertEqual(row, rows[i])
def _write_csv(file_path, data, delimiter=DEFAULT_DELIMITER, lineterminator=DEFAULT_LINETERMINATOR):
    with io.open(file_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, delimiter=delimiter, lineterminator=lineterminator)
        writer.writerows(data)
def save_file_pairs_to_csv(output_path, source_xml_pairs):
    mkdirs_if_not_exists(dirname(output_path))
    delimiter = csv_delimiter_by_filename(output_path)
    mime_type = 'text/tsv' if delimiter == '\t' else 'text/csv'
    with open_file(output_path, 'w', mime_type=mime_type) as f:
        writer = csv.writer(f, delimiter=text_type(delimiter))
        write_csv_rows(writer, [['source_url', 'xml_url']])
        write_csv_rows(writer, source_xml_pairs)
    LOGGER.info('written results to %s', output_path)
def test_quote_nonnumeric_decimal(self): """Decimals should not be quoted with non-numeric quoting.""" import decimal with TemporaryFile('w+', newline='', encoding='utf-8') as fileobj: writer = csv.writer(fileobj, quoting=csv.QUOTE_NONNUMERIC) writer.writerow([10, 10.0, decimal.Decimal('10.0'), '10.0']) expected = '10,10.0,10.0,"10.0"\r\n' fileobj.seek(0) self.assertEqual(fileobj.read(), expected)
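# Standalone sketch of the behaviour the test above asserts: under
# QUOTE_NONNUMERIC, int, float, and Decimal values are written bare and only
# the string is quoted. Assumes the same csv module the suite exercises.
import csv
import decimal
import io

out = io.StringIO()
csv.writer(out, quoting=csv.QUOTE_NONNUMERIC).writerow(
    [10, 10.0, decimal.Decimal('10.0'), '10.0'])
print(repr(out.getvalue()))  # '10,10.0,10.0,"10.0"\r\n'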
def test_float_write(self):
    import array
    contents = [(20 - i) * 0.1 for i in range(20)]
    a = array.array(str('f'), contents)
    with TemporaryFile("w+", newline='') as fileobj:
        writer = csv.writer(fileobj, dialect="excel")
        writer.writerow(a)
        expected = ",".join([str(i) for i in a]) + "\r\n"
        fileobj.seek(0)
        self.assertEqual(fileobj.read(), expected)
def test_char_write(self): import array, string a = array.array(str('u'), text_type(string.ascii_letters)) with TemporaryFile("w+", newline='') as fileobj: writer = csv.writer(fileobj, dialect="excel") writer.writerow(a) expected = ",".join(a)+"\r\n" fileobj.seek(0) self.assertEqual(fileobj.read(), expected)
def get(self):
    f = io.StringIO()
    writer = csv.writer(f)

    headers = [
        'User ID',
        'Username',
        'First Name',
        'Last Name',
        'Email',
        'Telephone',
        'Enabled',
        'Admin',
        'Last Login',
        'Last Active',
        'Cohorts',
        'Hospitals',
        'Roles',
    ]
    writer.writerow(headers)

    def get_groups(user, group_type):
        """Comma-separated list of groups."""
        groups = [x.name for x in user.groups if x.type == group_type]
        groups = sorted(groups)
        groups = uniq(groups)
        return ', '.join(groups)

    def get_roles(user):
        """Comma-separated list of roles."""
        roles = [gu.role.name for gu in user.group_users]
        return ', '.join(sorted(set(roles)))

    users = list_users()

    for user in users:
        output = []
        output.append(user.id)
        output.append(user.username)
        output.append(user.first_name)
        output.append(user.last_name)
        output.append(user.email)
        output.append(user.telephone_number)
        output.append(user.is_enabled)
        output.append(user.is_admin)
        output.append(user.last_login_date)
        output.append(user.last_active_date)
        output.append(get_groups(user, GROUP_TYPE.COHORT))
        output.append(get_groups(user, GROUP_TYPE.HOSPITAL))
        output.append(get_roles(user))
        writer.writerow(output)

    return Response(f.getvalue(), content_type='text/csv')
def read(self, read_len):
    # Lazily render the header row on the first read
    if not hasattr(self, 'buffer'):
        with io.StringIO() as out:
            csv.writer(out).writerow(self.headers)
            self.buffer = out.getvalue()
    # Render rows in chunks of 1000 until the buffer can satisfy the read
    while self.reader is not None and len(self.buffer) < read_len:
        with io.StringIO() as out:
            writer = csv.writer(out)
            try:
                for i in range(1000):
                    row = next(self.reader)
                    writer.writerow(row)
                    self.count += 1
                    if self.count % 1000 == 0:
                        print('Loaded %s...' % self.count)
            except StopIteration:
                self.reader = None
            self.buffer += out.getvalue()
    chunk, self.buffer = self.buffer[:read_len], self.buffer[read_len:]
    return chunk
def from_list_of_rows(cls, schema, values, filepath=None, etag=None,
                      quoteCharacter='"', escapeCharacter="\\",
                      lineEnd=str(os.linesep), separator=",", linesToSkip=0,
                      includeRowIdAndRowVersion=None, headers=None):
    # create CSV file
    f = None
    try:
        if not filepath:
            temp_dir = tempfile.mkdtemp()
            filepath = os.path.join(temp_dir, 'table.csv')

        f = io.open(filepath, 'w', encoding='utf-8', newline='')

        writer = csv.writer(f,
                            quoting=csv.QUOTE_NONNUMERIC,
                            delimiter=separator,
                            escapechar=escapeCharacter,
                            lineterminator=lineEnd,
                            quotechar=quoteCharacter,
                            skipinitialspace=linesToSkip)

        # if we haven't explicitly set columns, try to grab them from
        # the schema object
        if not headers and "columns_to_store" in schema and schema.columns_to_store is not None:
            headers = [SelectColumn.from_column(col) for col in schema.columns_to_store]

        # write headers?
        if headers:
            writer.writerow([header.name for header in headers])
            header = True
        else:
            header = False

        # write row data
        for row in values:
            writer.writerow(row)
    finally:
        if f:
            f.close()

    return cls(
        schema=schema,
        filepath=filepath,
        etag=etag,
        quoteCharacter=quoteCharacter,
        escapeCharacter=escapeCharacter,
        lineEnd=lineEnd,
        separator=separator,
        header=header,
        headers=headers,
        includeRowIdAndRowVersion=includeRowIdAndRowVersion)
def __init__(self, fileobj, header=False, dialect=CSV_DIALECT,
             encoding='utf-8', **kwargs):
    self.fileobj = fileobj
    self.header = header
    self.dialect = dialect
    self.encoding = encoding
    self.keywords = kwargs
    self.count = 0
    self._first_row = None
    # The csv writer outputs strings, so we stick a transcoding shim between
    # the writer and the output object.
    self._writer = csv_.writer(
        codecs.getwriter(self.encoding)(self.fileobj),
        dialect=self.dialect,
        **self.keywords)
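# Usage sketch for the transcoding shim above: csv.writer emits text, so
# wrapping a binary stream with codecs.getwriter(encoding) encodes each write
# on the fly. The BytesIO target here is illustrative, not from the original.
import codecs
import csv
import io

binary_buf = io.BytesIO()
writer = csv.writer(codecs.getwriter('utf-8')(binary_buf), lineterminator='\n')
writer.writerow(['name', 'città'])  # non-ASCII is encoded as UTF-8 bytes
print(binary_buf.getvalue())  # b'name,citt\xc3\xa0\n'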
def _get_project_strings_csv(project, entities, output):
    """Return the CSV content of all strings and translations for a project.

    The file format looks as follows:

        source, locale_code_1, locale_code_2
        "string A", "translation A1", "translation A2"
        "string B", "translation B1", "translation B2"

    The first column has all source strings. Then there is one column per
    enabled locale, each containing available translations for each source
    string (or an empty cell). The first line contains the code of each
    locale, except for the first cell, which is always "source".

    :arg Project project: the project from which to take strings
    :arg list entities: the list of all entities of the project
    :arg buffer output: a buffer to which the CSV writer will send its data

    :returns: the same output object with the CSV data
    """
    locales = Locale.objects.filter(project_locale__project=project)
    translations = (
        Translation.objects
        .filter(
            entity__resource__project=project,
            approved=True,
        )
        .prefetch_related('locale')
        .prefetch_related('entity')
    )
    all_data = dict((x.id, {'source': x.string}) for x in entities)

    for translation in translations:
        all_data[translation.entity.id][translation.locale.code] = translation.string

    writer = csv.writer(output)
    headers = ['source'] + [x.code for x in locales]
    writer.writerow(headers)
    for string in all_data.values():
        row = [string.get(key, '') for key in headers]
        writer.writerow(row)

    return output
def writeLinkedResults(clustered_pairs, input_1, input_2, output_file, inner_join=False):
    logging.info('saving linked results to: %s' % output_file)

    matched_records = []
    seen_1 = set()
    seen_2 = set()

    input_1 = [row for row in csv.reader(StringIO(input_1))]
    row_header = input_1.pop(0)
    length_1 = len(row_header)

    input_2 = [row for row in csv.reader(StringIO(input_2))]
    row_header_2 = input_2.pop(0)
    length_2 = len(row_header_2)
    row_header += row_header_2

    for pair in clustered_pairs:
        index_1, index_2 = [int(index.split('|', 1)[1]) for index in pair[0]]
        matched_records.append(input_1[index_1] + input_2[index_2])
        seen_1.add(index_1)
        seen_2.add(index_2)

    writer = csv.writer(output_file)
    writer.writerow(row_header)

    for matches in matched_records:
        writer.writerow(matches)

    # For an outer join, pad unmatched rows from each side with empty cells
    if not inner_join:
        for i, row in enumerate(input_1):
            if i not in seen_1:
                writer.writerow(row + [None] * length_2)

        for i, row in enumerate(input_2):
            if i not in seen_2:
                writer.writerow([None] * length_1 + row)
def dontruntest_big_csvs():
    cols = []
    cols.append(Column(name='name', columnType='STRING', maximumSize=1000))
    cols.append(Column(name='foo', columnType='STRING', enumValues=['foo', 'bar', 'bat']))
    cols.append(Column(name='x', columnType='DOUBLE'))
    cols.append(Column(name='n', columnType='INTEGER'))
    cols.append(Column(name='is_bogus', columnType='BOOLEAN'))

    schema1 = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    print("Created table:", schema1.id)
    print("with columns:", schema1.columnIds)

    # write rows to CSV file
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        schedule_for_cleanup(temp.name)
        filename = temp.name

    with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
        writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
        writer.writerow([col.name for col in cols])
        for i in range(10):
            for j in range(100):
                foo = cols[1].enumValues[random.randint(0, 2)]
                writer.writerow(('Robot ' + str(i * 100 + j),
                                 foo,
                                 random.random() * 200.0,
                                 random.randint(0, 100),
                                 random.random() >= 0.5))
            print("wrote 100 rows to disk")

    # upload CSV
    UploadToTableResult = syn._uploadCsv(filepath=temp.name, schema=schema1)

    from synapseclient.table import CsvFileTable
    results = CsvFileTable.from_table_query(syn, "select * from %s" % schema1.id)
    print("etag:", results.etag)
    print("tableId:", results.tableId)

    for row in results:
        print(row)
def open(self):
    # Cheat the parent open
    encoding, self.encoding = self.encoding, 'UTF-8'
    f = Source.open(self)
    csvreader = csv.reader(f, delimiter=u';')
    structureet = [
        u'nofinesset,nofinessej,rs,rslongue,complrs,compldistrib,numvoie,typvoie,voie,compvoie,lieuditbp,commune,departement,libdepartement,ligneacheminement,telephone,telecopie,categetab,libcategetab,categagretab,libcategagretab,siret,codeape,codemft,libmft,codesph,libsph,dateouv,dateautor,datemaj,numuai,coordxet,coordyet,sourcecoordet,datemajcoord'.split(',')
    ]
    geolocalisation = {}
    for row in csvreader:
        if row[0] == 'structureet':
            structureet.append(row[1:])
        elif row[0] == 'geolocalisation':
            geolocalisation[row[1]] = row[2:]
    for row in structureet:
        row += geolocalisation.get(row[0], [])
    csvfile = io.StringIO()
    writer = csv.writer(csvfile)
    for row in structureet:
        writer.writerow(row)
    csvfile.seek(0)
    return csvfile
def export_domains(request):
    ctx = {
        "title": _("Export domains"),
        "action_label": _("Export"),
        "action_classes": "submit",
        "formid": "exportform",
        "action": reverse("admin:domain_export"),
    }
    if request.method == "POST":
        form = ExportDomainsForm(request.POST)
        form.is_valid()
        fp = six.StringIO()
        csvwriter = csv.writer(fp, delimiter=form.cleaned_data["sepchar"])
        for dom in get_domains(request.user, **request.session["domains_filters"]):
            dom.to_csv(csvwriter)
        content = fp.getvalue()
        fp.close()
        return _export(content, form.cleaned_data["filename"])
    ctx["form"] = ExportDomainsForm()
    return render(request, "common/generic_modal_form.html", ctx)
import io
import sys

import backports.csv as csv
import html2text
from markdown import markdown

CSV_FILE_NAME = 'forums.csv'  # the file to import
NEW_CSV_FILE_NAME = 'forums_markdown.csv'  # the file to create

# some content fields are bigger than csv.field_size_limit
csv.field_size_limit(sys.maxsize)

with io.open(CSV_FILE_NAME, 'r') as csvfile, io.open(NEW_CSV_FILE_NAME, 'w') as writecsvfile:
    reader = csv.reader(csvfile, delimiter=u',', quotechar=u'"')
    writer = csv.writer(writecsvfile)
    counter = 0
    for row in reader:
        col_number = 0
        my_row = []
        for col in row:
            if col_number == 3:
                # use the permalink as the file name
                title = col
            col_number = col_number + 1
            if col_number == 6:  # & counter != 0:
                # aha, a content field!
                h = html2text.HTML2Text()
                markdown_col = h.handle(col)
                my_row.append(markdown_col)
def get(self):
    f = io.StringIO()
    writer = csv.writer(f)

    args = parse_args(PatientListRequestSerializer)
    cohorts = [i for i in args['group'] if i.type == GROUP_TYPE.COHORT]

    headers = [
        'Patient ID',
        'First Name',
        'Last Name',
        'Date of Birth',
        'Year of Birth',
        'Date of Death',
        'Year of Death',
        'Gender',
        'Gender Label',
        'Ethnicity',
        'Ethnicity Label',
        'Patient Number',
        'PV',
        'Recruited On',
        'Recruited Group Name',
        'Recruited Group Code',
        'Cohorts',
        'Hospitals',
    ]
    for cohort in cohorts:
        headers.append(cohort.short_name)
    writer.writerow(headers)

    def get_groups(patient, group_type):
        """Comma-separated list of groups."""
        groups = [x.name for x in patient.current_groups if x.type == group_type]
        groups = sorted(groups)
        groups = uniq(groups)
        return ', '.join(groups)

    patients = list_patients()

    for patient in patients:
        # Wrap the patient so demographics aren't exposed to unprivileged users
        patient = SkipProxy(PatientProxy(patient, current_user))

        output = []
        output.append(patient.id)
        output.append(patient.first_name)
        output.append(patient.last_name)
        output.append(patient.date_of_birth)
        output.append(patient.year_of_birth)
        output.append(patient.date_of_death)
        output.append(patient.year_of_death)
        output.append(patient.gender)
        output.append(patient.gender_label)
        output.append(patient.available_ethnicity)
        output.append(patient.ethnicity_label)
        output.append(get_attrs(patient, 'primary_patient_number', 'number'))
        output.append('Y' if patient.ukrdc else 'N')
        output.append(patient.recruited_date())
        output.append(get_attrs(patient.recruited_group(), 'name'))
        output.append(get_attrs(patient.recruited_group(), 'code'))
        output.append(get_groups(patient, GROUP_TYPE.COHORT))
        output.append(get_groups(patient, GROUP_TYPE.HOSPITAL))

        for cohort in cohorts:
            output.append(patient.recruited_date(cohort))

        writer.writerow(output)

    return Response(f.getvalue(), content_type='text/csv')
def list2file(string_list, filepath):
    with io.open(filepath, 'w') as csvfile:
        writer = csv.writer(csvfile, doublequote=True, quoting=csv.QUOTE_MINIMAL)
        for string in string_list:
            writer.writerow([string])
def print_csv(field_items, fileobj=sys.stdout, fields=None):
    writer = csv.writer(codecs.getwriter('utf8')(fileobj))
    fields = field_items['fields'] if fields is None else fields
    writer.writerow(fields)
    for row in field_items['items']:
        writer.writerow([row[field] for field in fields])
def writerAssertEqual(self, input, expected_result):
    with TemporaryFile("w+", newline='') as fileobj:
        writer = csv.writer(fileobj, dialect=self.dialect)
        writer.writerows(input)
        fileobj.seek(0)
        self.assertEqual(fileobj.read(), expected_result)
def test_csv_table():
    # Maybe not truly a unit test, but here because it doesn't do
    # network IO to synapse
    data = [["1", "1", "John Coltrane", 1926, 8.65, False],
            ["2", "1", "Miles Davis", 1926, 9.87, False],
            ["3", "1", "Bill Evans", 1929, 7.65, False],
            ["4", "1", "Paul Chambers", 1935, 5.14, False],
            ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
            ["6", "1", "Scott LaFaro", 1936, 4.21, False],
            ["7", "1", "Sonny Rollins", 1930, 8.99, True],
            ["8", "1", "Kenny Burrel", 1931, 4.37, True]]

    filename = None

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]

    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    # TODO: use StringIO.StringIO(data) rather than writing files
    try:
        # create CSV file
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert_is_instance(table, CsvFileTable)

        # need to set column headers to read a CSV file
        table.setColumnHeaders(
            [SelectColumn(name="ROW_ID", columnType="STRING"),
             SelectColumn(name="ROW_VERSION", columnType="STRING")] +
            [SelectColumn.from_column(col) for col in cols])

        # test iterator
        for table_row, expected_row in zip(table, data):
            assert_equals(table_row, expected_row)

        # test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert_equals(rowset_row['values'], expected_row[2:])
            assert_equals(rowset_row['rowId'], expected_row[0])
            assert_equals(rowset_row['versionNumber'], expected_row[1])

        df = table.asDataFrame()
        assert_equals(list(df['Name']), [row[2] for row in data])
        assert_equals(list(df['Born']), [row[3] for row in data])
        assert_equals(list(df['Living']), [row[5] for row in data])
        assert_equals(list(df.index), ['%s_%s' % tuple(row[0:2]) for row in data])
        assert_equals(df.shape, (8, 4))

    except Exception:
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)
        raise
def _write_error_test(self, exc, fields, **kwargs): with TemporaryFile("w+", newline="") as fileobj: writer = csv.writer(fileobj, **kwargs) self.assertRaises(exc, writer.writerow, fields) fileobj.seek(0) self.assertEqual(fileobj.read(), "")