def convert(self):
    """Convert the prepared OPML document body to Todoist CSV.

    Opens the target file, writes the header row, then walks every
    top-level outline element and emits its rows.
    """
    body = self._prepare_document()
    with codecs.open(self.target_name, 'w+') as target:
        self.writer = UnicodeWriter(target, FIELDNAMES)
        self._write_header_row()
        for element in body:
            self.process_element(element)
def convert_opml_to_csv(args):
    """Convert OPML file to Todoist CSV."""
    root = ET.parse(args.file).getroot()
    body = root.find('body')
    with codecs.open(target_name(args.file, 'csv'), 'w+') as target:
        writer = UnicodeWriter(target, FIELDNAMES)
        writer.writerow(FIELDNAMES)

        def make_row(type='', content='', indent=''):
            # Eight columns: TYPE, CONTENT, blank, INDENT, then four empty cells.
            return [type, content, '', indent, '', '', '', '']

        def process_element(outline, level=1):
            # Task row for the element's own text, indented by nesting level.
            writer.writerow(make_row(TYPE_TASK, outline.get('text'), str(level)))
            note = outline.get(NOTE_ATTRIB)
            if note:
                writer.writerow(make_row(TYPE_NOTE, note))
            # Blank separator row between tasks.
            writer.writerow(make_row())
            for child in outline.findall('outline'):
                process_element(child, level + 1)

        for outline in body:
            process_element(outline)
def dataset(query_arguments, type_output='csv', delimiter='|', output=stdout, lang=None):
    """Run a SPARQL-style query built from `query_arguments` and group the results.

    Optionally writes a CSV to `output` (one row per quantity of interest),
    and returns the nested grouping structure.
    NOTE(review): when the key check below fails the function implicitly
    returns None — confirm callers expect that.
    """
    # Only proceed when all four clause keys are present in the arguments.
    if set(['main', 'union', 'optional', 'filter']) <= set(query_arguments):
        query_arguments = format_query(query=query_arguments, lang=lang)
        query = factory(main=query_arguments['main'], union=query_arguments['union'], optional=query_arguments['optional'], filter=query_arguments['filter'])
        results = retrieve_result(query)
        properties_set = set()
        # Shape: quantity_of_interest -> property_uri -> 'hasValue'
        #        -> 'hasValue_label'/'hasValue_uri' -> list of values.
        formatted_result = defaultdict(lambda: defaultdict(
            lambda: defaultdict(lambda: defaultdict(list))))
        for result in results:
            quantity_of_interest = result['quantity_of_interest']['value']
            property_uri = result['property']['value']
            # `X and Y or Z` idiom: use the label when present, else ''.
            property_label = 'property_label' in result and result['property_label']['value'] or ''
            hasValue_uri = result['hasValue']['value']
            hasValue_label = 'hasValue_label' in result and result['hasValue_label']['value'] or ''
            properties_set.add((property_uri, property_label))
            formatted_result[quantity_of_interest][property_uri]['hasValue']['hasValue_label'].append(hasValue_label)
            formatted_result[quantity_of_interest][property_uri]['hasValue']['hasValue_uri'].append(hasValue_uri)
        # Each key is a (property_uri, property_label) pair.
        keys = list(properties_set)
        if type_output == 'csv':
            # NOTE(review): ';' is passed positionally to UnicodeWriter —
            # confirm its signature treats the second argument as delimiter.
            out = UnicodeWriter(output, ';')
            # Header row: flattened (uri, label) pairs with empty labels dropped.
            out.writerow(filter(None, (chain.from_iterable(keys))))
            for qoi in formatted_result:
                # Two cells (joined uris, joined labels) per labelled property,
                # a single uris cell otherwise; values joined by `delimiter`.
                cells = [['%s' % delimiter.join(formatted_result[qoi][key[0]]['hasValue']['hasValue_uri']), '%s' % delimiter.join(formatted_result[qoi][key[0]]['hasValue']['hasValue_label'])] if key[1] else ['%s' % delimiter.join(formatted_result[qoi][key[0]]['hasValue']['hasValue_uri'])] for key in keys]
                out.writerow(chain.from_iterable(cells))
        return formatted_result
def write_json_to_csv(json_list, filename):
    """Flatten a list of course dicts into a CSV file.

    Keys listed in `translate` are copied to canonical column names;
    keys in `scrap` are dropped from the header entirely.
    """
    from unicode_csv import UnicodeWriter
    translate = {
        "class_nbr": "course_num",
        "crn": "course_num",
        "num": "course_num",
        "number": "department_num",
        "dist": "distribution",
        "div": "division",
        "lim": "course_cap",
        "graded_sem": "seminar",
        "graded_seminar": "seminar",
        "double_graded_seminar": "seminar",
        "dept_name": "department",
    }
    scrap = {"req", "sign", "note", "prerequisite", "prereq"}

    # Collect every header seen in any object, minus translated/scrapped
    # ones, plus the canonical translation targets; dedupe and sort.
    header_set = set()
    for obj in json_list:
        header_set.update(obj.keys())
    headers = [h for h in header_set if h not in translate and h not in scrap]
    headers.extend(translate.values())
    headers = sorted(set(map(unicode, headers)))

    with open(filename, "w") as f:
        writer = UnicodeWriter(f)
        # Header row uses lowercased, underscore-separated names.
        writer.writerow([unicode(h.replace(" ", "_").lower()) for h in headers])
        for obj in json_list:
            # Copy translated keys over to their canonical names.
            for old_key in translate.keys():
                if old_key in obj:
                    obj[translate[old_key]] = obj[old_key]
            row = []
            for header in headers:
                val = obj.get(header, "")
                if type(val) == list:
                    val = map(str, val)
                row.append(unicode(val))
            writer.writerow(row)
class SqlExportFileWriter(object):
    """Writes rows to a CSV file, optionally filtering on a predicate."""

    def __init__(self, dest, predicate=None, use_unicode=False):
        # Pick the writer implementation once; both expose the same interface.
        writer_cls = UnicodeWriter if use_unicode else csv.writer
        self._writer = writer_cls(dest, delimiter=DELIMITER)
        self._predicate = predicate

    def write_header(self, keys):
        """Emit the column-name row."""
        self._writer.writerow(keys)

    def write_rows(self, results):
        """Write every row that passes the predicate (if one was given)."""
        if self._predicate:
            results = [r for r in results if self._predicate(r)]
        if results:
            self._writer.writerows(results)
def write_to_csv(file_name):
    """Dump every document from get_docs() into a CSV file.

    The header row is taken from the first document's keys; later documents
    are assumed to share the same keys in the same order.
    """
    documents = get_docs()
    # Replaced py2-only backtick repr with an explicit repr() call.
    print(repr(documents.count()))
    serialized_documents = json.loads(dumps(documents))
    # Context manager fixes the leaked file handle in the original.
    with open(file_name, 'w') as csv_file:
        csv_writer = UnicodeWriter(csv_file, dialect='excel')
        wrote_header = False
        for doc in serialized_documents:
            print(repr(doc))
            del doc['_id']
            if not wrote_header:
                # NOTE(review): header order relies on the first document's
                # key order matching all later ones — confirm upstream.
                csv_writer.writerow(list(doc.keys()))
                wrote_header = True
            csv_writer.writerow(list(doc.values()))
def marcanalyse(files, sample_length=5):
    """
    returns a csv of marc keys and analysed values, showing, for example, how many records exist.

    ================= ==============================================================
    Column            Description
    ================= ==============================================================
    ``tag``           The 3-digit MARC tag.
    ``subfield``      The single-character subfield.
    ``tag_meaning``   The English meaning of the tag/subfield, if known.
    ``record_count``  The number of records that have at least one of these tags.
    ``min_valency``   The minimum number of this tag or subfield that each record has.
    ``max_valency``   The maximum number of this tag or subfield that each record has.
    ``samples``       Non-repeating sample values of the values of each tag or subfield.
    ================= ==============================================================
    """
    def _row(tag_cell, subfield_cell, tag_meaning, value):
        # Build one output row from an analysis value dict; the tag-level and
        # subfield-level rows differ only in their first three cells.
        return [tag_cell,
                subfield_cell,
                tag_meaning,
                unicode(value['count']),
                unicode(value['min_valency']),
                unicode(value['max_valency']),
                u"\r\r".join(value['samples'])]

    analysis = multifile_iter_records(files, sample_length=sample_length)
    csv_header = ("tag", "subfield", "tag_meaning", "record_count",
                  "min_valency", "max_valency", "samples")
    writer = UnicodeWriter(sys.stdout)
    writer.writerow(csv_header)
    # Sort tags for stable output, then emit one row per tag followed by
    # one row per subfield of that tag.
    for key, value in sorted(analysis.iteritems()):
        writer.writerow(_row(u'"%s"' % key, u"", meaning(key), value))
        for subfield, subvalue in sorted(value['subfields'].iteritems()):
            writer.writerow(_row("", subfield, meaning(key, subfield), subvalue))
def all_csv():
    """Return a CSV HTTP response of gross profit/loss per company context."""
    buffer = StringIO()
    writer = UnicodeWriter(buffer, encoding='utf-8')
    header = ','.join([
        'company.name',
        'company.cvr',
        'context.year',
        'grossprofitloss',
    ])
    for company in all_companies:
        for context in company.contexts:
            # Skip contexts without the figure we report on.
            if 'grossprofitloss' not in context.fields:
                continue
            writer.writerow([
                company.name,
                company.cvr,
                context.year,
                context.fields['grossprofitloss'],
            ])
    return Response(header + '\n' + buffer.getvalue(), mimetype='text/csv')
def main():
    """Parse every EML file and stream its rows to stdout as CSV."""
    writer = UnicodeWriter(sys.stdout)
    writer.writerow(["gemeente", "stembureau", "postcode", "stemmen"])
    for path in get_file_paths():
        writer.writerows(parse_eml_file(path))
    return 0
def on_data(self, data):
    """Stream callback: append one tweet to dados.csv and echo it.

    Each CSV line is "<text>|<screen_name>", with newlines flattened
    to spaces.
    """
    raw_data = json.loads(data)
    screen_name = raw_data['user']['screen_name']
    text = processText(raw_data['text'])
    # BUG FIX: the original called .replace('r', ' '), stripping every
    # letter "r" from the tweet; it clearly meant carriage returns ('\r'),
    # mirroring the '\n' replacement beside it.
    flattened = text.replace('\n', ' ').replace('\r', ' ')
    # Context manager fixes the handle the original left to GC on error.
    with open('dados.csv', 'a') as output:
        UnicodeWriter(output, delimiter='|', lineterminator='\n').writerow(
            (flattened, screen_name))
    print('@' + screen_name + '\n' + text + '\n')
    return True
def generate_csv(params, sort_by):
    """Build a CSV (returned as a string) of matching account documents.

    `params` are per-column search parameters translated into a Mongo
    query; `sort_by` optionally names a sort key.
    """
    class_dict = get_class_dict()
    primary_column_name = get_primary_column_name()
    all_classes = get_all_classes()

    # Translate UI search params into a Mongo query dict; the primary
    # column is stored under '_id'.
    query = {}
    for param in params:
        name = param['name']
        value = class_dict[name].get_search_value(param)
        if name == primary_column_name:
            name = '_id'
        if value:
            query[name] = value

    items = accounts_collection.find(query)
    if sort_by:
        items = items.sort(sort_by)

    with TemporaryFile() as f:
        writer = UnicodeWriter(f)
        writer.writerow(all_classes)
        for item in items:
            primary_key = item['_id']
            row = []
            for name in all_classes:
                class_type = class_dict[name]
                if name == primary_column_name:
                    name = '_id'
                if name in item:
                    row.append(class_type.get_csv_string(item[name]))
                else:
                    row.append(default_csv_string)
            writer.writerow(row)
        # Rewind and hand back the whole buffer as one string.
        f.seek(0)
        return f.read()
def main():
    """Geocode each CSV row from stdin and echo the augmented row."""
    reader = UnicodeReader(sys.stdin)
    writer = UnicodeWriter(sys.stdout)
    writer.writerow([
        "gemeente", "stembureau", "postcode", "stemmen",
        "postcode_google", "lat", "lng"
    ])
    for row in reader:
        writer.writerow(find_voting_place(row))
        sleep(1)  # throttle geocoding lookups
    return 0
class OpmlToCsvConverter(OpmlConverter):
    """Convert OPML file to Todoist CSV."""

    EXT = 'csv'

    def __init__(self, args):
        super(OpmlToCsvConverter, self).__init__(args)

    def convert(self):
        """Write the full CSV: header row, then every outline element."""
        body = self._prepare_document()
        with codecs.open(self.target_name, 'w+') as target:
            self.writer = UnicodeWriter(target, FIELDNAMES)
            self._write_header_row()
            for element in body:
                self.process_element(element)

    def _prepare_document(self):
        """Parse the source OPML and return its <body> element."""
        return ET.parse(self.source_name).getroot().find('body')

    def _write_header_row(self):
        self.writer.writerow(FIELDNAMES)

    def _make_row(self, type='', content='', indent=''):
        # Nine columns: TYPE, CONTENT, blank, INDENT, then five empty cells.
        return [type, content, '', indent, '', '', '', '', '']

    def process_element(self, outline, level=1):
        """Emit a task row, an optional note row and a separator for
        `outline`, then recurse into its child outlines."""
        self.writer.writerow(
            self._make_row(self.TYPE_TASK, outline.get('text'), str(level)))
        note = outline.get(self.NOTE_ATTRIB)
        if note:
            self.writer.writerow(self._make_row(self.TYPE_NOTE, note))
        self.writer.writerow(self._make_row())  # separator row
        for child in outline.findall('outline'):
            self.process_element(child, level + 1)
def main():
    """Print buurt/wijk/gemeente properties for every shape as CSV."""
    shapes = get_shapes(sys.argv[1])
    writer = UnicodeWriter(sys.stdout)
    writer.writerow([
        'buurt_code', 'buurt_naam', 'wijk_code', 'gem_code', 'gem_naam'])
    fields = [u'BU_CODE', u'BU_NAAM', u'WK_CODE', u'GM_CODE', u'GM_NAAM']
    for geom, props in shapes:
        writer.writerow([props[fld] for fld in fields])
    return 0
def write_group_by_one_filed(todos, output_path, value_name, values_list):
    """Write a CSV grouping `todos` by `value_name` over `values_list`.

    The first column is 'complete'; the remaining columns are the
    per-value counts returned by group_by_value().
    """
    res = group_by_value(todos, value_name, values_list)
    print('write: %s' % output_path)
    # Context manager fixes the file handle the original never closed.
    with open(output_path, 'wb') as csv_file:
        writer = UnicodeWriter(csv_file)
        writer.writerow(['complete'] + list(values_list))
        for row in res:
            output_row = [row[0]]
            # NOTE(review): assumes the dict in row[1] iterates in the same
            # order as values_list — confirm in group_by_value.
            for _, v in row[1].items():
                output_row.append(str(v))
            writer.writerow(output_row)
def main():
    """Join stdin CSV rows with CBS shapes on lat/lon and emit enriched rows.

    Appends buurt/wijk/gemeente codes and names when a containing shape is
    found, '-' placeholders otherwise.
    """
    if len(sys.argv) < 6:
        print >> sys.stderr, "Usage: merge.py <shape_file> <lat_field> <lon_field> <lat_fallbck> <lon_fallback>"
        return 1

    reader = UnicodeReader(sys.stdin)
    writer = UnicodeWriter(sys.stdout)
    header = reader.next()
    shapes = get_shapes(sys.argv[1])

    out_header = deepcopy(header)
    out_header += [
        'buurt_code', 'buurt_naam', 'wijk_code', 'wijk_naam',
        'gem_code', 'gem_naam'
    ]
    writer.writerow(out_header)

    lat_field = sys.argv[2]
    lon_field = sys.argv[3]
    lat_fb_field = sys.argv[4]
    lon_fb_field = sys.argv[5]

    # BUG FIX: the original listed BU_CODE/BU_NAAM twice and never
    # WK_CODE/WK_NAAM, so the wijk columns received buurt values.
    shape_fields = [u'BU_CODE', u'BU_NAAM', u'WK_CODE', u'WK_NAAM',
                    u'GM_CODE', u'GM_NAAM']

    for row in reader:
        out_row = deepcopy(row)
        data = dict(zip(header, row))
        # Prefer the primary coordinate fields, fall back otherwise.
        # NOTE(review): the primary check mixes the u'-' and u'' sentinels —
        # kept as-is; confirm which marker upstream actually emits.
        if (data[lon_field] != u'-') and (data[lat_field] != u''):
            lat = data[lat_field]
            lon = data[lon_field]
        else:
            lat = data[lat_fb_field]
            lon = data[lon_fb_field]
        if (lat != u'-') and (lon != u'-'):
            # NOTE(review): Point(x, y) is given (lat, lon) here, while
            # shapely convention is (x=lon, y=lat) — confirm the shapes'
            # axis order before changing.
            point = shapely.geometry.Point(float(lat), float(lon))
            for shape, props in shapes:
                if shape.contains(point):
                    out_row += [props[fld] for fld in shape_fields]
                    break
        if len(out_row) == len(row):  # no containing shape found
            out_row += [u'-', u'-', u'-', u'-', u'-', u'-']
        writer.writerow(out_row)
    return 0
def write_timer(todos, output_path):
    """Write per-day timer totals (in hours, 2 decimals) to a CSV file.

    Only todos whose `timer` is an int are counted; minutes are summed per
    completion date and converted to hours on output.
    """
    print('write: %s' % output_path)
    # Accumulate first, so the file is only open while writing.
    date_timer = {}
    for todo in todos:
        if not isinstance(todo.timer, int):
            continue
        date_timer[todo.completed] = date_timer.get(todo.completed, 0) + todo.timer
    # Context manager fixes the file handle the original never closed.
    with open(output_path, 'wb') as csv_file:
        writer = UnicodeWriter(csv_file)
        writer.writerow(['date', 'timer'])
        for date, minutes in date_timer.items():
            writer.writerow([
                date.strftime('%Y-%m-%d'),
                "{0:.2f}".format(minutes / 60.0),
            ])
def main():
    """Merge the election CSV (argv[1]) with 2017 polling places (argv[2]).

    Matches each row to a 2017 place by postcode (exact, then
    Google-geocoded) and finally by municipality + name, appending the
    2017 name and coordinates, or '-' placeholders when unmatched.
    """
    if len(sys.argv) < 3:
        print >> sys.stderr, "Usage: merge.py <file1> <file2>"
        return 1

    places = get_places(sys.argv[2])
    election_file = UnicodeReader(open(sys.argv[1]))
    headers = election_file.next()

    writer = UnicodeWriter(sys.stdout)
    writer.writerow([
        "gemeente", "stembureau", "postcode", "stemmen", "postcode_google",
        "lat", "lng", "stembureau2017", "lat2017", "lon2017"
    ])
    for row in election_file:
        result = dict(zip(headers, row))
        place = None
        if result[u'postcode'] != u'-':
            place = find_place_by_postcode(
                places, re.sub(r'\s+', u'', result[u'postcode']))
        elif result[u'postcode_google'] != u'':
            # BUG FIX: the original looked up result[u'postcode'] again in
            # this branch, so the Google-geocoded postcode was never used.
            place = find_place_by_postcode(
                places, re.sub(r'\s+', u'', result[u'postcode_google']))
        if place is None:
            place = find_place_by_muni_and_name(places, result[u'gemeente'],
                                                result[u'stembureau'])
        result_row = deepcopy(row)
        if place is not None:
            result_row.append(place[u'stembureau'])
            # BUG FIX: the header order is lat2017 then lon2017, but the
            # original appended Longitude before Latitude, swapping the
            # coordinate columns.
            result_row.append(place[u'Latitude'])
            result_row.append(place[u'Longitude'])
        else:
            result_row.append(u'-')
            result_row.append(u'-')
            result_row.append(u'-')
        writer.writerow(result_row)
    return 0
from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import NoSuchElementException from selenium.common.exceptions import TimeoutException from selenium.webdriver.common.proxy import * file_player_link2 = open("clean_result_player_link2") result_player_link = pickle.load(file_player_link2) links = list() for i in range(len(result_player_link)): links.append(result_player_link[i][1]) result = list() result_file = open("result_file.csv", "a") wr = UnicodeWriter(result_file) for link in links[6234:]: proxy = ["173.9.233.186", "54.227.39.120", "194.141.96.1", "218.108.232.190", "80.193.214.233", "125.39.171.194"] port = [3128, 80, 8080, 843, 3128, 86] while True: chosen = random.randint(0, len(proxy)-1) profile = webdriver.FirefoxProfile() profile.set_preference("network.proxy.type", 1) profile.set_preference("network.proxy.http", proxy[chosen]) profile.set_preference("network.proxy.http_port", port[chosen]) profile.update_preferences() ff = webdriver.Firefox(firefox_profile=profile) ff.set_page_load_timeout(20) try:
viaf_code = line[1] sbn_code = line[2] except: pass if page: wikipedia[page] = {'viaf': viaf_code, 'sbn': sbn_code} if viaf_code: viaf2wiki[viaf_code] = page if sbn_code: sbn2wiki[sbn_code] = page outwikifile = open(OUTWIKIFILE, 'a+') wikiwriter = UnicodeWriter(outwikifile) wikipages_to_get = set(wikipages_with_authority_control) - set(wikipedia.keys()) logger.debug('Wikipages with authority control: {no}'.format( no=len(set(wikipages_with_authority_control)))) logger.debug('no. of keys already collected: {no}'.format( no=len(set(wikipedia.keys())))) logger.debug('no. of pages in it.wiki with authority control, still to get: {no}'.format( no=len(wikipages_to_get))) count = 0 for page in wikipages_to_get: count += 1 logger.debug(count)
from unicode_csv import UTF8Recoder, UnicodeReader, UnicodeWriter rdr = UnicodeReader(open(sys.argv[1])) tag_re = re.compile('\W', re.UNICODE) flickr_tags = set() tag_source = os.path.join(os.path.dirname(__file__), '../data/tags_from_flickr.csv') with open(tag_source) as f: tagreader = UnicodeReader(f) for tagrow in tagreader: text, raw, author = tagrow[:] flickr_tags.add(text) outfile = open('updated_tags.orig.csv', 'w+') wrtr = UnicodeWriter(outfile) head = [ 'Normalized', 'Raw', 'Model', 'field' ] wrtr.writerow(head) for row in rdr: new_row = [] ugly, raw, mitch, laura, model, field, basis = row[:] u = 'MISSING' if ugly: u = ugly if ugly in flickr_tags else ('CURR:NO_MATCH:%s' % ugly) else: ugly = unicodedata.normalize('NFC', raw) ugly = tag_re.sub('', ugly.strip().lower()) u = ugly if ugly in flickr_tags else ('NEW:NO_MATCH:%s' % ugly) new_row = row[:]
import json
import os.path
import sys
import io

from unicode_csv import UnicodeWriter


def get_value_from_dict_or_return_NA(dict, key):
    """Return dict[key], or "NA" when the key is absent or its value is None.

    NOTE(review): the parameter shadows the builtin `dict`; name kept for
    call-compatibility.
    """
    # Idiom fix: single .get() lookup plus an `is None` test replaces the
    # original `key in dict and dict[key] != None` double lookup.
    value = dict.get(key)
    return "NA" if value is None else value


# Append each Instagram post from acl_instagram.txt as one backtick-delimited
# CSV row in acl_instagram.csv.
with io.open('acl_instagram.csv', 'ab') as csv_data:
    writer = UnicodeWriter(csv_data, delimiter='`')
    with open('acl_instagram.txt') as instagram_file:
        for insta_data in instagram_file:
            post = json.loads(insta_data)
            writer.writerow([
                get_value_from_dict_or_return_NA(post, 'user'),
                get_value_from_dict_or_return_NA(post, 'post_time'),
                get_value_from_dict_or_return_NA(post, 'post_location'),
                get_value_from_dict_or_return_NA(post, 'likes_count'),
                get_value_from_dict_or_return_NA(post, 'views_count'),
                json.dumps(get_value_from_dict_or_return_NA(post, 'comments'))
            ])
def __init__(self, dest, predicate=None, use_unicode=False):
    """Set up the CSV writer (unicode-aware when requested) and keep the
    optional row predicate for later filtering."""
    writer_factory = UnicodeWriter if use_unicode else csv.writer
    self._writer = writer_factory(dest, delimiter=DELIMITER)
    self._predicate = predicate