def test_write_csv_fp(self):
    """Round-trip test: write data with write_csv_fp, read it back, compare.

    Reads a known tab-delimited file, writes it out through write_csv_fp,
    then re-reads the WRITTEN file and asserts the round trip is lossless.
    """
    data = read_csv("data/buildings.txt", delimiter='\t')
    # with-statement guarantees the handle is closed even if write raises
    with open('data/buildings_out.txt', 'w') as fp:
        write_csv_fp(fp, data, delimiter='\t')
    # Bug fix: re-read the file we just WROTE. The original re-read the
    # input file, so the test passed even if write_csv_fp produced garbage.
    data2 = read_csv("data/buildings_out.txt", delimiter='\t')
    # assertEqual gives a useful diff on failure, unlike assertTrue(==)
    self.assertEqual(data, data2)
"""
jobcode_filter -- prepare HR position data for the pump: add 'remove',
'uri', 'title' and 'hr_title' columns, improve the job code description,
and drop the raw HR columns.

Reads csv on stdin, writes csv on stdout, progress messages on stderr.
(Python 2 script -- uses the ``print >>`` statement.)
"""
from vivopump import read_csv_fp, write_csv_fp, improve_jobcode_description
import sys

data_in = read_csv_fp(sys.stdin)

# Column names are taken from the first row; every row is assumed to
# carry the same columns. Bug fix: the original indexed keys()[1],
# which is the SECOND key despite the "first row" comment, and raised
# IndexError when the input had only one row.
var_names = data_in[data_in.keys()[0]].keys()
print >> sys.stderr, "Columns in", var_names

data_out = {}
for row, data in data_in.items():
    new_data = dict(data)  # copy so data_in is left untouched

    # Add these columns
    new_data['remove'] = ''
    new_data['uri'] = ''
    new_data['title'] = improve_jobcode_description(
        new_data['JOBCODE_DESCRIPTION'])
    new_data['hr_title'] = new_data['JOBCODE_DESCRIPTION']

    # Delete these columns
    del new_data['JOBCODE']
    del new_data['HR_POSITION']
    del new_data['JOBCODE_DESCRIPTION']

    data_out[row] = new_data

var_names = data_out[data_out.keys()[0]].keys()
print >> sys.stderr, "Columns out", var_names
write_csv_fp(sys.stdout, data_out)
""" null_value_filter.py -- Replace "NULL" with empty string """ __author__ = "Michael Conlon" __copyright__ = "Copyright 2015 (c), Michael Conlon" __license__ = "New BSD License" __version__ = "0.01" from vivopump import read_csv_fp, write_csv_fp import shelve import sys data_in = read_csv_fp(sys.stdin) data_out = {} null_count = 0 for row, data in data_in.items(): new_data =dict(data) for name, val in new_data.items(): if val == "NULL": new_data[name] = "" null_count += 1 data_out[row] = new_data print >>sys.stderr, "NULL values replaced", null_count write_csv_fp(sys.stdout, data_out)
data_out['affiliation'] = get_author_affiliation(row_data['affiliation']) try: if len(vivo_journals.get(row_data['issn'])) > 0: issn_uri = vivo_journals.get(row_data['issn']) else: utils.print_err("\nISSN not found: {}\n".format(row_data['issn'])) issn_uri = '' except TypeError: continue # try: # issn_uri = vivo_journals.get(row_data['issn']) # except KeyError: # utils.print_err("\nISSN not found: {}\n".format(row_data['issn'])) # issn_uri = '' utils.print_err("data_out is: \n{}".format(data_out)) data_in[row_index]['author'] = data_out['author'] data_in[row_index]['affiliation'] = data_out['affiliation'] data_in[row_index]['journal'] = issn_uri data_in[row_index].pop('issn') for line in disamb_dict: disamb_file.write(line) disamb_file.close() write_csv_fp(sys.stdout, data_in)
pubs_missing_doi_dict[row]= data continue data_out[row] = data # name is not vivo. These are the ones to add if data['doi'] not in vivo_pubs: data_out[row]['uri'] = '' else: data_out[row]['uri'] = vivo_pubs[data['doi']] try: if len(vivo_journals.get(data['issn'])) > 0: issn_uri = vivo_journals.get(data['issn']) else: utils.print_err("\nISSN not found: {}\n".format(data['issn'])) issn_uri = '' except TypeError: continue data_out[row]['journal'] = issn_uri data_out[row].pop('issn') write_csv_fp(pubs_missing_doi_file,pubs_missing_doi_dict) utils.print_err('{} rows in the output'.format(len(data_out))) write_csv_fp(sys.stdout, data_out)
def bib2csv(bib_data):
    """
    Given bib_data as created by bibtexparser, return a csv object as
    modeled by vivotools.
    :param bib_data: parsed bibtex with an ``entries`` list of field dicts
    :return: csv_data -- rows keyed 1..n, every row carrying every column
    """
    # Union of all field names appearing in any entry, so rows are uniform
    col_names = set()
    for entry in bib_data.entries:
        col_names.update(entry.keys())

    csv_data = {}
    for row_number, entry in enumerate(bib_data.entries, start=1):
        # Flatten control whitespace so each value stays on one csv line
        csv_data[row_number] = dict(
            (name,
             entry.get(name, "").replace("\n", " ")
                                .replace("\r", " ")
                                .replace("\t", " "))
            for name in col_names
        )
    return csv_data


# Read the whole bibtex source from stdin, parse, convert, emit csv
bib_str = "".join(sys.stdin)
bib_data = loads(bib_str)
print >>sys.stderr, "Entries", len(bib_data.entries)
csv_data = bib2csv(bib_data)
print >>sys.stderr, "Rows", len(csv_data)
write_csv_fp(sys.stdout, csv_data)
def bib2csv(bib_data):
    """
    Given bib_data as created by bibtexparser, return a csv object as
    modeled by vivotools.
    :param bib_data: object with an ``entries`` list of field dicts
    :return: csv_data keyed by 1-based row number
    """
    entries = bib_data.entries
    # Collect every column seen anywhere so all rows share one column set
    col_names = set()
    for entry in entries:
        for field in entry:
            col_names.add(field)

    csv_data = {}
    row = 0
    for entry in entries:
        row = row + 1
        values = {}
        for col_name in col_names:
            text = entry.get(col_name, '')
            # Newlines/tabs would break the one-row-per-line csv output
            for bad in ('\n', '\r', '\t'):
                text = text.replace(bad, ' ')
            values[col_name] = text
        csv_data[row] = values
    return csv_data


# Driver: stdin holds bibtex text; emit csv on stdout, counts on stderr
bib_str = ''.join(sys.stdin)
bib_data = loads(bib_str)
print >> sys.stderr, "Entries", len(bib_data.entries)
csv_data = bib2csv(bib_data)
print >> sys.stderr, "Rows", len(csv_data)
write_csv_fp(sys.stdout, csv_data)