Пример #1
0
 def test_write_csv_fp(self):
     """
     Round-trip check for write_csv_fp: write the buildings data to a scratch
     file, read the scratch file back, and confirm the data is unchanged.
     """
     data = read_csv("data/buildings.txt", delimiter='\t')
     # The context manager closes the file even if write_csv_fp raises
     with open('data/buildings_out.txt', 'w') as fp:
         write_csv_fp(fp, data, delimiter='\t')
     # Read the file that was just written; reading the input file again
     # would pass regardless of what write_csv_fp produced
     data2 = read_csv("data/buildings_out.txt", delimiter='\t')
     self.assertEqual(data, data2)
Пример #2
0
from vivopump import read_csv_fp, write_csv_fp, improve_jobcode_description
import sys

# Read the table from stdin.  data_in maps a row key to a dict of column values.
data_in = read_csv_fp(sys.stdin)

# Take the column names from an arbitrary row (assumes every row carries the
# same columns).  This also works for one-row and empty input.
var_names = next(iter(data_in.values()), {}).keys()
print >> sys.stderr, "Columns in", var_names

data_out = {}
for row, data in data_in.items():
    new_data = dict(data)  # copy, so the input row is left untouched

    # Add these columns

    new_data['remove'] = ''
    new_data['uri'] = ''
    new_data['title'] = improve_jobcode_description(
        new_data['JOBCODE_DESCRIPTION'])
    new_data['hr_title'] = new_data['JOBCODE_DESCRIPTION']

    # Delete these columns

    for name in ('JOBCODE', 'HR_POSITION', 'JOBCODE_DESCRIPTION'):
        del new_data[name]

    data_out[row] = new_data

# Report the resulting column names the same way, then emit the rows on stdout
var_names = next(iter(data_out.values()), {}).keys()
print >> sys.stderr, "Columns out", var_names
write_csv_fp(sys.stdout, data_out)
Пример #3
0
"""
    null_value_filter.py -- Replace "NULL" with empty string
"""

__author__ = "Michael Conlon"
__copyright__ = "Copyright 2015 (c), Michael Conlon"
__license__ = "New BSD License"
__version__ = "0.01"

from vivopump import read_csv_fp, write_csv_fp
import shelve
import sys

# Read the rows from stdin.  Each row is copied before cleaning, so the input
# rows themselves are never modified.
data_in = read_csv_fp(sys.stdin)
null_count = 0
data_out = {}
for row_key, row_values in data_in.items():
    cleaned = dict(row_values)
    # Find the columns holding the literal string "NULL", then blank them
    null_columns = [column for column in cleaned if cleaned[column] == "NULL"]
    for column in null_columns:
        cleaned[column] = ""
    null_count += len(null_columns)
    data_out[row_key] = cleaned

# Report the number of replacements on stderr and emit the cleaned rows on stdout
print >>sys.stderr, "NULL values replaced", null_count
write_csv_fp(sys.stdout, data_out)





Пример #4
0
    # NOTE(review): this is the tail of a loop body; the loop header and the
    # definitions of row_data, row_index and data_out are outside this excerpt.
    data_out['affiliation'] = get_author_affiliation(row_data['affiliation'])

    # Look up the journal for this row's ISSN.
    # NOTE(review): if the lookup returns None (as dict.get does for a missing
    # key -- confirm vivo_journals is a dict), len() raises TypeError and the
    # row is skipped by `continue`.  In that case nothing below runs: no
    # "ISSN not found" message is printed and data_in[row_index] keeps its
    # 'issn' column and receives no 'journal' value.  Confirm this is intended.
    try:
        if len(vivo_journals.get(row_data['issn'])) > 0:
            issn_uri = vivo_journals.get(row_data['issn'])
        else:
            # Reached only when the lookup returns an empty (zero-length) value
            utils.print_err("\nISSN not found: {}\n".format(row_data['issn']))
            issn_uri = ''
    except TypeError:
        continue

    # Earlier variant, kept commented out by the author.
    # NOTE(review): a .get() lookup on a dict does not raise KeyError, so this
    # variant would not have reported missing ISSNs either.
    # try:
    #     issn_uri = vivo_journals.get(row_data['issn'])
    # except KeyError:
    #     utils.print_err("\nISSN not found: {}\n".format(row_data['issn']))
    #     issn_uri = ''

    utils.print_err("data_out is: \n{}".format(data_out))

    # Copy the processed values back into the input row and replace the
    # 'issn' column with the 'journal' column
    data_in[row_index]['author'] = data_out['author']
    data_in[row_index]['affiliation'] = data_out['affiliation']
    data_in[row_index]['journal'] = issn_uri
    data_in[row_index].pop('issn')

# Write every item yielded by disamb_dict to the disambiguation file, then close it
disamb_file.writelines(disamb_dict)
disamb_file.close()

# Emit the updated rows on stdout
write_csv_fp(sys.stdout, data_in)
Пример #5
0
        # NOTE(review): these two lines end a conditional whose test is outside
        # this excerpt.  The row is set aside in pubs_missing_doi_dict and the
        # rest of the loop body is skipped for it.
        pubs_missing_doi_dict[row]= data
        continue

    # NOTE(review): data_out[row] refers to the same dict object as data (no
    # copy is made), so the edits below also change data.
    data_out[row] = data

    # A DOI that is not in vivo_pubs gets an empty uri (original note: these
    # are the ones to add); otherwise the uri stored for that DOI is used
    if data['doi'] not in vivo_pubs:
        data_out[row]['uri'] = ''
    else:
        data_out[row]['uri'] = vivo_pubs[data['doi']]

    # Look up the journal for this row's ISSN.
    # NOTE(review): if the lookup returns None (as dict.get does for a missing
    # key -- confirm vivo_journals is a dict), len() raises TypeError and
    # `continue` skips the lines below.  The row was already stored in
    # data_out above, so it stays in the output with its 'issn' column and
    # without a 'journal' column.  Confirm this is intended.
    try:
        if len(vivo_journals.get(data['issn'])) > 0:
            issn_uri = vivo_journals.get(data['issn'])
        else:
            # Reached only when the lookup returns an empty (zero-length) value
            utils.print_err("\nISSN not found: {}\n".format(data['issn']))
            issn_uri = ''
    except TypeError:
        continue

    # Replace the 'issn' column with the 'journal' column
    data_out[row]['journal'] = issn_uri
    data_out[row].pop('issn')




# Rows that were set aside in pubs_missing_doi_dict go to their own file
write_csv_fp(pubs_missing_doi_file, pubs_missing_doi_dict)

# Report the size of the main output through utils.print_err, then emit it on stdout
output_row_count = len(data_out)
utils.print_err('{} rows in the output'.format(output_row_count))
write_csv_fp(sys.stdout, data_out)
Пример #6
0
def bib2csv(bib_data):
    """
    Turn bib_data (as created by bibtexparser) into a csv object as modeled by vivotools

    Each entry becomes one row, numbered from 1.  Every row has the same columns: the
    union of the keys of all entries, with "" where an entry lacks a key.  Each newline,
    carriage return and tab character in a value is replaced by one space.
    :param bib_data: object whose entries attribute is a sequence of dictionaries
    :return: csv_data, a dictionary mapping row number to a dictionary of column values
    """
    def one_line(text):
        # Replace each line break or tab character with a single space
        for char in ("\n", "\r", "\t"):
            text = text.replace(char, " ")
        return text

    col_names = set(key for entry in bib_data.entries for key in entry.keys())
    return {
        number: {name: one_line(entry.get(name, "")) for name in col_names}
        for number, entry in enumerate(bib_data.entries, 1)
    }


# Read the whole BibTeX document from stdin in one call.  Appending line by
# line re-copies the growing string and is needlessly slow on large inputs.
bib_str = sys.stdin.read()

# Parse, convert to rows, and report the counts on stderr before emitting on stdout
bib_data = loads(bib_str)
print >>sys.stderr, "Entries", len(bib_data.entries)
csv_data = bib2csv(bib_data)
print >>sys.stderr, "Rows", len(csv_data)
write_csv_fp(sys.stdout, csv_data)
Пример #7
0
def bib2csv(bib_data):
    """
    Build a csv object as modeled by vivotools from bib_data as created by bibtexparser

    Rows are numbered from 1, one per entry.  The columns of every row are the union of
    the keys found in any entry; a key an entry lacks is given the value ''.  Newline,
    carriage return and tab characters in values are each replaced by a space.
    :param bib_data: object whose entries attribute is a sequence of dictionaries
    :return: csv_data, a dictionary of row number -> dictionary of column name -> value
    """
    col_names = set(field for entry in bib_data.entries for field in entry.keys())
    csv_data = {}
    for row, entry in enumerate(bib_data.entries, 1):
        cleaned = {}
        for field in col_names:
            value = entry.get(field, '')
            # Keep each value on a single line, free of tabs
            cleaned[field] = value.replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')
        csv_data[row] = cleaned
    return csv_data


# Read the whole BibTeX document from stdin in one call.  Appending line by
# line re-copies the growing string and is needlessly slow on large inputs.
bib_str = sys.stdin.read()

# Parse, convert to rows, and report the counts on stderr before emitting on stdout
bib_data = loads(bib_str)
print >> sys.stderr, "Entries", len(bib_data.entries)
csv_data = bib2csv(bib_data)
print >> sys.stderr, "Rows", len(csv_data)
write_csv_fp(sys.stdout, csv_data)