Exemplo n.º 1
0
COMPANY_NAME = 9
COUNTRY = 17

print "Opening %s to read..." % args.input
print "Opening %s to output..." % args.output

with open(args.input, "r") as csv_in:
    reader = csv.DictReader(csv_in)
    with open(args.output, "w") as csv_out:
        writer = csv.writer(csv_out)
        for in_row in reader:
            out_row = [''] * 22
            addr = in_row['ADDRESS']
            if addr:
                print "Cleaning address..."
                print "Input: " + addr
                addr = clean(addr)
                if addr:
                    (street, city, state, postal, country) = addr
                    print "---- %s, %s, %s, %s, %s -----" % addr
                    out_row[STREET] = street if street else ''
                    out_row[CITY] = city if city else ''
                    out_row[STATE] = state if state else ''
                    out_row[POSTAL] = postal if postal else ''
                    out_row[COUNTRY] = country if country else ''
                else:
                    print "NO MATCHING ADDRESS"
            out_row[CUSIP6] = in_row['CUSIP6']
            out_row[COMPANY_NAME] = in_row['NAME']
            writer.writerow(out_row)
Exemplo n.º 2
0
 def test_clean_ireland_address(self):
     # An upper-case Dublin street address is expected to come back as the
     # 5-tuple below, with None in the fourth position.
     raw_address = "70 SIR JOHN ROGERSON'S QUAY, Dublin 2, L2 2, Ireland"
     cleaned = clean(raw_address)
     expected = (u"70 Sir John Rogerson's Quay", u'Dublin', u'Dublin', None, u'Ireland')
     self.assertEqual(expected, cleaned)
Exemplo n.º 3
0
# Python 2 only: make UTF-8 the interpreter-wide default codec for implicit
# str <-> unicode conversions. The reload() has to come first because
# sys.setdefaultencoding is normally removed from the sys module at startup.
# NOTE(review): this is a process-global setting that affects every module;
# explicit encode/decode at the I/O boundary would be safer - confirm it is
# still required.
reload(sys)
sys.setdefaultencoding('utf8')

# Command-line interface: two required positional arguments.
parser = argparse.ArgumentParser(description='Geocode csv file.')
parser.add_argument("path", help="Path to csv")
parser.add_argument("field", help="Name of address field")
# Parsed at module level; the code below reads args.path and args.field.
args = parser.parse_args()


def read_fieldnames(path):
    """Return the first row of the CSV file at *path* as a list of strings.

    The first row is taken to be the header. An empty file yields an empty
    list instead of letting StopIteration escape.
    """
    with open(path, "r") as path_csv:
        reader = csv.reader(path_csv)
        # The next() builtin works on Python 2.6+ and 3.x alike, unlike the
        # Python-2-only reader.next() method; the default covers empty files.
        return next(reader, [])


fieldnames = read_fieldnames(
    args.path) + ['STREET', 'CITY', 'STATE', 'POSTAL', 'COUNTRY']
with open(args.path, "r") as path_csv:
    reader = csv.DictReader(path_csv)
    with open("geocoded.csv", "w") as geocoded_out:
        writer = csv.DictWriter(geocoded_out, fieldnames)
        for row in reader:
            address = row[args.field]
            print "Geocoding " + address
            geo_address = clean(address)
            if geo_address:
                (row['STREET'], row['CITY'], row['STATE'], row['POSTAL'],
                 row['COUNTRY']) = geo_address
                writer.writerow(row)
Exemplo n.º 4
0
 def test_clean_weird_chinese_address(self):
     # This Beijing address is expected to be rejected: clean() should
     # return None for it.
     raw_address = 'M5 1 Jiuxianqiao East Road, Chaoyang District, Beijing 100016, People s Republic of China'
     self.assertEqual(None, clean(raw_address))
import argparse
import sys

# Python 2 only: make UTF-8 the interpreter-wide default codec for implicit
# str <-> unicode conversions. The reload() has to come first because
# sys.setdefaultencoding is normally removed from the sys module at startup.
# NOTE(review): this is a process-global setting that affects every module;
# explicit encode/decode at the I/O boundary would be safer - confirm it is
# still required.
reload(sys)
sys.setdefaultencoding('utf8')

# Command-line interface: two required positional arguments.
parser = argparse.ArgumentParser(description='Geocode csv file.')
parser.add_argument("path", help="Path to csv")
parser.add_argument("field", help="Name of address field")
# Parsed at module level; the code below reads args.path and args.field.
args = parser.parse_args()

def read_fieldnames(path):
    """Return the first row of the CSV file at *path* as a list of strings.

    The first row is taken to be the header. An empty file yields an empty
    list instead of letting StopIteration escape.
    """
    with open(path, "r") as path_csv:
        reader = csv.reader(path_csv)
        # The next() builtin works on Python 2.6+ and 3.x alike, unlike the
        # Python-2-only reader.next() method; the default covers empty files.
        return next(reader, [])

fieldnames = read_fieldnames(args.path) + ['STREET', 'CITY', 'STATE', 'POSTAL', 'COUNTRY']
with open(args.path, "r") as path_csv:
    reader = csv.DictReader(path_csv)
    with open("geocoded.csv", "w") as geocoded_out:
        writer = csv.DictWriter(geocoded_out, fieldnames)
        for row in reader:
            address = row[args.field]
            print "Geocoding " + address
            geo_address = clean(address)
            if geo_address:
                (row['STREET'], row['CITY'], row['STATE'], row['POSTAL'], row['COUNTRY']) = geo_address
                writer.writerow(row)