Пример #1
0
def printRecordsJson(located_recs):
    """Dump the given records to stdout as a single indented JSON array.

    located_recs is iterated as (record, coder, locatable) triples.  Each
    record becomes one JSON object carrying its id, folder (with the
    'Folder: ' prefix removed), cleaned date and title, description, url
    and an 'extracted' sub-object.  'extracted' always holds the date
    range as two YYYY-MM-DD strings; when coder is truthy it also holds
    the lat/lon, the string form of the locatable and the coder name.
    A 'note' key is added only for records whose note() is truthy.
    """

    def iso_day(day):
        # Manual formatting of the year/month/day attributes.
        return '%04d-%02d-%02d' % (day.year, day.month, day.day)

    output = []
    for r, coder, locatable in located_recs:
        extracted = {'date_range': [None, None]}
        entry = {
            'id': r.photo_id(),
            'folder': r.location().replace('Folder: ', ''),
            'date': record.CleanDate(r.date()),
            'title': record.CleanTitle(r.title()),
            'description': r.description(),
            'url': r.preferred_url,
            'extracted': extracted,
        }
        if r.note():
            entry['note'] = r.note()

        first_day, last_day = r.date_range()
        extracted['date_range'][0] = iso_day(first_day)
        extracted['date_range'][1] = iso_day(last_day)

        if coder:
            # Geocoding details exist only when some coder claimed the record.
            for key, value in (('latlon', locatable.getLatLon()),
                               ('located_str', str(locatable)),
                               ('technique', coder)):
                extracted[key] = value

        output.append(entry)

    print(json.dumps(output, indent=2))
Пример #2
0
    def codeRecord(self, r):
        """Try to locate record r from the text of its title.

        The cleaned, lower-cased title is first searched for an address
        matching self._addr_re; an address that should_reject_address()
        does not veto is returned via fromAddress().  Otherwise each
        pattern in self._forms is tried in order and the first hit is
        returned.  Returns None when nothing matches.
        """
        lowered = record.CleanTitle(r.title()).lower()
        title = coders.sf_streets.clean_street(lowered)

        # An exact address wins over any cross-street pattern.
        address_match = re.search(self._addr_re, title)
        if address_match:
            address = address_match.group(0)
            if not should_reject_address(address):
                return coders.locatable.fromAddress(address)

        # Common cross-street patterns, tried in order.
        for form_index, pattern in enumerate(self._forms):
            found = re.search(pattern, title)
            if not found:
                continue

            # Per-form hit counters are keyed by the 1-based form number.
            self._stats[str(form_index + 1)] += 1

            if form_index == 0:
                # The first form captures a street plus two cross streets.
                return coders.locatable.fromStreetAndCrosses(
                    found.group(1), [found.group(2), found.group(3)])

            source = '%s (form %d)' % (found.group(0), form_index)
            return coders.locatable.fromCross(
                found.group(1), found.group(2), source=source)

        # Neither an address nor a cross-street pattern matched.
        return None
Пример #3
0
def printRecordsText(located_recs):
    """Print one tab-separated line per record to stdout.

    located_recs is iterated as (record, coder, locatable) triples.  The
    columns are: photo id, cleaned date, cleaned folder, cleaned title,
    preferred url, coder name (or 'failed' when coder is falsy), lat/lon
    and the string form of the locatable.  The last two columns are
    'n/a' when locatable is falsy.
    """
    for r, coder, locatable in located_recs:
        date = record.CleanDate(r.date())
        title = record.CleanTitle(r.title())

        # A missing folder must still end up as a string: leaving it as
        # None would make '\t'.join() below raise TypeError.
        folder = r.location()
        folder = record.CleanFolder(folder) if folder else ''

        if locatable:
            latlon = locatable.getLatLon()
            # str(None) is the truthy string 'None', so `str(x) or ''`
            # could never produce the blank it was meant to; test the
            # value before converting it.
            latlon_str = '' if latlon is None else str(latlon)
            loc = latlon_str + '\t' + str(locatable)
        else:
            loc = 'n/a\tn/a'

        print('\t'.join([
            r.photo_id(), date, folder, title, r.preferred_url,
            coder or 'failed', loc
        ]))
Пример #4
0
def printRecordsText(located_recs):
    """Print one tab-separated line per record to stdout.

    located_recs is iterated as (record, coder, location_data) triples,
    where location_data is either falsy or a mapping with 'lat', 'lon'
    and 'address' keys.  The columns are: photo id, cleaned date, cleaned
    folder, cleaned title, preferred url, coder name (or 'failed' when
    coder is falsy), the (lat, lon) tuple and the address.  The last two
    columns are 'n/a' when location_data is falsy.
    """
    for r, coder, location_data in located_recs:
        date = record.CleanDate(r.date())
        title = record.CleanTitle(r.title())

        # A missing folder must still end up as a string: leaving it as
        # None would make '\t'.join() below raise TypeError.
        folder = r.location()
        folder = record.CleanFolder(folder) if folder else ''

        if location_data:
            latlon = (location_data['lat'], location_data['lon'])
            # str() of a 2-tuple is never empty, so no `or ''` fallback
            # is needed here.
            loc = str(latlon) + '\t' + location_data['address']
        else:
            loc = 'n/a\tn/a'

        print('\t'.join([
            r.photo_id(), date, folder, title, r.preferred_url,
            coder or 'failed', loc
        ]))
Пример #5
0
    def codeRecord(self, r):
        """Locate a record that is filed under an 'S.F. Streets-...' folder.

        The record's location string is first normalized so that the
        earthquake-streets folder and the 'Sheet:' spelling are treated
        like 'Folder: S.F. Streets'.  Records outside that folder, or for
        which get_street_cat() finds no street, are not handled.

        Returns a Locatable found by self.extract_matches() if there is
        one, otherwise the result of fromStreetAndCrosses() for the
        folder's street and the matched cross streets, or None when the
        record cannot be coded.

        Raises AssertionError if the cross-street matches contain None.
        """
        loc = r.location()
        loc = loc.replace('Folder: S.F. Earthquakes-1906-Streets',
                          'Folder: S.F. Streets')
        loc = loc.replace('Sheet: S.F. Streets', 'Folder: S.F. Streets')
        if not loc.startswith("Folder: S.F. Streets-"):
            return None

        st = get_street_cat(loc)
        if not st:
            return None
        st = clean_street_cat(st.lower())

        title = record.CleanTitle(r.title()).lower()
        matches = self.extract_matches(title, st)
        if not matches:
            return None

        # matches is a mix of locatables and cross-street strings.
        # Locatables take precedence, since they're more precise.
        # isinstance() also recognizes Locatable subclasses, which an
        # exact type comparison would have passed on as cross streets.
        for match in matches:
            if isinstance(match, coders.locatable.Locatable):
                return match

        # We've got a street and cross-streets.
        assert None not in matches, '%s: %s' % (r.photo_id(), title)
        return coders.locatable.fromStreetAndCrosses(st, matches)
Пример #6
0
def printRecordsJson(located_recs):
    """Dump the given records to stdout as a single indented JSON array.

    located_recs is iterated as (record, coder, location_data) triples.
    Each record becomes one JSON object with its id, folder, cleaned date
    and title, description, url and an 'extracted' sub-object; folder,
    title and description are passed through removeNonAscii().
    'extracted' always holds the date range as two YYYY-MM-DD strings;
    when coder is truthy it also holds the (lat, lon) pair and address
    taken from location_data plus the coder name.  A 'note' key is added
    only for records whose note() is truthy.

    Raises whatever json.dumps() raises for a record that cannot be
    serialized, after writing that record to stderr.
    """
    recs = []
    for r, coder, location_data in located_recs:
        rec = {
            'id': r.photo_id(),
            'folder': removeNonAscii(r.location().replace('Folder: ', '')),
            'date': record.CleanDate(r.date()),
            'title': removeNonAscii(record.CleanTitle(r.title())),
            'description': removeNonAscii(r.description()),
            'url': r.preferred_url,
            'extracted': {
                'date_range': [None, None]
            }
        }
        note = r.note()
        if note:
            rec['note'] = note

        start, end = r.date_range()
        rec['extracted']['date_range'][0] = '%04d-%02d-%02d' % (
            start.year, start.month, start.day)
        rec['extracted']['date_range'][1] = '%04d-%02d-%02d' % (
            end.year, end.month, end.day)

        if coder:
            rec['extracted']['latlon'] = (location_data['lat'],
                                          location_data['lon'])
            rec['extracted']['located_str'] = removeNonAscii(
                location_data['address'])
            rec['extracted']['technique'] = coder

        # Serialize each record on its own first, so that a failure can be
        # traced to the offending record instead of the whole batch.
        try:
            json.dumps(rec)
        except Exception:
            sys.stderr.write('%s\n' % rec)
            # Bare raise keeps the original traceback; `raise e` would
            # replace it with one pointing at this line.
            raise

        recs.append(rec)

    print(json.dumps(recs, indent=2))
Пример #7
0
#!/usr/bin/python

import sys
# Make the parent of the script directory importable so `record` is found.
# This has to be append(): `sys.path += <string>` extends the list with the
# string's individual characters instead of adding the directory.
sys.path.append(sys.path[0] + '/..')

import csv
import record

# Export every record as one row of entries.csv.
rs = record.AllRecords()

# 'wb' is the mode the Python 2 csv module expects.  The with-block makes
# sure the file is flushed and closed once all rows are written.
with open('entries.csv', 'wb') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(
        ['photo_id', 'date', 'folder', 'title', 'library_url'])

    for r in rs:
        date = record.CleanDate(r.date())
        title = record.CleanTitle(r.title())
        folder = record.CleanFolder(r.location())

        csv_writer.writerow(
            [r.photo_id(), date, folder, title, r.preferred_url])
Пример #8
0
        continue

    # Show the folder name followed by the entries of ccs, i.e. the
    # (id, latlon, locatable_str) triples listed under 'Located:'.
    print folder
    print '  Located:'
    located = set()
    for id, latlon, locatable_str in ccs:
        print '    %s (%s)' % (locatable_str, latlon)
        # Remember the ids so they can be starred in the listing below.
        located.add(id)
    print '  Others: %d' % (len(recs) - len(ccs))
    # List every record of recs ordered by its raw date() value; records
    # whose photo id is in `located` are marked with '*'.
    dated_rs = [(r.date(), r) for r in recs]
    for date, r in sorted(dated_rs):
        c = ' '
        if r.photo_id() in located: c = '*'
        print '   %s%s %15s %s %s' % (
            c, r.photo_id(), record.CleanDate(
                r.date()), record.CleanTitle(r.title()), r.preferred_url)
    print ''

    # Ask the operator what to do with this folder.  The answer is stored
    # when it is y/n/yes/no or contains a '-' (presumably a photo id;
    # confirm against the id format).  Anything else skips the folder.
    response = raw_input('generalize? (y or n or photo_id): ')
    if response in ['y', 'n', 'yes', 'no'] or '-' in response:
        # Appends one 'folder:response' line per decision.
        # NOTE(review): the file object is never closed explicitly.
        file('generalizations.txt', 'a').write('%s:%s\n' % (folder, response))
    else:
        print '(Skipping)'

    # Blank lines to separate this folder from the next prompt.
    print ''
    print ''
    print ''

# Final summary on stderr; `saved` and `generalizations` are defined
# outside the visible part of the script.
sys.stderr.write('Saveable records: %d\n' % saved)
sys.stderr.write('Saved: %d\n' % len(generalizations))