Пример #1
0
 def handle_all():
     """Handle every patent produced by ``pp.read_events()``.

     Each event is a pair whose second item is passed to ``handle_patent``.
     If that call returns a falsy value, processing stops at once and
     ``False`` is returned without clearing that patent. Otherwise the
     patent is passed to ``clear`` and the next event is handled.

     Returns:
         ``True`` when every event was handled, ``False`` on the first
         patent for which ``handle_patent`` reported failure.
     """
     for _event, patent in pp.read_events():
         succeeded = handle_patent(patent)
         if not succeeded:
             # Stop immediately; the failing patent is left uncleared.
             return False
         clear(patent)
     return True
Пример #2
0
def parse_gen3(fname_in):
    """Parse ``patent-assignment`` records from *fname_in* into ``chunker``.

    For every ``patent-assignment`` element produced by ``iterparse`` this
    reads the conveyance text, the name of the first assignor and first
    assignee, the execution and recorded dates, and the assignee's state and
    country, then inserts one row per patent number into the module-level
    ``chunker``.

    Skipped records:
        * records that yield no patent numbers are skipped without counting;
        * records where either party is classified ``ORG_INDV`` or the
          conveyance is classified ``CONV_OTHER`` are skipped and counted in
          the global ``o``.

    Every element, including skipped ones, is passed to ``clear`` once it
    has been examined. Previously the two skip paths bypassed that step, so
    skipped records were never released while streaming.

    Globals updated: ``i`` (records emitted), ``o`` (records filtered out),
    ``p`` (patent numbers emitted).

    NOTE(review): a record lacking ``patent-assignors`` or
    ``patent-assignees`` (or having them empty) still raises on the ``[0]``
    lookups below, as it did before -- confirm whether such records occur.

    Args:
        fname_in: Source passed to ``iterparse``.

    Returns:
        ``False`` if parsing stopped because ``args.limit`` records had been
        emitted, ``True`` if the whole input was consumed.
    """
    global i, o, p

    for _event, elem in iterparse(fname_in,
                                  tag='patent-assignment',
                                  events=['end'],
                                  recover=True):
        # top-level sections; only the first assignor/assignee is used
        record = elem.find('assignment-record')
        assignor = elem.find('patent-assignors')[0]
        assignee = elem.find('patent-assignees')[0]
        patents = elem.find('patent-properties')

        # conveyance
        convey = get_text(record, 'conveyance-text')

        # names
        assignor_name = get_text(assignor, 'name')
        assignee_name = get_text(assignee, 'name')

        # dates: a missing date section becomes an empty string
        exec_sec = assignor.find('execution-date')
        recd_sec = record.find('recorded-date')

        exec_date = get_text(exec_sec, 'date') if exec_sec is not None else ''
        recd_date = get_text(recd_sec, 'date') if recd_sec is not None else ''

        # location
        assignee_country = get_text(assignee,
                                    'country-name',
                                    default='UNITED STATES')
        assignee_state = get_text(assignee, 'state')

        # patent info
        patnums = list(gen_patnums(patents))
        npat = len(patnums)
        if npat == 0:
            # nothing to emit, but still release the element
            clear(elem)
            continue

        # code names
        src_type = org_type(assignor_name)
        dst_type = org_type(assignee_name)
        ctype = convey_type(convey)

        # throw out individuals and unclassified conveyances
        if src_type == ORG_INDV or dst_type == ORG_INDV or ctype == CONV_OTHER:
            o += 1
            # release filtered records too, not only emitted ones
            clear(elem)
            continue

        # output: one row per patent number in this assignment
        for pn in patnums:
            chunker.insert(None, pn, exec_date, recd_date, convey,
                           assignor_name, assignee_name, assignee_state,
                           assignee_country)

        # free memory
        clear(elem)

        # stats
        i += 1
        p += npat

        # logging: print one sample line every 1000 emitted records
        if i % 1000 == 0:
            print('%4d: %40.40s -> %30.30s (%20.20s, %20.20s)' %
                  (npat, assignor_name, assignee_name, assignee_state,
                   assignee_country))

        # stop early once the requested number of records was emitted
        if args.limit and i >= args.limit:
            return False

    return True
Пример #3
0
 def handle_all():
     """Run ``handle_patent`` over all events from ``pp.read_events()``.

     The second item of each event pair is handled and, on success, passed
     to ``clear``. The first falsy result from ``handle_patent`` ends the
     loop early, leaving that patent uncleared.

     Returns:
         ``False`` as soon as one patent fails to be handled, ``True`` if
         all events were handled.
     """
     events = pp.read_events()
     for _, patent in events:
         if handle_patent(patent):
             # Handled successfully: release it and move on.
             clear(patent)
             continue
         return False
     return True