Пример #1
0
    def handle_patent(elem):
        """Collect the fields of one generation-2 patent element and store them.

        Values are read from the ``SDOBI`` bibliographic section of ``elem``
        and from its ``SDOAB/BTEXT/PARA`` abstract paragraphs, gathered in a
        ``defaultdict(str)`` tagged with ``gen = 2`` and handed to
        ``store_patent``.

        Args:
            elem: Element for a single patent document; it must support
                ``find`` and ``findall`` with path expressions.

        Returns:
            The result of ``store_patent`` for the assembled record.
        """
        pat = defaultdict(str)
        pat['gen'] = 2

        # top-level section
        bib = elem.find('SDOBI')

        # published patent
        pubref = bib.find('B100')
        pat['patnum'] = get_text(pubref, 'B110/DNUM/PDAT')
        pat['grantdate'] = get_text(pubref, 'B140/DATE/PDAT')

        # filing date
        appref = bib.find('B200')
        pat['filedate'] = get_text(appref, 'B220/DATE/PDAT')

        # ipc code: every entry is paired with the section-wide B516 value
        patref = bib.find('B500')
        ipcsec = patref.find('B510')
        ipclist = []
        # A document without a B510 section now yields an empty list instead
        # of failing on ``None.findall``; this mirrors the None checks used
        # for the citation and owner sections below.
        if ipcsec is not None:
            ipcver = get_text(ipcsec, 'B516/PDAT')
            ipc1 = get_text(ipcsec, 'B511/PDAT')
            # NOTE(review): get_text appears to return a string elsewhere in
            # this function (its result is ``.upper()``-ed), so this check may
            # always pass - confirm against get_text's default.
            if ipc1 is not None:
                ipclist.append((ipc1, ipcver))
            ipclist.extend(
                (get_text(child, 'PDAT'), ipcver)
                for child in ipcsec.findall('B512'))
        pat['ipclist'] = ipclist

        # citations
        cites = []
        refs = patref.find('B560')
        if refs is not None:
            for cite in refs.findall('B561'):
                cites.append(get_text(cite, 'PCIT/DOC/DNUM/PDAT'))
        pat['citlist'] = cites

        # title
        pat['title'] = get_text(patref, 'B540/STEXT/PDAT')

        # claims
        pat['claims'] = get_text(patref, 'B570/B577/PDAT')

        # applicant name and address (owner is stored upper-cased)
        ownref = bib.find('B700/B730/B731/PARTY-US')
        if ownref is not None:
            pat['owner'] = get_text(ownref, 'NAM/ONM/STEXT/PDAT').upper()
            address = ownref.find('ADR')
            if address is not None:
                pat['state'] = get_text(address, 'STATE/PDAT')
                # country falls back to 'US' when CTRY/PDAT is not found
                pat['country'] = get_text(address, 'CTRY/PDAT', default='US')

        # abstract: one line per PARA element
        abspars = elem.findall('SDOAB/BTEXT/PARA')
        if abspars:
            pat['abstract'] = '\n'.join(raw_text(e) for e in abspars)

        # roll it in
        return store_patent(pat)
Пример #2
0
    def handle_patent(elem):
        """Parse a generation-2 patent element into a record and store it.

        The record is a ``defaultdict(str)`` with ``gen`` set to 2. Fields
        come from the ``SDOBI`` section (sub-sections ``B100``, ``B200``,
        ``B500`` and ``B700``) and the ``SDOAB`` abstract. The finished
        record is passed to ``store_patent``.

        Args:
            elem: Element for one patent document, queried through ``find``
                and ``findall``.

        Returns:
            Whatever ``store_patent`` returns for the record.
        """
        pat = defaultdict(str)
        pat['gen'] = 2

        # top-level section
        bib = elem.find('SDOBI')

        # published patent
        pubref = bib.find('B100')
        pat['patnum'] = get_text(pubref, 'B110/DNUM/PDAT')
        pat['grantdate'] = get_text(pubref, 'B140/DATE/PDAT')

        # filing date
        appref = bib.find('B200')
        pat['filedate'] = get_text(appref, 'B220/DATE/PDAT')

        # ipc code
        patref = bib.find('B500')
        ipcsec = patref.find('B510')
        ipclist = []
        # Guard against a missing B510 section: previously ``ipcsec.findall``
        # was reached with ``ipcsec`` set to None, unlike the other optional
        # sections in this function which are all None-checked.
        if ipcsec is not None:
            ipcver = get_text(ipcsec, 'B516/PDAT')
            primary = get_text(ipcsec, 'B511/PDAT')
            secondary = [get_text(node, 'PDAT')
                         for node in ipcsec.findall('B512')]
            # The primary code (B511) goes first when present, followed by
            # every B512 code; all share the single B516 version value.
            codes = secondary if primary is None else [primary] + secondary
            ipclist = [(code, ipcver) for code in codes]
        pat['ipclist'] = ipclist

        # citations: document numbers of every B561 entry under B560
        refs = patref.find('B560')
        if refs is None:
            pat['citlist'] = []
        else:
            pat['citlist'] = [get_text(cite, 'PCIT/DOC/DNUM/PDAT')
                              for cite in refs.findall('B561')]

        # title
        pat['title'] = get_text(patref, 'B540/STEXT/PDAT')

        # claims
        pat['claims'] = get_text(patref, 'B570/B577/PDAT')

        # applicant name and address
        ownref = bib.find('B700/B730/B731/PARTY-US')
        if ownref is not None:
            pat['owner'] = get_text(ownref, 'NAM/ONM/STEXT/PDAT').upper()
            address = ownref.find('ADR')
            if address is not None:
                pat['state'] = get_text(address, 'STATE/PDAT')
                # 'US' is used when the address carries no CTRY/PDAT value
                pat['country'] = get_text(address, 'CTRY/PDAT', default='US')

        # abstract: paragraphs joined with newlines
        abspars = elem.findall('SDOAB/BTEXT/PARA')
        if abspars:
            pat['abstract'] = '\n'.join(raw_text(par) for par in abspars)

        # roll it in
        return store_patent(pat)
Пример #3
0
def parse_grants_gen3(elem):
    """Build a record from one generation-3 XML document and store it.

    The record starts as a shallow copy of the module-level ``default``
    mapping and is filled from the ``us-bibliographic-data-application``
    section of ``elem`` (publication and application identifiers, assignee
    organisation, title, IPC codes, applicant address) plus the ``abstract``
    element. The record is then passed to ``store_patent``.

    Args:
        elem: Element for one document, queried through ``find``.

    Returns:
        The result of ``store_patent`` for the assembled record.
    """
    pat = copy(default)

    # top-level section
    bib = elem.find('us-bibliographic-data-application')
    pubref = bib.find('publication-reference')
    appref = bib.find('application-reference')

    # published patent
    pubinfo = pubref.find('document-id')
    pat['pubnum'] = get_text(pubinfo, 'doc-number')
    pat['pubdate'] = get_text(pubinfo, 'date')

    # filing date
    pat['appnum'] = get_text(appref, 'document-id/doc-number')
    pat['appdate'] = get_text(appref, 'document-id/date')
    pat['appname'] = get_text(bib, 'assignees/assignee/orgname')

    # title
    pat['title'] = get_text(bib, 'invention-title')

    # ipc code
    # Both classification layouts are tried in turn; when both yield entries
    # the later one overwrites the earlier one's values. An empty result is
    # skipped so that ``ipclist[0]`` cannot raise IndexError (the gen-2
    # parser applies the same emptiness check).
    ipcsec = bib.find('classifications-ipcr')
    if ipcsec is not None:
        ipclist = list(gen3_ipcr(ipcsec))
        if ipclist:
            pat['ipc1'], pat['ipcver'] = ipclist[0]
            pat['ipc2'] = ';'.join(code for code, _ in ipclist)

    ipcsec = bib.find('classification-ipc')
    if ipcsec is not None:
        ipclist = list(gen3_ipc(ipcsec))
        if ipclist:
            pat['ipc1'], pat['ipcver'] = ipclist[0]
            pat['ipc2'] = ';'.join(code for code, _ in ipclist)

    # applicant name and address
    address = bib.find('parties/applicants/applicant/addressbook/address')
    if address is not None:
        pat['city'] = get_text(address, 'city')
        pat['state'] = get_text(address, 'state')
        pat['country'] = get_text(address, 'country')

    # abstract
    abspar = elem.find('abstract')
    if abspar is not None:
        pat['abstract'] = raw_text(abspar, sep=' ')

    # roll it in
    return store_patent(pat)
Пример #4
0
def parse_grants_gen2(elem):
    """Build a record from one generation-2 XML document and store it.

    The record starts as a shallow copy of the module-level ``default``
    mapping and is filled from the ``subdoc-bibliographic-information``
    section of ``elem`` (publication and filing data, assignee organisation,
    title, IPC codes, correspondence address) plus the ``subdoc-abstract``
    element. Optional sub-sections that are absent leave the corresponding
    keys at whatever ``default`` provides.

    Args:
        elem: Element for one document, queried through ``find``.

    Returns:
        The result of ``store_patent`` for the assembled record.
    """
    pat = copy(default)

    # top-level section
    bib = elem.find('subdoc-bibliographic-information')

    # publication data
    pub = bib.find('document-id')
    if pub is not None:
        pat['pubnum'] = get_text(pub, 'doc-number')
        pat['pubdate'] = get_text(pub, 'document-date')

    # application data
    app = bib.find('domestic-filing-data')
    if app is not None:
        pat['appnum'] = get_text(app, 'application-number/doc-number')
        pat['appdate'] = get_text(app, 'filing-date')
    pat['appname'] = get_text(bib, 'assignee/organization-name')

    # title and ipc code both live under technical-information; guard it the
    # same way as the other optional sections so ``tech.find`` is never
    # called on None
    tech = bib.find('technical-information')
    if tech is not None:
        # title
        pat['title'] = get_text(tech, 'title-of-invention')

        # ipc code
        ipcsec = tech.find('classification-ipc')
        if ipcsec is not None:
            # The edition lookup now happens inside the None check; it used
            # to run on ``ipcsec`` before the check.
            # NOTE(review): if get_text tolerates a None parent, the old code
            # stored its default under 'ipcver' when the section was missing;
            # now the key keeps its value from ``default`` - confirm this is
            # acceptable.
            pat['ipcver'] = get_text(ipcsec, 'classification-ipc-edition')
            ipclist = list(gen2_ipc(ipcsec))
            if ipclist:
                pat['ipc1'] = ipclist[0]
                pat['ipc2'] = ';'.join(ipclist)

    # applicant info
    address = bib.find('correspondence-address/address')
    if address is not None:
        pat['city'] = get_text(address, 'city')
        pat['state'] = get_text(address, 'state')
        pat['country'] = get_text(address, 'country/country-code')

    # abstract
    abst = elem.find('subdoc-abstract')
    if abst is not None:
        pat['abstract'] = raw_text(abst, sep=' ')

    # roll it in
    return store_patent(pat)
Пример #5
0
    def handle_patent(elem):
        """Collect the fields of one generation-3 grant element and store them.

        Values are read from the ``us-bibliographic-data-grant`` section of
        ``elem`` and from its ``abstract`` element, gathered in a
        ``defaultdict(str)`` tagged with ``gen = 3`` and handed to
        ``store_patent``.

        Args:
            elem: Element for a single patent document; it must support
                ``find`` and ``findall`` with path expressions.

        Returns:
            The result of ``store_patent`` for the assembled record.
        """
        pat = defaultdict(str)
        pat['gen'] = 3

        # top-level section
        bib = elem.find('us-bibliographic-data-grant')
        pubref = bib.find('publication-reference')
        appref = bib.find('application-reference')

        # published patent
        pubinfo = pubref.find('document-id')
        pat['patnum'] = get_text(pubinfo, 'doc-number')
        pat['grantdate'] = get_text(pubinfo, 'date')

        # filing date
        pat['filedate'] = get_text(appref, 'document-id/date')

        # title
        pat['title'] = get_text(bib, 'invention-title')

        # ipc code: list of (code, version) pairs from either layout
        ipclist = []

        ipcsec = bib.find('classifications-ipcr')
        if ipcsec is not None:
            for ipc in ipcsec.findall('classification-ipcr'):
                # main-group is right-aligned in a 3-character field
                code = '%s%s%s%3s%s' % (
                    get_text(ipc, 'section'),
                    get_text(ipc, 'class'),
                    get_text(ipc, 'subclass'),
                    get_text(ipc, 'main-group'),
                    get_text(ipc, 'subgroup'))
                ipclist.append(
                    (code, get_text(ipc, 'ipc-version-indicator/date')))

        ipcsec = bib.find('classification-ipc')
        if ipcsec is not None:
            ipcver = get_text(ipcsec, 'edition')
            ipc0 = ipcsec.find('main-classification')
            for ipc in chain([ipc0], ipcsec.findall('further-classification')):
                # A missing main-classification (None) or an element without
                # text used to raise here; such entries are now skipped.
                if ipc is None or ipc.text is None:
                    continue
                itxt = ipc.text
                # Characters 4-6 have '0' replaced by a space and '/' is
                # dropped from the remainder.
                itxt = (itxt[:4]
                        + itxt[4:7].replace('0', ' ')
                        + itxt[7:].replace('/', ''))
                ipclist.append((itxt, ipcver))

        pat['ipclist'] = ipclist

        # us class
        oclsec = bib.find('classification-national')
        if oclsec is not None:
            pat['class'] = get_text(oclsec, 'main-classification')

        # claims
        pat['claims'] = get_text(bib, 'number-of-claims')

        # citations: the container and its children are either unprefixed or
        # both carry the 'us-' prefix
        refs = bib.find('references-cited')
        prefix = ''
        if refs is None:
            refs = bib.find('us-references-cited')
            prefix = 'us-'

        cites = []
        if refs is not None:
            for cite in refs.findall(prefix + 'citation'):
                pcite = cite.find('patcit')
                if pcite is None:
                    continue
                docid = pcite.find('document-id')
                # A patcit without a document-id carries nothing to record.
                if docid is None:
                    continue
                pnum = get_text(docid, 'doc-number')
                kind = get_text(docid, 'kind')
                # keep only kind 'A' exactly, or any kind starting with 'B'
                if kind == 'A' or kind.startswith('B'):
                    cites.append(pnum)
        pat['citlist'] = cites

        # applicant name and address
        assignee = bib.find('assignees/assignee/addressbook')
        if assignee is not None:
            pat['owner'] = get_text(assignee, 'orgname').upper()
            address = assignee.find('address')
            # Same None check as the generation-2 handler applies to its
            # address element; without it a missing <address> is passed
            # straight to get_text.
            if address is not None:
                pat['city'] = get_text(address, 'city').upper()
                pat['state'] = get_text(address, 'state')
                pat['country'] = get_text(address, 'country')

        # abstract
        abspar = elem.find('abstract')
        if abspar is not None:
            pat['abstract'] = raw_text(abspar, sep=' ')

        # roll it in
        return store_patent(pat)
Пример #6
0
    def handle_patent(elem):
        """Parse a generation-3 grant element into a record and store it.

        The record is a ``defaultdict(str)`` with ``gen`` set to 3. Fields
        come from the ``us-bibliographic-data-grant`` section (publication
        and filing data, title, IPC codes, claims value, cited patents,
        assignee name and address) and from the ``abstract`` element. The
        finished record is passed to ``store_patent``.

        Args:
            elem: Element for one patent document, queried through ``find``
                and ``findall``.

        Returns:
            Whatever ``store_patent`` returns for the record.
        """
        pat = defaultdict(str)
        pat['gen'] = 3

        # top-level section
        bib = elem.find('us-bibliographic-data-grant')
        pubref = bib.find('publication-reference')
        appref = bib.find('application-reference')

        # published patent
        pubinfo = pubref.find('document-id')
        pat['patnum'] = get_text(pubinfo, 'doc-number')
        pat['grantdate'] = get_text(pubinfo, 'date')

        # filing date
        pat['filedate'] = get_text(appref, 'document-id/date')

        # title
        pat['title'] = get_text(bib, 'invention-title')

        # ipc code
        ipclist = []

        ipcsec = bib.find('classifications-ipcr')
        if ipcsec is not None:
            parts = ('section', 'class', 'subclass', 'main-group', 'subgroup')
            for ipc in ipcsec.findall('classification-ipcr'):
                # the five parts are concatenated, with main-group padded to
                # width 3 by the '%3s' conversion
                code = '%s%s%s%3s%s' % tuple(get_text(ipc, p) for p in parts)
                version = get_text(ipc, 'ipc-version-indicator/date')
                ipclist.append((code, version))

        ipcsec = bib.find('classification-ipc')
        if ipcsec is not None:
            ipcver = get_text(ipcsec, 'edition')
            ipc0 = ipcsec.find('main-classification')
            for ipc in chain([ipc0], ipcsec.findall('further-classification')):
                # Skip a missing main-classification or an element with no
                # text instead of failing on ``ipc.text`` / slicing None.
                if ipc is None or ipc.text is None:
                    continue
                raw = ipc.text
                head, group, rest = raw[:4], raw[4:7], raw[7:]
                # zeros in the 3-character group become spaces; slashes are
                # removed from the trailing part
                itxt = head + group.replace('0', ' ') + rest.replace('/', '')
                ipclist.append((itxt, ipcver))

        pat['ipclist'] = ipclist

        # claims
        pat['claims'] = get_text(bib, 'number-of-claims')

        # citations: fall back to the 'us-'-prefixed container and children
        # when the unprefixed container is absent
        refs = bib.find('references-cited')
        prefix = ''
        if refs is None:
            refs = bib.find('us-references-cited')
            prefix = 'us-'

        cites = []
        if refs is not None:
            for cite in refs.findall(prefix + 'citation'):
                pcite = cite.find('patcit')
                if pcite is None:
                    continue
                docid = pcite.find('document-id')
                # nothing to record for a patcit lacking a document-id
                if docid is None:
                    continue
                pnum = get_text(docid, 'doc-number')
                kind = get_text(docid, 'kind')
                # only kind 'A' exactly, or kinds beginning with 'B', count
                if kind == 'A' or kind.startswith('B'):
                    cites.append(pnum)
        pat['citlist'] = cites

        # applicant name and address
        assignee = bib.find('assignees/assignee/addressbook')
        if assignee is not None:
            pat['owner'] = get_text(assignee, 'orgname').upper()
            address = assignee.find('address')
            # Guard the address like the generation-2 handler does, so an
            # addressbook without <address> does not pass None to get_text.
            if address is not None:
                pat['state'] = get_text(address, 'state')
                pat['country'] = get_text(address, 'country')

        # abstract
        abspar = elem.find('abstract')
        if abspar is not None:
            pat['abstract'] = raw_text(abspar, sep=' ')

        # roll it in
        return store_patent(pat)