示例#1
0
def normalize_patent_au(patent):
    """
    Normalize an "Australian" document number,
    e.g. AU2003212220A1, AU200042655B2, AU00784257B2.

    Patent Application Number:
      old: 4+5 digits  (Patadmin, before 5 July 2002)
      new: 4+6 digits  (PAMS, after 5 July 2002)
      http://apa.hpa.com.au:8080/ipapa/intro
      http://pericles.ipaustralia.gov.au/aub/aub_pages_1.intro
    Patent Number:
      6 digits
      http://pericles.ipaustralia.gov.au/aub/aub_pages_1.intro

    Returns a copy of ``patent`` with a rewritten ``number``; the input
    is not modified. ``patent['country']`` must be ``'AU'``.
    """

    assert patent['country'] == 'AU'

    result = copy(patent)
    number = result['number']

    if len(number) == 9:
        # old 4+5 layout: keep the year, widen the sequence part to 6 characters
        year_part, sequence_part = number[:4], number[4:]
        number = year_part + pad_left(sequence_part, '0', 6)
    else:
        number = trim_leading_zeros(number)

    # anything shorter than 6 characters is padded up to 6
    if len(number) < 6:
        number = pad_left(number, '0', 6)

    result['number'] = number
    return result
示例#2
0
def denormalize_patent_wo_algo(fullyear, seqnumber):
    """
    Build the WO number in the layout selected by year and sequence number.

    Number analysis for WO:

      Transition from 2+5 to 2+6:
      WO=02/51230
      WO=02/051231

      Transition from 2+6 to 4+6:
      WO03107732   (24.12.2003)
      WO2004000001 (31.12.2003)

    ``fullyear`` and ``seqnumber`` are compared against integer
    thresholds; the result is the year part (two or four characters)
    followed by the sequence number padded via ``pad_left``.
    """

    two_digit_year = str(fullyear)[2:4]

    # 2+5: everything up to and including 02/51230
    if fullyear <= 2001 or (fullyear == 2002 and seqnumber <= 51230):
        year_part, width = two_digit_year, 5

    # 2+6: from 02/051231 through the end of 2003
    elif fullyear == 2003 or (fullyear == 2002 and seqnumber >= 51231):
        year_part, width = two_digit_year, 6

    # 4+6: everything else
    else:
        year_part, width = str(fullyear), 6

    return year_part + pad_left(seqnumber, '0', width)
示例#3
0
def patch_patent_old_archive(patent):
    """
    Rewrite a document number for the old archive.

    Returns ``None`` for a falsy ``patent``. Otherwise returns a copy in
    which WO numbers went through ``denormalize_patent_wo``, US numbers
    through ``normalize_patent_us``, and the resulting number is padded
    to 8 characters with leading zeros.
    """
    if not patent:
        return

    result = copy(patent)
    country = result['country']

    if country == 'WO':
        result = denormalize_patent_wo(result)

    elif country == 'US':
        # from patlib.inc.php (new 2005-03-18):
        # check for more files returned under certain circumstances,
        # e.g. query for "US2005015034A1" returns "US_20050015034_A1.pdf".
        # The earlier approach inserted a "0" after the first four digits
        # when they formed a year; since 2007-07-26 US applications are
        # 4+7 and normalize_patent_us is used instead.
        result = normalize_patent_us(result)

    # pad to 8 characters with leading zeros; this is the worst thing ever
    result['number'] = pad_left(result['number'], '0', 8)

    return result
示例#4
0
def denormalize_patent_wo(patent):
    """
    Convert a WO number back to the layout chosen by
    ``denormalize_patent_wo_algo`` for its year and sequence number.

    Handles 10-character (4+6), 8-character (2+6) and 9-character
    (assumed 4+5) numbers; any other length is returned unchanged.
    Returns a copy of ``patent``; ``patent['country']`` must be ``'WO'``.
    """
    assert patent['country'] == 'WO'

    result = patent.copy()
    number = result['number']
    size = len(number)

    if size == 10:
        # 4+6 input
        fullyear, seqnumber = number[:4], number[4:]
        century = fullyear[:2]

        if century == '19':
            # 19xx: always 2+5, sequence number re-padded from its integer value
            result['number'] = fullyear[2:4] + pad_left(str(int(seqnumber)), '0', 5)
        elif century == '20':
            # 20xx: layout depends on year and sequence number
            result['number'] = denormalize_patent_wo_algo(int(fullyear), int(seqnumber))

    elif size == 8:
        # 2+6 input: expand the two-digit year first
        year, seqnumber = number[:2], number[2:]
        fullyear = fullyear_from_year(year)
        result['number'] = denormalize_patent_wo_algo(int(fullyear), int(seqnumber))

    elif size == 9:
        # wrong format: assume 4+5
        fullyear, seqnumber = number[:4], number[4:]
        result['number'] = denormalize_patent_wo_algo(int(fullyear), int(seqnumber))

    return result
示例#5
0
def normalize_patent_wo_pct(patent):
    """
    Normalizes to "WIPO Application Number" format, e.g. PCT/US2005/009417
    Takes inputs like WOPCT/US02/03226, PCT/US1999/9417 or WOEP/2004/008531

    see "International Application No.":
    http://www.wipo.int/pctdb/en/wo.jsp?IA=PCT/US2005/009417
    http://www.wipo.int/pctdb/en/wo.jsp?IA=US2005009417

    see also:
    http://www.wipo.int/edocs/pctdocs/en/2005/pct_2005_42-section3.pdf

    ``patent['country']`` must be ``'WO'``. The number is split into
    segments on "/", "|" or "-".

    Returns:
      - the result of ``normalize_patent_wo`` when the number embeds a
        WO publication number (e.g. "PCT/WO9831467", "PCT-WO97/29690"),
      - ``None`` when the number does not consist of exactly three segments,
      - otherwise a copy of ``patent`` with ``country`` set to ``''`` and
        ``number`` rebuilt as "<first segment>/<country+year>/<seqnumber>".
    """

    assert patent['country'] == 'WO'

    patched = copy(patent)

    # Raw string on purpose: "\/" is not a valid string escape and triggers a
    # warning in a plain literal. The pattern text itself is unchanged.
    parts = re.split(r'[\/|-]', patched['number'])

    # handle special formatting like "WOPCT/WO9831467": convert to WO publication number
    if len(parts) == 2 and parts[1].startswith('WO'):
        wo_patent = split_patent_number(parts[1])
        return normalize_patent_wo(wo_patent)

    # only allow numbers containing three segments
    if len(parts) != 3:
        return

    # assign segment names
    pct, country_year, seqnumber = parts

    # handle special formatting like "WOPCT-WO97/29690": convert to WO publication number
    if country_year.startswith('WO'):
        wo_patent = split_patent_number(country_year + seqnumber)
        return normalize_patent_wo(wo_patent)

    # handle special formatting like "WOEP/2004/008531":
    # move the two-letter country code in front of the year segment
    if pct.startswith('WO') and len(pct) == 4:
        country_year = pct[2:4] + country_year

    # assume s.th. like "EP02": expand year to full year
    if len(country_year) == 4:
        # build fullyear from (2-digit) year
        # (per the original note: 78-99 => 19xx, otherwise => 20xx -- see fullyear_from_year)
        fullyear = fullyear_from_year(country_year[2:])
        country_year = country_year[0:2] + fullyear

    # pad sequential number to six digits with leading zeros
    seqnumber = pad_left(seqnumber, '0', 6)

    # clear the country; the normalized number carries it in its second segment
    patched['country'] = ''
    patched['number'] = '%s/%s/%s' % (pct, country_year, seqnumber)

    return patched
示例#6
0
def normalize_patent_wo(patent):
    """
    Normalizes to "WIPO Publication Number" format, e.g. WO2005092324

    see "Pub. No.":
    http://www.wipo.int/pctdb/en/wo.jsp?IA=WO/2005/092324
    http://www.wipo.int/pctdb/en/wo.jsp?IA=WO0067640

    ``patent['country']`` must be ``'WO'``. Returns a copy of ``patent``;
    the input is not modified.

    - Numbers starting with a non-digit character are returned untouched.
    - 7- or 8-character numbers are read as 2+5 / 2+6 and expanded to 4+6.
    - 9-character numbers are read as 4+5 and expanded to 4+6.
    - Leading zeros are trimmed from the result in all converted cases
      and for any other length.
    """

    assert patent['country'] == 'WO'

    patched = copy(patent)
    number = patched['number']

    # filter: leave special documents untouched (number starts with a non-digit).
    # Raw string on purpose: "\D" is not a valid string escape in a plain literal.
    if re.match(r'^\D+', number):
        return patched

    length = len(number)

    # convert from 2+5 or 2+6 to 4+6
    if length == 7 or length == 8:

        year = number[0:2]
        seqnumber = number[2:]

        # build fullyear from (2-digit) year
        # (per the original note: 78-99 => 19xx, otherwise => 20xx -- see fullyear_from_year)
        fullyear = fullyear_from_year(year)

        # NOTE: 8-character numbers with a leading zero are ambiguous
        # (e.g. WO00101000A1 could be WO2000101000A1 or WO2001001000A1, a
        # side effect of upstream "pad everything to 8 characters" logic).
        # An earlier, disabled heuristic stripped the first zero and decoded
        # again; the number is deliberately taken at face value here.
        # TODO: what about WO09802618A2, WO00202618A2, WO00402618A2,
        #       WO09201000A1, WO09901000A3, WO00101000A1?

        # pad sequential number to 6 characters
        number = fullyear + pad_left(seqnumber, '0', 6)

    # convert from 4+5 to 4+6 (wrong format)
    elif length == 9:
        fullyear = number[0:4]
        seqnumber = number[4:]

        # pad sequential number to 6 characters
        number = fullyear + pad_left(seqnumber, '0', 6)

    patched['number'] = trim_leading_zeros(number)
    return patched
示例#7
0
def patch_patent(patent, provider=None):
    """
    Apply country-specific normalization rules to a document number.

    ``patent`` is a mapping with at least ``country`` and ``number``.
    ``provider`` is only forwarded to ``normalize_patent_us``.

    Returns ``None`` for a falsy ``patent``; otherwise a copy of it with
    the number rewritten. The input is not modified. Countries without a
    dedicated rule only get their leading zeros trimmed.
    """

    if not patent:
        return

    # length of the number as passed in, before any rewriting
    number_length = len(patent['number'])

    patched = copy(patent)
    country = patched['country']

    # strip leading zeros
    # examples: publication: AT401234; application: AT 967/1994 => AT96794
    if country == 'AT':
        patched['number'] = trim_leading_zeros(patched['number'])

    # pad to 6 characters with leading zeros
    elif country == 'AR':
        patched['number'] = patched['number'].lstrip('0').rjust(6, '0')

    elif country == 'AU':
        patched = normalize_patent_au(patched)

    elif country == 'BR':
        patched['number'] = patched['number'].lstrip('0')

    # strip leading zeros
    # (padding kindcode T1 documents to 7 digits like EP is currently disabled)
    elif country == 'DE':
        patched['number'] = trim_leading_zeros(patched['number'])

    # The Eurasian Patent Organization (EAPO)
    # Pad to 6 characters with leading zeros.
    # EA numbers of 9 or more characters skip this rule and end up in the
    # generic "strip leading zeros" branch at the bottom.
    elif country == 'EA' and number_length < 9:
        patched['number'] = trim_leading_zeros(patched['number'])
        patched['number'] = pad_left(patched['number'], '0', 6)

    # pad to 7 characters with leading zeros
    elif country == 'EP':
        patched['number'] = trim_leading_zeros(patched['number'])
        patched['number'] = pad_left(patched['number'], '0', 7)

    elif country == 'GE':
        number = patched['number'].lstrip('0')

        # e.g.
        # GE00U200501210Y = GEU20051210Y
        # GE00P200503700B = GEP20053700B
        # Drop the zero at index 5 if there is one. The length guard keeps
        # short numbers from raising IndexError.
        if len(number) > 5 and number[5] == '0':
            number = number[:5] + number[6:]

        patched['number'] = number

    elif country == 'IT':
        patched['number'] = patched['number'].lstrip('0')
        patched = normalize_patent_it(patched)

    # 2017-09-06: KR numbers
    # e.g. KR1020150124192A => KR20150124192A
    elif country == 'KR':
        patched['number'] = trim_leading_zeros(patched['number'])
        if len(patched['number']) > 11 and patched['number'][:2] == '10':
            patched['number'] = patched['number'][2:]

    # 2009-11-09: JP numbers
    elif country == 'JP':
        patched = normalize_patent_jp(patched)

    # 2015-09-01: SE numbers
    elif country == 'SE':
        patched = normalize_patent_se(patched)
        patched['number'] = trim_leading_zeros(patched['number'])

    # 2007-07-26: US applications are 4+7
    elif country == 'US':
        patched = normalize_patent_us(patched, provider=provider)

    # normalize wo numbers to 4+6 format
    elif country == 'WO':
        # WOPCT/US86/01765 or WOEP/2004/008531
        if patched['number'].startswith('PCT'):
            patched = normalize_patent_wo_pct(patched)
        else:
            patched = normalize_patent_wo(patched)

    # strip leading zeros
    else:
        patched['number'] = trim_leading_zeros(patched['number'])

    return patched