# Fragment (line breaks lost in extraction): family-augmentation step.
# Resets ListeBrevetAug and Done, then, when ficOk and GatherFamilly are both
# set, creates an epo_ops.RegisteredClient that asks for JSON and, for every
# entry of ListeBrevet that is not None, not '' and not in Done, calls
# GetFamilly. Each returned patent goes through CleanPatent; when it is not
# yet in ListeBrevetAug and its label is in DejaVu, the first same-label entry
# of the family list is un-nested, merged with it through Update() and
# appended to ListeBrevetAug.
# NOTE(review): DejaVu is initialised to [] and never filled in the visible
# part, and the leading `else:` belongs to an `if` outside this fragment.
ListeBrevetAug = [] Done = [] else: ListeBrevetAug = [] if ficOk and GatherFamilly: registered_client = epo_ops.RegisteredClient(key, secret) # data = registered_client.family('publication', , 'biblio') registered_client.accept_type = 'application/json' DejaVu = [] for Brev in ListeBrevet: if Brev is not None and Brev != '' and Brev not in Done: temp = GetFamilly(registered_client, Brev, ResultContents) if temp is not None: for pat in temp: pat = CleanPatent(pat) if pat not in ListeBrevetAug and pat != '': if pat['label'] in DejaVu: temporar = [ patent for patent in temp if patent['label'] == pat['label'] ][0] #hum should be unique temporar = UnNest(temporar) for cle in temporar.keys(): temporar[cle] = UnNest(temporar[cle]) temporar = CleanPatent(Update(temporar, pat)) temporar = CleanPatent(temporar) ListeBrevetAug.append(temporar) #temp.append(temporar) else: pat = CleanPatent(pat)
# Fragment (line breaks lost in extraction): family-augmentation step, with
# both branches of the per-patent handling visible.
# When ficOk and GatherFamilly are set, an epo_ops.RegisteredClient (JSON
# accept type) is used to call GetFamilly for each usable entry of
# ListeBrevet (not None, not '', not in Done). Every family member is cleaned
# with CleanPatent and, if not already in ListeBrevetAug:
#   - label found in DejaVu: the first same-label entry of the family list is
#     un-nested, merged through Update(), cleaned and appended;
#   - otherwise: each field is passed through UnNest and the cleaned patent
#     is appended.
# NOTE(review): DejaVu is never appended to in the visible code, so the first
# branch may be unreachable here -- confirm against the full file.
ListeBrevetAug = [] Done = [] else: ListeBrevetAug = [] if ficOk and GatherFamilly: registered_client = epo_ops.RegisteredClient(key, secret) # data = registered_client.family('publication', , 'biblio') registered_client.accept_type = 'application/json' DejaVu = [] for Brev in ListeBrevet: if Brev is not None and Brev != '' and Brev not in Done: temp = GetFamilly(registered_client, Brev, ResultContents) if temp is not None: for pat in temp: pat = CleanPatent(pat) if pat not in ListeBrevetAug and pat != '': if pat['label'] in DejaVu: temporar = [patent for patent in temp if patent['label'] == pat['label']][0] #hum should be unique temporar=UnNest(temporar) for cle in temporar.keys(): temporar[cle] = UnNest(temporar[cle]) temporar = CleanPatent(Update(temporar, pat)) temporar = CleanPatent(temporar) ListeBrevetAug.append(temporar) #temp.append(temporar) else: pat = CleanPatent(pat) for cle in pat.keys(): pat[cle] = UnNest(pat[cle]) ListeBrevetAug.append(CleanPatent(pat))
def _merge_classification_fields(pat, fields):
    """Fold one parsed classification (*fields*) into the patent dict *pat*.

    For every key of *fields* the matching entry of *pat* is (re)initialised
    as a list when needed and the new value(s) are added to it.
    """
    for cle in fields.keys():
        if cle in pat.keys() and fields[cle] not in pat[cle]:
            # An empty-string placeholder becomes a list before values are added.
            if pat[cle] == '':
                pat[cle] = []
        else:
            # NOTE(review): this also resets an existing list when the value is
            # already present, dropping earlier entries. Kept as in the
            # original -- confirm whether that is intended.
            pat[cle] = []
        if isinstance(fields[cle], list):
            pat[cle].extend(fields[cle])
        else:
            pat[cle].append(fields[cle])
        if pat[cle].count(',') > 0:
            print(pat[cle])  # hum, strange state


def _dash_joined(value):
    """Return *value* joined with '-' when it is a list, otherwise unchanged."""
    if isinstance(value, list):
        return '-'.join(value)
    return value


def ExtractPatent(pat, ResultContents, BiblioPatents):
    """Normalise one patent record, write its abstracts and register it.

    The record gets default values for missing fields, its classification is
    expanded through ExtractClassificationSimple2, the country field is split
    by SeparateCountryField, and one abstract file per language is written
    with EcritContenu. The record is finally merged into (or appended to)
    BiblioPatents.

    Parameters:
        pat: patent dict; must contain the keys 'inventeur', 'applicant',
            'date', 'dateDate', 'titre', 'classification' and 'label'.
        ResultContents: not used by this function; kept for callers.
        BiblioPatents: list of patent dicts already gathered; updated in place.

    Returns:
        (pat, YetGathered, BiblioPatents) when the record was processed, or
        (None, DejaLa, BiblioPatents) when a None value is still present.

    NOTE(review): ndb, patentBib, ResultAbstractPath and YetGathered are not
    defined in this function; they are presumably module-level names set by
    the calling script -- confirm. The sibling return hands back DejaLa, so
    YetGathered may be a stale name.
    """
    # Single local import: the original imported datetime inside conditional
    # branches, which made the name function-local and possibly unbound.
    import datetime

    DejaLa = [bre['label'] for bre in BiblioPatents]
    today = datetime.date.today()

    # Defaults for the mandatory fields (a missing key raises KeyError, as before).
    for cle in ['inventeur', 'applicant', 'date', 'dateDate', 'titre']:
        if pat[cle] is not None:
            continue
        if cle == 'date':
            pat[cle] = str(today.year) + '-' + str(today.month) + '-' + str(today.day)
        elif cle == 'dateDate':
            # Fix: store a date object (was the int year) because
            # pat['dateDate'].year is read when building the header below.
            pat[cle] = today
        else:
            pat[cle] = 'empty'

    # Fix: the original wrote to `bre` (the leaked list-comprehension
    # variable, i.e. the last entry of BiblioPatents) instead of `pat`.
    for cle in [key for key in pat.keys() if pat[key] is None]:
        pat[cle] = u'empty'

    if None in pat.values():
        # None values: avoiding this patent (defensive, normally unreachable now).
        if 'label' in pat:
            DejaLa.append(pat['label'])
        return None, DejaLa, BiblioPatents

    classification = pat['classification']
    if isinstance(classification, list):
        for classif in classification:
            _merge_classification_fields(pat, ExtractClassificationSimple2(classif))
    else:
        _merge_classification_fields(pat, ExtractClassificationSimple2(classification))

    pat = SeparateCountryField(pat)
    for clekey in pat.keys():
        if isinstance(pat[clekey], list):
            pat[clekey] = UnNest(pat[clekey])

    CIB1 = _dash_joined(pat['IPCR1'])
    CIB3 = _dash_joined(pat['IPCR3'])
    CIB4 = _dash_joined(pat['IPCR4'])

    # Header line prepended to every abstract file written below.
    IRAM = ('**** *Label_' + ndb + ' *Country_' + pat['pays']
            + ' *CIB3_' + CIB3 + ' *CIB1_' + CIB1 + ' *CIB4_' + CIB4
            + ' *Date_' + str(pat['dateDate'].year)
            + ' *Applicant_' + UniClean('-'.join(coupeEnMots(pat['applicant'])))[0:12])
    # Empty fields (an underscore directly followed by a space) are marked 'empty'.
    IRAM = IRAM.replace('_ ', '_empty') + '\n'

    # Collect abstract text per language from the bibliographic answer.
    documents = patentBib[u'ops:world-patent-data'][u'exchange-documents'][u'exchange-document']
    TXT = dict()
    if isinstance(documents, list):
        for tempo in documents:
            if 'abstract' in tempo:
                txtTemp = ExtractAbstract(tempo['abstract'])
                for cleLang in txtTemp:
                    if cleLang in TXT:
                        TXT[cleLang] += txtTemp[cleLang]
                    else:
                        TXT[cleLang] = txtTemp[cleLang]
    elif 'abstract' in documents:
        TXT = ExtractAbstract(documents[u'abstract'])

    for lang in TXT.keys():
        EcritContenu(IRAM + TXT[lang], ResultAbstractPath + '//' + lang + '-' + ndb + '.txt')

    if pat['label'] in DejaLa:
        # Label already gathered: merge the new data into the stored record.
        tempor = [patent for patent in BiblioPatents if patent['label'] == pat["label"]][0]  # should be unique
        BiblioPatents.remove(tempor)
        tempor = Update(tempor, pat)
        for key in tempor.keys():
            if isinstance(tempor[key], list):
                tempor[key] = UnNest(tempor[key])
        tempor = CleanPatent(tempor)
        BiblioPatents.append(CleanPatent(tempor))
    else:
        for key in pat.keys():
            if isinstance(pat[key], list):
                pat[key] = UnNest(pat[key])
        pat = CleanPatent(pat)
        BiblioPatents.append(CleanPatent(pat))
        DejaLa.append(pat['label'])
    return pat, YetGathered, BiblioPatents
# Fragment (line breaks lost in extraction): abstract-gathering step.
# Creates RepDir//Abstracts when the listing of RepDir has no 'abstracts'
# entry; the enclosing try (opened outside this fragment) swallows any error.
# NOTE(review): the test looks for lowercase 'abstracts' while the directory
# created is 'Abstracts' -- confirm the check can ever match.
# When GatherContent is set, each entry of lstBrevet is cleaned with
# CleanPatent and its label (ndb) is compared with the file names found in
# ResultPathContent//Abstracts/ after dropping their first three characters.
# For a label with no file yet, the 'abstract' endpoint is requested through
# registered_client.published_data with an Epodoc identifier; when the JSON
# answer does not contain 'abstract', a Docdb identifier is built from
# ndb[2:], ndb[0:2] and brevet['status'].
if 'abstracts' not in os.listdir(RepDir): os.makedirs(RepDir+"//Abstracts") except: pass #os.chdir(ndf.replace('.dump', '')) desc, clm, ft, abstract = 0,0,0, 0 Langues = set() if GatherContent: for brevet in lstBrevet: #tempo =('publication', Docdb(,, )) #if brevet['label'] == 'FR2997041': # tempo =('publication', Docdb(brevet[u'publication-ref'][u'document-id'][0][u'doc-number']['$'],brevet['publication-ref'][u'document-id'][0][u'country']['$'], brevet['publication-ref'][u'document-id'][0][u'kind']['$'])) # tempo2 =('publication', Epodoc(brevet['publication-ref'][u'document-id'][0][u'country']['$']+brevet[u'publication-ref'][u'document-id'][0][u'doc-number']['$']))#, brevet[u'document-id'][u'kind']['$'])) #tempoDocDb = brevet = CleanPatent(brevet) brevet = CleanPatent(brevet) ndb =brevet[u'label']#[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']brevet['publication-ref'][u'document-id'][0][u'kind']['$']) #check for already gathered patents lstfic = os.listdir(ResultPathContent+'//Abstracts/') fichier = [fics[3:] for fics in lstfic] if ndb+'.txt' not in fichier: for content in [u'abstract']:#claims', u'description']: #, u'fulltext' try : tmp = Epodoc(ndb) tempo = ('publication', tmp) data = registered_client.published_data(*tempo, endpoint = content) #registered_client.published_data() if 'abstract' not in str(data.json()): tmp = Docdb(ndb[2:], ndb[0:2],brevet['status'])
# Fragment (line breaks lost in extraction): export preparation.
# Increments comptBad and prints the number of identified problems, then
# starts filling the export lists LstExp / LstExp2: every field of each
# LstBrevet entry goes through UnNest, the entry through CleanPatent and
# CleanPatentOthers2, a deep copy (tempo2) and a per-key copy (tempo3) are
# prepared and the entry is appended to LstExp.
# The bare `print` for label 'WO2007000665' looks like a debugging hook.
# The fragment stops inside the per-key loop.
    comptBad += 1 print comptBad, " identified problems." if comptBad == 0: print "This is good!" #we filter data for exporting most significant values LstExp = [] LstExp2 = [] for brev in LstBrevet: if brev['label'] == 'WO2007000665': print #cleaningbre[c] for cle in brev.keys(): brev[cle] = UnNest(brev[cle]) brev = CleanPatent(brev) brev = CleanPatentOthers2(brev) ## tempo = brev # this one for DataTable tempo2 = dict() #the one for pitable PaysInv = [] #new field PaysApp = [] #tempo = CleanPatent(tempo) tempo2 = copy.deepcopy(tempo) #ugly tempo3 = dict() #what the problem LstExp.append(tempo) for ket in brev.keys(): tempo3[ket] = tempo[ket] if isinstance(tempo2[ket], list):
# Fragment (line breaks lost in extraction): export preparation (variant).
# After counting and reporting problems through comptBad, each patent of
# LstBrevet is normalised (UnNest on every field, then CleanPatent and
# CleanPatentOthers2) and stored in LstExp; tempo2 is a deep copy of the
# record and tempo3 receives the fields one by one.
# PaysInv and PaysApp are initialised empty for every patent and are not
# filled in the visible part. The label test on 'WO2007000665' only triggers
# a bare `print`. The code continues past the end of this fragment.
    comptBad +=1 print comptBad, " identified problems." if comptBad == 0: print "This is good!" #we filter data for exporting most significant values LstExp = [] LstExp2 = [] for brev in LstBrevet: if brev['label'] == 'WO2007000665': print #cleaningbre[c] for cle in brev.keys(): brev[cle] = UnNest(brev[cle]) brev= CleanPatent(brev) brev= CleanPatentOthers2(brev) ## tempo = brev # this one for DataTable tempo2 = dict() #the one for pitable PaysInv= [] #new field PaysApp = [] #tempo = CleanPatent(tempo) tempo2 = copy.deepcopy(tempo) #ugly tempo3 = dict() #what the problem LstExp.append(tempo) for ket in brev.keys(): tempo3[ket] = tempo[ket] if isinstance(tempo2[ket], list):
# Fragment (line breaks lost in extraction): HTML-table formatting step.
# Loads DataBrevet with pickle from ListPatentPath//ndf. When LstBrevet is a
# dict it is unpacked: its 'brevets' entry replaces LstBrevet, its 'requete'
# entry is copied into DataBrevet and its 'number' entry is reported.
# Each patent is then passed through CleanPatent and the fields equal to
# None, [u'None', None] or [None] are selected: 'date' becomes the current
# year as unicode and 'dateDate' becomes today's date (the elif chain
# continues past the end of this fragment).
# NOTE(review): the pickle file is opened in text mode ('r'), and pickle.load
# must only be used on trusted files.
with open(ListPatentPath+'//'+ndf, 'r') as data: DataBrevet = pickle.load(data) if isinstance(LstBrevet, dict): data = LstBrevet LstBrevet = data['brevets'] if data.has_key('requete'): DataBrevet['requete'] = data["requete"] if data.has_key('number'): print "Found ", data["number"], " patents! Formating to HMTL tables" LstExp = [] LstExp2 = [] for brev in LstBrevet: brev = CleanPatent(brev) tempo = dict() # this one for DataTable tempo2 = dict() #the one for pitable PaysInv= [] #new field PaysApp = [] # tempo = CleanPatent(brev) # brevet= SeparateCountryField(tempo) #cleaning classification cles = [key for key in brev.keys() if brev[key]==None or brev[key] == [u'None', None] or brev[key] == [None]] for cle in cles: if cle=='date': brev[cle] = unicode(datetime.date.today().year) elif cle=="dateDate": brev[cle] = datetime.date.today()