Exemplo n.º 1
0
            ListeBrevetAug = []
            Done = []
    else:
        ListeBrevetAug = []
    if ficOk and GatherFamilly:
        registered_client = epo_ops.RegisteredClient(key, secret)
        #        data = registered_client.family('publication', , 'biblio')
        registered_client.accept_type = 'application/json'
        DejaVu = []
        for Brev in ListeBrevet:

            if Brev is not None and Brev != '' and Brev not in Done:
                temp = GetFamilly(registered_client, Brev, ResultContents)
                if temp is not None:
                    for pat in temp:
                        pat = CleanPatent(pat)
                        if pat not in ListeBrevetAug and pat != '':
                            if pat['label'] in DejaVu:
                                temporar = [
                                    patent for patent in temp
                                    if patent['label'] == pat['label']
                                ][0]  #hum should be unique
                                temporar = UnNest(temporar)
                                for cle in temporar.keys():
                                    temporar[cle] = UnNest(temporar[cle])
                                temporar = CleanPatent(Update(temporar, pat))
                                temporar = CleanPatent(temporar)
                                ListeBrevetAug.append(temporar)
                                #temp.append(temporar)
                            else:
                                pat = CleanPatent(pat)
         ListeBrevetAug = []
         Done = []
 else: 
     ListeBrevetAug = []
 # Family gathering: when the input file was read correctly (ficOk) and family
 # gathering is requested (GatherFamilly), query the OPS service for the family
 # of every patent in ListeBrevet and accumulate the results in ListeBrevetAug.
 if ficOk and GatherFamilly:
     # OPS client built from the key/secret pair; responses are requested as JSON.
     registered_client = epo_ops.RegisteredClient(key, secret)
 #        data = registered_client.family('publication', , 'biblio')
     registered_client.accept_type = 'application/json'
     # NOTE(review): DejaVu is never appended to in the lines visible here, so
     # the "label already seen" branch below only runs if it is filled elsewhere.
     DejaVu = []
     for Brev in ListeBrevet:
         
         # Skip empty entries and patents already recorded in Done.
         if Brev is not None and Brev != '' and Brev not in Done:
             temp = GetFamilly(registered_client, Brev, ResultContents)
             # GetFamilly may return None; in that case nothing is added.
             if temp is not None:
                 for pat in temp:
                     pat = CleanPatent(pat)
                     # Only add entries that are not already in the output list.
                     if pat not in ListeBrevetAug and pat != '':
                         if pat['label'] in DejaVu:
                             # Label already seen: take the first entry of this
                             # family result carrying the same label (expected to
                             # be unique), flatten it and merge `pat` into it.
                             temporar = [patent for patent in temp if patent['label'] == pat['label']][0] # expected to be unique
                             temporar=UnNest(temporar)
                             for cle in temporar.keys():
                                 temporar[cle] = UnNest(temporar[cle])
                             temporar = CleanPatent(Update(temporar, pat))      
                             temporar = CleanPatent(temporar)
                             ListeBrevetAug.append(temporar)
                             #temp.append(temporar)
                         else:
                             # New label: flatten every field, then store.
                             pat = CleanPatent(pat)
                             for cle in pat.keys():
                                 pat[cle] = UnNest(pat[cle])
                             ListeBrevetAug.append(CleanPatent(pat))
def _merge_classification(pat, classif):
    """Fold one raw classification entry into the list-valued fields of ``pat``.

    ``classif`` is handed to ``ExtractClassificationSimple2``; every key of the
    mapping it returns is merged into ``pat`` as a list.
    """
    extrait = ExtractClassificationSimple2(classif)
    for cle, valeur in extrait.items():
        nouveaux = valeur if isinstance(valeur, list) else [valeur]
        if cle in pat and not isinstance(pat[cle], list):
            # '' and the u'empty' placeholder written by ExtractPatent mean
            # "nothing yet"; any other scalar is kept as the first element so
            # that the list operations below cannot fail on a string.
            pat[cle] = [] if pat[cle] in ('', u'empty') else [pat[cle]]
        if cle not in pat or valeur in pat[cle]:
            # NOTE(review): an already-present value resets the field to just
            # that value (long-standing behaviour, kept as is) -- confirm that
            # dropping the other entries is intended.
            pat[cle] = []
        pat[cle].extend(nouveaux)
        if pat[cle].count(',') > 0:
            print(pat[cle])  # hum, strange state


def _join_codes(valeur):
    """Return a list of codes joined with '-', or ``valeur`` itself if it is not a list."""
    if isinstance(valeur, list):
        return '-'.join(valeur)
    return valeur


def ExtractPatent(pat, ResultContents, BiblioPatents):
    """Normalise one patent record, write its abstracts and register it.

    Args:
        pat: patent dictionary; must hold the keys 'inventeur', 'applicant',
            'date', 'dateDate', 'titre', 'classification' and 'label'.
            It is modified in place.
        ResultContents: not used by this function (kept for the callers).
        BiblioPatents: list of patent dictionaries already gathered; the
            record is appended to it, or merged into the entry that has the
            same 'label'.

    Returns:
        ``(pat, YetGathered, BiblioPatents)`` when the record was processed,
        ``(None, DejaLa, BiblioPatents)`` when it still holds a None value,
        where ``DejaLa`` is the list of labels already known.

    Relies on names that are not defined in this function and must exist at
    module level: ``ndb``, ``patentBib``, ``ResultAbstractPath`` and
    ``YetGathered``.
    """
    # Imported once up front: the name used to be bound only inside some
    # branches, which left it unbound on the other paths.
    import datetime

    DejaLa = [brevet['label'] for brevet in BiblioPatents]

    # Defaults for the mandatory fields. 'dateDate' must be a date object
    # because its .year attribute is read when the IRAMUTEQ header is built.
    today = datetime.date.today()
    defaults = (
        ('inventeur', 'empty'),
        ('applicant', 'empty'),
        ('date', '%d-%d-%d' % (today.year, today.month, today.day)),
        ('dateDate', today),
        ('titre', 'empty'),
    )
    for cle, defaut in defaults:
        if pat[cle] is None:
            pat[cle] = defaut

    # Every other missing field gets the placeholder, written to ``pat``
    # itself (not to an entry of BiblioPatents).
    for cle in [k for k in pat.keys() if pat[k] is None]:
        pat[cle] = u'empty'

    if None in pat.values():
        # Defensive: a record that still holds None is not processed.
        if 'label' in pat:
            DejaLa.append(pat['label'])
        return None, DejaLa, BiblioPatents

    # 'classification' may be a single entry or a list of entries.
    classifications = pat['classification']
    if not isinstance(classifications, list):
        classifications = [classifications]
    for classif in classifications:
        _merge_classification(pat, classif)

    pat = SeparateCountryField(pat)
    for clekey in pat.keys():
        if isinstance(pat[clekey], list):
            pat[clekey] = UnNest(pat[clekey])

    CIB1 = _join_codes(pat['IPCR1'])
    CIB3 = _join_codes(pat['IPCR3'])
    CIB4 = _join_codes(pat['IPCR4'])

    # Header line placed in front of every abstract written below.
    IRAM = '**** *Label_' + ndb +' *Country_'+pat['pays']+ ' *CIB3_'+CIB3 + ' *CIB1_'+CIB1 + ' *CIB4_'+CIB4 + ' *Date_' + str(pat['dateDate'].year) + ' *Applicant_'+UniClean('-'.join(coupeEnMots(pat['applicant'])))[0:12]
    # An empty value leaves "_ " in the header; mark it as empty.
    IRAM = IRAM.replace('_ ', '_empty') + '\n'

    # Collect the abstracts per language. The exchange document is either a
    # single mapping or a list of mappings.
    TXT = dict()
    documents = patentBib[u'ops:world-patent-data'][u'exchange-documents'][u'exchange-document']
    if isinstance(documents, list):
        for tempo in documents:
            if 'abstract' in tempo:
                txtTemp = ExtractAbstract(tempo['abstract'])
                for cleLang in txtTemp:
                    if cleLang in TXT:
                        TXT[cleLang] += txtTemp[cleLang]
                    else:
                        TXT[cleLang] = txtTemp[cleLang]
    elif 'abstract' in documents:
        TXT = ExtractAbstract(documents[u'abstract'])
    # Written for both shapes: abstracts gathered from a list of documents
    # used to be collected and then dropped.
    for lang in TXT.keys():
        EcritContenu(IRAM + TXT[lang], ResultAbstractPath+'//'+lang+'-'+ndb+'.txt')

    if pat['label'] in DejaLa:  # checking multiples status
        # Same label already stored: merge this record into the stored one.
        tempor = [patent for patent in BiblioPatents if patent['label'] == pat["label"]][0]  # should be unique
        BiblioPatents.remove(tempor)
        tempor = Update(tempor, pat)
        for key in tempor.keys():
            if isinstance(tempor[key], list):
                tempor[key] = UnNest(tempor[key])
        tempor = CleanPatent(tempor)
        BiblioPatents.append(CleanPatent(tempor))
    else:
        for key in pat.keys():
            if isinstance(pat[key], list):
                pat[key] = UnNest(pat[key])
        pat = CleanPatent(pat)
        BiblioPatents.append(CleanPatent(pat))
        DejaLa.append(pat['label'])
    # NOTE(review): YetGathered is not defined in this function while the
    # early return above hands back DejaLa -- confirm which one callers expect.
    return pat, YetGathered, BiblioPatents
    if 'abstracts' not in os.listdir(RepDir):
        os.makedirs(RepDir+"//Abstracts")
except:
    pass
#os.chdir(ndf.replace('.dump', ''))
desc, clm, ft, abstract = 0,0,0, 0
Langues = set()
if GatherContent:

    for brevet in lstBrevet:
        #tempo =('publication', Docdb(,, ))
        #if brevet['label'] == 'FR2997041':
#        tempo =('publication', Docdb(brevet[u'publication-ref'][u'document-id'][0][u'doc-number']['$'],brevet['publication-ref'][u'document-id'][0][u'country']['$'], brevet['publication-ref'][u'document-id'][0][u'kind']['$']))
#        tempo2 =('publication', Epodoc(brevet['publication-ref'][u'document-id'][0][u'country']['$']+brevet[u'publication-ref'][u'document-id'][0][u'doc-number']['$']))#, brevet[u'document-id'][u'kind']['$']))
        #tempoDocDb = 
        brevet = CleanPatent(brevet)
        brevet = CleanPatent(brevet)
        ndb =brevet[u'label']#[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']brevet['publication-ref'][u'document-id'][0][u'kind']['$'])
#check for already gathered patents        
        lstfic = os.listdir(ResultPathContent+'//Abstracts/')
        fichier = [fics[3:] for fics in lstfic]      
        if ndb+'.txt' not in fichier:
            for content in [u'abstract']:#claims', u'description']: #, u'fulltext'              
                try :
                    
                    tmp = Epodoc(ndb)
                    
                    tempo = ('publication', tmp)
                    data = registered_client.published_data(*tempo, endpoint = content)             #registered_client.published_data()
                    if 'abstract' not in str(data.json()):
                        tmp = Docdb(ndb[2:], ndb[0:2],brevet['status'])
Exemplo n.º 5
0
        comptBad += 1
print comptBad, " identified problems."
if comptBad == 0:
    print "This is good!"

#we filter data for exporting most significant values
LstExp = []
LstExp2 = []
for brev in LstBrevet:
    if brev['label'] == 'WO2007000665':
        print
    #cleaningbre[c]
    for cle in brev.keys():
        brev[cle] = UnNest(brev[cle])

    brev = CleanPatent(brev)
    brev = CleanPatentOthers2(brev)

    ##

    tempo = brev  # this one for DataTable
    tempo2 = dict()  #the one for pitable
    PaysInv = []  #new field
    PaysApp = []
    #tempo = CleanPatent(tempo)
    tempo2 = copy.deepcopy(tempo)  #ugly
    tempo3 = dict()  #what the problem
    LstExp.append(tempo)
    for ket in brev.keys():
        tempo3[ket] = tempo[ket]
        if isinstance(tempo2[ket], list):
        comptBad +=1
print comptBad, " identified problems."
if comptBad == 0:
    print "This is good!"

#we filter data for exporting most significant values
LstExp = [] 
LstExp2 = [] 
for brev in LstBrevet:
    if brev['label'] == 'WO2007000665':
        print
    #cleaningbre[c]
    for cle in brev.keys():
        brev[cle] = UnNest(brev[cle])

    brev= CleanPatent(brev)
    brev= CleanPatentOthers2(brev)
    
    ##
    
    tempo = brev # this one for DataTable
    tempo2 = dict() #the one for pitable
    PaysInv= [] #new field
    PaysApp = []
    #tempo = CleanPatent(tempo)
    tempo2 = copy.deepcopy(tempo) #ugly
    tempo3 = dict() #what the problem        
    LstExp.append(tempo)
    for ket in brev.keys():
        tempo3[ket] = tempo[ket]
        if isinstance(tempo2[ket], list):
Exemplo n.º 7
0
    
with open(ListPatentPath+'//'+ndf, 'r') as data:
    DataBrevet = pickle.load(data)

if isinstance(LstBrevet, dict):
    data = LstBrevet
    LstBrevet = data['brevets']    
    if data.has_key('requete'): 
        DataBrevet['requete'] = data["requete"]
    if data.has_key('number'):
        print "Found ", data["number"], " patents! Formating to HMTL tables"
    
LstExp = [] 
LstExp2 = [] 
for brev in LstBrevet:
    brev = CleanPatent(brev)
    
    
    tempo = dict() # this one for DataTable
    tempo2 = dict() #the one for pitable
    PaysInv= [] #new field
    PaysApp = []
#    tempo = CleanPatent(brev)
#    brevet= SeparateCountryField(tempo)
    #cleaning classification
    cles = [key for key in brev.keys() if brev[key]==None or brev[key] == [u'None', None] or brev[key] == [None]]
    for cle in cles:
        if cle=='date':
            brev[cle] = unicode(datetime.date.today().year)
        elif cle=="dateDate":
            brev[cle] = datetime.date.today()