def get_register(number):
    """Get EP Register data for a particular EP publication number.

    Args:
        number: Publication number handed to ``Epodoc`` (e.g. ``EP3065066``).

    Returns:
        The decoded JSON body of the register response, or ``None`` when the
        request or the JSON decoding raised an exception.
    """
    # Add here to first check cached data? - do this externally to function?
    try:
        register_search = registered_client.register(
            "publication", Epodoc(number)
        )
        return register_search.json()
    except Exception:
        # Best-effort lookup: any ordinary failure is reported as "no data".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so the caller can still be interrupted.
        return None
def family(self, document_number):
    """
    Request family information for a single document.

    ``document_number`` is wrapped in ``Epodoc`` and sent to
    ``self.client.family`` as a ``'publication'`` reference, with
    ``'biblio'`` as the third argument. The decoded JSON body of the
    response is returned.
    """
    message = 'Requesting family information for document "{}"'.format(
        document_number)
    logger.info(message)

    reply = self.client.family(
        'publication', Epodoc(document_number), 'biblio')
    return reply.json()
def get_images_meta(ops_client, patent_label, path_json):
    """Return the image metadata of a patent, using a local JSON cache.

    The cache file at ``path_json`` is tried first. When it cannot be read
    or parsed, the ``images`` endpoint of ``ops_client.published_data`` is
    queried, the raw response body is written to ``path_json`` and the
    decoded JSON is returned.

    Args:
        ops_client: Object exposing ``published_data(reference_type=...,
            input=..., endpoint=...)``.
        patent_label: Publication number wrapped in ``Epodoc`` for the request.
        path_json: Path of the cache file to read and, on a miss, to write.

    Returns:
        The decoded JSON metadata, or ``None`` when the online request
        failed. If the failure carries a response with status code 404, an
        empty JSON object is cached so later calls load ``{}`` from disk.
    """
    # Try to retrieve JSON meta info from local, otherwise get online from OPS
    try:
        with open(path_json) as cache_file:
            return json.load(cache_file)
    except (IOError, OSError, ValueError):
        # Missing, unreadable or corrupt cache: fall back to the online lookup.
        pass

    try:
        ans = ops_client.published_data(
            reference_type='publication',
            input=Epodoc(patent_label),
            endpoint='images',
        )
        # Binary mode stores the body exactly as received.
        # NOTE(review): assumes ans.content is a byte string -- confirm.
        with open(path_json, 'wb') as cache_file:
            cache_file.write(ans.content)
        return ans.json()
    except Exception as err:
        # Single-argument print gives the same output on Python 2 and 3.
        print("...Image meta for {} error {}".format(patent_label, err))
        # The exception may have no response, or a response of None.
        response = getattr(err, 'response', None)
        if getattr(response, 'status_code', None) == 404:
            with open(path_json, 'w') as cache_file:
                cache_file.write('{}')
        return None
def register(self, document_number):
    """
    Request register information for a single document.

    ``document_number`` is wrapped in ``Epodoc`` and sent to
    ``self.client.register`` as a ``'publication'`` reference. Returns the
    decoded JSON body, or ``None`` when the body cannot be decoded.
    """
    logger.info(
        'Requesting register information for document "{}"'.format(
            document_number))

    try:
        reply = self.client.register('publication', Epodoc(document_number))
    except requests.HTTPError as http_error:
        # Fall back to the response attached to the HTTP error so that its
        # body is decoded below instead of the error propagating.
        reply = http_error.response

    try:
        return reply.json()
    except ValueError:
        return None
pays = pays[0] for content in [typeSrc+'Abstract', typeSrc+'Claims',typeSrc+'Description']: if content not in Nombre.keys(): Nombre [content] = 0 try: lstfic = os.listdir(ResultPathContent+'//' + content) except: lstfic = [] endP= content.replace(typeSrc, "").lower() if endP == 'abstract': endP = 'biblio' fichier = [fics[3:] for fics in lstfic] # content already gathered if ndb+'.txt' not in fichier: #hack here as chinese patents seems not be in claims or description endpoint #, u'fulltext' temp =('publication', Epodoc(pays+ndb[2:])) #, brevet[u'document-id'][u'kind']['$'])) try: data = registered_client.published_data(*temp, endpoint = endP) #registered_client.published_data() if data.ok and content.replace(typeSrc, "").lower() in str(data.json()): CheckDocDB = False else: CheckDocDB = True except: CheckDocDB = True if CheckDocDB: if isinstance(brevet[u'kind'], list): tempoData = [] for cc in brevet[u'kind']: temp =('publication', Docdb(ndb[2:],pays, cc)) # hope all comes from same country try: tempoData.append(registered_client.published_data(*temp, endpoint = endP))
pass #os.chdir(ndf.replace('.dump', '')) desc, clm, ft = 0,0,0 if GatherContent: for brevet in lstBrevet: #tempo =('publication', Docdb(,, )) #if brevet['label'] == 'FR2997041': ndb =brevet[u'label']#[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']brevet['publication-ref'][u'document-id'][0][u'kind']['$']) #check for already gathered patents lstfic =[] #alreadycollected for content in [u'claims', u'description']: lstfic += os.listdir(ResultPathContent+'//'+content+'//') fichier = [fics[3:] for fics in lstfic] if fichier.count(ndb+'.txt') < 2: #one or both files claim or desc are missing tmp = Epodoc(ndb) tempo2 = ('publication', tmp) tmp = Docdb(ndb[2:], ndb[0:2],brevet['status']) tempo = ('publication', tmp) ndb =brevet[u'label']#[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']brevet['publication-ref'][u'document-id'][0][u'kind']['$']) if True: #avoid check of chinese patents since they aren't descibed in english for content in [u'claims', u'description']: #, u'fulltext' if content not in os.listdir(RepDir): os.makedirs(RepDir +'//' +content) # optional, list of constituents try : data = registered_client.published_data(*tempo, endpoint = content) #registered_client.published_data()
def test_epodoc_as_api_input():
    """Check ``Epodoc.as_api_input`` for a full and a partly empty input.

    The expected strings show the comma encoded as ``%2C`` and the empty
    second argument left out of the result.
    """
    cases = (
        (
            ("US08/921,321", "B2", "20140122"),
            "(US08/921%2C321).(B2).(20140122)",
        ),
        (
            ("US08/921,321", "", "20140122"),
            "(US08/921%2C321).(20140122)",
        ),
    )
    for arguments, expected in cases:
        assert Epodoc(*arguments).as_api_input() == expected
def test_epodoc_required():
    """Building an ``Epodoc`` from an empty string raises MissingRequiredValue."""
    empty_number = ""
    with raises(MissingRequiredValue):
        Epodoc(empty_number)
]: if content not in Nombre.keys(): Nombre[content] = 0 try: lstfic = os.listdir(ResultPathContent + '//' + content) except: lstfic = [] endP = content.replace(typeSrc, "").lower() if endP == 'abstract': endP = 'biblio' fichier = [fics[3:] for fics in lstfic] # content already gathered if ndb + '.txt' not in fichier: #hack here as chinese patents seems not be in claims or description endpoint #, u'fulltext' temp = ('publication', Epodoc(pays + ndb[2:]) ) #, brevet[u'document-id'][u'kind']['$'])) try: data = ops_client.published_data( *temp, endpoint=endP) #ops_client.published_data() if data.ok and content.replace( typeSrc, "").lower() in str(data.json()): CheckDocDB = False else: CheckDocDB = True except Exception as err: CheckDocDB = True if CheckDocDB: if isinstance(brevet[u'kind'], list): tempoData = []
# OPSChercheAbstractBrevet(pat, DirStockage) -- overview for reviewers.
# - Reads a comma-separated key/secret pair from '../cles-epo.txt' and builds an
#   epo_ops.Client whose accept_type is set to 'application/json'.
# - Takes the document label from pat['label'] and the country from
#   pat['country'] (first element when either is a list); list-valued 'label',
#   'country' and 'kind' entries of pat are de-duplicated in place.
# - Queries the 'biblio' endpoint with Epodoc(pays + ndb[2:]); CheckDocDB is
#   False only when the response is ok and 'abstract' occurs in
#   str(data.json()), and True otherwise (including when the request raises).
# - When CheckDocDB is set and pat['kind'] is a list, one Docdb request per kind
#   is issued and the decoded JSON of every ok response is passed to MakeIram4,
#   whose result is stored in Abstracts.
# - Returns Abstracts, which starts out as an empty dict.
# NOTE(review): DirStockage is not referenced anywhere in the visible body.
# NOTE(review): the `for key in [...]` loop rebinds `key`, the name that held the
#   API key; the client has already been built at that point.
# NOTE(review): in the branch where pat['kind'] is not a list, `temp` is rebuilt
#   with Docdb but no request using it is visible; `data` there appears to be
#   the Epodoc response, and looks unbound if that first request raised -- confirm.
# NOTE(review): statement nesting cannot be read from this one-line layout, so
#   the scope of the final `if data is not None and data.ok` check is unverified.
def OPSChercheAbstractBrevet(pat, DirStockage): import epo_ops from epo_ops.models import Docdb from epo_ops.models import Epodoc fic = open('../cles-epo.txt', 'r') key, secret = fic.read().split(',') key, secret = key.strip(), secret.strip() fic.close() ops_client = epo_ops.Client(key, secret) ops_client.accept_type = 'application/json' ndb = pat[ 'label'] #[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']brevet['publication-ref'][u'document-id'][0][u'kind']['$']) Abstracts = dict() if isinstance(ndb, list): ndb = ndb[0] #print("Retrieving ", ndb) pays = pat['country'] for key in ['label', 'country', 'kind']: if isinstance(pat[key], list): pat[key] = list( set(pat[key]) ) # hum some problem (again) in cleaning data within the family gatherer... 22/12/15 if isinstance(pays, list): pays = pays[0] content = 'Abstract' endP = 'biblio' # temp = ('publication', Epodoc(pays + ndb[2:]) ) #, brevet[u'document-id'][u'kind']['$'])) try: data = ops_client.published_data( *temp, endpoint=endP) #ops_client.published_data() if data.ok and 'abstract' in str(data.json()): CheckDocDB = False else: CheckDocDB = True except Exception as err: CheckDocDB = True if CheckDocDB: if isinstance(pat['kind'], list): tempoData = [] for cc in pat['kind']: temp = ('publication', Docdb(ndb[2:], pays, cc) ) # hope all comes from same country try: tempoData.append( ops_client.published_data(*temp, endpoint=endP)) except: data = None pass for dat in tempoData: if dat is not None and dat.ok: contenu = content patentCont = dat.json() Abstracts = MakeIram4(pat, patentCont, contenu) # Make2Iram2 should format the patent into a txt file in Iramuteq format in the right directory # Lang is a thing :-) (I believe it returns the language of the retrieved abstract)) else: temp = ('publication', Docdb(pat['label'][2:], pat['country'], pat['kind'])) if data is not None and data.ok: contenu = content patentCont = data.json() Abstracts = MakeIram4(pat, patentCont, contenu) # if 
ops:world-patent-data exchange-documents exchange-documents abstract return Abstracts
import re
import requests
from epo_ops.models import Docdb
from epo_ops.models import Epodoc
from epo_ops.models import Original

# Shared request arguments: a reference type followed by the document input.
data = ('publication', Docdb('1000000', 'EP', 'A1'))
rdata = ('publication', Epodoc('EP1000000'))


def find_range(document, pattern):
    """Search ``document`` for ``range`` followed, later on, by ``pattern``.

    Returns the ``re.search`` result: a match object, or ``None``.
    """
    expression = "range.*{0}".format(pattern)
    return re.search(expression, document)


def assert_request_success(response):
    """Assert an OK status code and the ``ops-v3.1`` ``X-API`` header."""
    expected_api = 'ops-v3.1'
    assert response.status_code == requests.codes.ok
    assert response.headers['X-API'] == expected_api


def assert_family_success(client):
    """Request the family of ``data``, check the reply and return it."""
    reply = client.family(*data)
    assert_request_success(reply)
    assert 'patent-family' in reply.text
    return reply


def issue_published_data_request(client):
    """Issue a published-data request for ``data`` and return the reply."""
    reply = client.published_data(*data)
    return reply
if GatherBibli and GatherBiblio: registered_client = epo_ops.RegisteredClient(key, secret) # data = registered_client.family('publication', , 'biblio') registered_client.accept_type = 'application/json' for brevet in lstBrevets: YetGathered = [u['label'] for u in BiblioPatents] # may be current patent has already be gathered in a previous attempt # should add a condition here to check in os.listdir() tempo =('publication', Docdb(brevet[u'document-id'][u'doc-number']['$'],brevet[u'document-id'][u'country']['$'], brevet[u'document-id'][u'kind']['$'])) tempo2 =('publication', Epodoc(brevet[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']))#, brevet[u'document-id'][u'kind']['$'])) ndb =brevet[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$'] #nameOfPatent if ndb not in YetGathered: try: #trying Epodoc first, unused due to response format (multi document instead of one only) data = registered_client.published_data(*tempo2, endpoint = 'biblio') patentBib = data.json() data2 = registered_client.published_data(*tempo, endpoint = 'biblio') if data.ok and data2.ok: patentBibtemp = data.json() patentBibtemp2= data2.json() if len(str(patentBibtemp)) > len(str(patentBibtemp2)): patentBib = patentBibtemp else: patentBib = patentBibtemp2 except:
def test_epodoc_as_api_input():
    """Check ``Epodoc.as_api_input`` with and without the second argument.

    The expected strings show the comma encoded as ``%2C`` and the empty
    second argument left out of the result.
    """
    with_all_parts = Epodoc('US08/921,321', 'B2', '20140122')
    assert with_all_parts.as_api_input() == '(US08/921%2C321).(B2).(20140122)'

    with_blank_second = Epodoc('US08/921,321', '', '20140122')
    assert with_blank_second.as_api_input() == '(US08/921%2C321).(20140122)'
def assert_bulk_service_retrival_success(client):
    """Request published data for two documents at once and assert an OK status.

    The input list mixes one ``Docdb`` and one ``Epodoc`` reference.
    """
    docdb_reference = Docdb("1000000", "EP", "A1")
    epodoc_reference = Epodoc("US2018265402")
    reply = client.published_data(
        "publication", input=[docdb_reference, epodoc_reference]
    )
    assert reply.status_code == requests.codes.ok
import re
import requests
from epo_ops.models import Docdb, Epodoc, Original

# Shared request arguments: a reference type followed by the document input.
data = ("publication", Docdb("1000000", "EP", "A1"))
rdata = ("publication", Epodoc("EP1000000"))
idata = ("published-data/images/EP/1000000/A1/fullimage", 1)
# idata path is the result @path from images published-data json request


def find_range(document, pattern):
    """Search ``document`` for ``range`` followed, later on, by ``pattern``.

    Returns the ``re.search`` result: a match object, or ``None``.
    """
    expression = "range.*{0}".format(pattern)
    return re.search(expression, document)


def assert_request_success(response):
    """Assert an OK status code and the ``ops-v3.2`` ``X-API`` header."""
    expected_api = "ops-v3.2"
    assert response.status_code == requests.codes.ok
    assert response.headers["X-API"] == expected_api


def assert_family_success(client):
    """Request the family of ``data``, check the reply and return it."""
    reply = client.family(*data)
    assert_request_success(reply)
    assert "patent-family" in reply.text
    return reply


def assert_family_biblio_success(client):
    """Request the family of ``data`` with the ``biblio`` constituent and
    run the generic success checks on the reply."""
    reply = client.family(*data, constituents=["biblio"])
    assert_request_success(reply)