示例#1
0
def adhoc_interface(options):
    """
    Patent2Net ad-hoc mode interface.

    Runs one query through Patent2Net and prints, as JSON on stdout, every
    view of the result that is switched on in ``options``.

    ``options`` is a mapping; the keys read here are ``expression``,
    ``with-family``, ``with-register``, ``search``, ``dump``, ``format``,
    ``list``, ``field``, ``worldmap``, ``country-field`` and ``pivot``.
    The process exits with status 1 when ``dump`` is requested with a
    format other than ``ops`` or ``brevet``.
    """

    # OPS credentials are read from the designated file
    ops_key, ops_secret = OPSCredentials(credentials_file='cles-epo.txt').read()

    # Patent2Net instance doing the actual gathering
    engine = Patent2Net(ops_key, ops_secret)
    outcome = engine.gather(
        options['expression'],
        with_family=options['with-family'],
        with_register=options['with-register'],
    )

    # Raw OPS search response (JSON), e.g. run::
    # p2n adhoc search --expression='TA=lentille'
    if options['search']:
        print(json.dumps(engine.response_data, cls=JsonObjectEncoder))

    # Full results in OpsExchangeDocument or Patent2NetBrevet format (JSON), e.g. run::
    # p2n adhoc dump --expression='TA=lentille'
    if options['dump']:
        dump_format = options['format']
        if dump_format == 'ops':
            dumped = [doc.as_dict() for doc in outcome.documents]
        elif dump_format == 'brevet':
            dumped = outcome.brevets
        else:
            logger.error(
                'Unknown format "{}" for dumping.'.format(dump_format))
            sys.exit(1)

        print(json.dumps(dumped, cls=JsonObjectEncoder))

    # Values of one single field across all documents, e.g. run::
    # p2n adhoc list --expression='TA=lentille'
    if options['list']:
        collected = []
        for doc in outcome.documents:
            try:
                value = operator.attrgetter(options['field'])(doc)
            except AttributeError:
                # Documents lacking the requested field are skipped.
                continue
            collected.append(value)
        print(json.dumps(collected, indent=4))

    # World map data over the given field, e.g. run::
    # p2n adhoc worldmap --expression='TA=lentille' --country-field='applicants'
    if options['worldmap']:
        print(json.dumps(outcome.worldmap(options['country-field'])))

    # Data for PivotTable.js
    if options['pivot']:
        print(json.dumps(outcome.pivot(options['format'])))
#from P2N_Lib import EcritContenu, coupeEnMots
from Patent2Net.P2N_Config import LoadConfig
from p2n.config import OPSCredentials
import datetime
import epo_ops
import os
import sys
#from epo_ops.models import Docdb
#from epo_ops.models import Epodoc
# Module-level setup: CA bundle location, OPS credentials and working globals.
# REQUESTS_CA_BUNDLE is the variable consulted by the `requests` library
# (presumably used underneath for the OPS HTTPS calls -- TODO confirm).
os.environ['REQUESTS_CA_BUNDLE'] = 'cacert.pem'
# NOTE: a `global` statement at module level has no effect; `key` and `secret`
# are module globals simply because they are assigned below.
global key
global secret

# put your credentials from the EPO client in this file...
# loading of the client keys
c = OPSCredentials(credentials_file='../cles-epo.txt')
key, secret = c.read()

DureeBrevet = 20  # presumably the patent lifetime in years -- TODO confirm
SchemeVersion = '20140101'  # for the url to the classification scheme
import os  # NOTE: repeats the earlier `import os`; harmless

ListeBrevet = []  # the list of patents
# opening of the working file

ficOk = False
cptNotFound = 0
nbTrouves = 0

lstBrevets = []  # The patent List
BiblioPatents = []  # The bibliographic data
示例#3
0
def classic_interface(options):
    """
    Patent2Net classic interface.

    Runs, in a fixed order, the classic processing scripts whose step is
    switched on in ``options`` (a mapping of command-line flags). The ``run``
    flag switches every step on at once.

    ``ops`` together with ``init`` only stores the given ``key``/``secret``
    through ``OPSCredentials`` and exits. Every other task needs a
    configuration file, taken from ``options['config']`` or else from the
    ``P2N_CONFIG`` environment variable; without one the process exits with
    status 1.
    """

    # Convenience: store the OPS API credentials, then stop.
    if options['ops'] and options['init']:
        if options['key'] and options['secret']:
            OPSCredentials().write(options['key'], options['secret'])
        sys.exit()

    # Everything below requires a configuration file.
    configfile = options['config'] or os.environ.get('P2N_CONFIG')
    if not configfile:
        logger.error(
            'No configuration file given. Either use --config commandline argument or P2N_CONFIG environment variable.'
        )
        sys.exit(1)

    def wanted(step):
        # A step is due when asked for by name or when the full "run" is requested.
        return options[step] or options['run']

    def run_all(*scripts):
        # Run the given scripts one after the other with the same configuration.
        for script in scripts:
            run_script(script, configfile)

    def refresh_families():
        # Several steps re-gather the families first when --with-family is set.
        if options['with-family']:
            run_script('OPSGatherAugment-Families.py', configfile)

    # Patent2Net classic steps, aggregated

    if wanted('acquire'):
        run_all('OPSGatherPatentsv2.py', 'PatentListFiltering.py')
        refresh_families()
        # the normalisation fails when the families have not been gathered yet
        run_script('preProcessNormalisationNames.py', configfile)

    # Normalisation features (new 08/2020)
    if wanted('with-family'):
        run_all('PatentListFiltering.py', 'OPSGatherAugment-Families.py')

    if wanted('normalisation'):
        run_all('PatentListFiltering.py', 'preProcessNormalisationNames.py')

    if wanted('maps'):
        run_all('FormateExportCountryCartography.py',
                'FormateExportAttractivityCartography.py')

    if wanted('bibfile'):
        run_script('FormateExportBiblio.py', configfile)

    if wanted('tables'):
        # Each table script is followed by an approximate progress report.
        table_steps = (
            ('FormateExportDataTableFamilies.py', 50),
            ('FormateExportDataTable.py', 75),
            ('FormateExportPivotTable.py', 100),
        )
        for script, progress in table_steps:
            run_script(script, configfile)
            AnnonceProgres(Appli='p2n_tables', valMax=100, valActu=progress)

    if wanted('networks'):
        run_script('P2N-Nets-new.py', configfile)
        networks = (
            "CountryCrossTech",
            "CrossTech",
            "InventorsCrossTech",
            "Applicants_CrossTech",
            "Inventors",
            "ApplicantInventor",
            "Applicants",
            "References",
            "Citations",
            "Equivalents",
        )
        for rank, network in enumerate(networks, start=1):
            run_script('P2N-NetworksBis.py {network}'.format(network=network),
                       configfile)
            # valMax is unused in the front end
            AnnonceProgres(Appli='p2n_network',
                           valMax=100,
                           valActu=rank * 100 / len(networks))
    # The complete network (P2N-NetworksFull.py) is not built here: the
    # original notes it as a buggy and very slow process.

    if wanted('freeplane'):
        run_script('P2N-FreePlane.py', configfile)

    # Gather more textual data if it exists (claims, descriptions) and format
    # it for Iramuteq
    if wanted('iramuteq'):
        refresh_families()
        run_all('OPSGatherContentsV2-Iramuteq.py', 'FusionIramuteq2.py')

    # Format xml files for Carrot2
    if wanted('carrot'):
        refresh_families()
        run_all('FusionCarrot2.py', 'P2N-Indexer.py')

    # Gather images
    if wanted('images'):
        run_all('OPSGatherContentsV2-Images.py', 'FusionImages.py')

    # Cluster processing
    if wanted('cluster'):
        refresh_families()
        run_all('IPC-WS-metrics.py', 'ClusterPreProcess.py', 'P2N-Cluster.py')

    # Present data through HTML
    if wanted('interface'):
        run_script('Interface2.py', configfile)
示例#4
0
def autom_request_spliter_time(RequestOrig, directory):
    """
    Split a request that returns too many patents into smaller request files.

    The hit count of ``RequestOrig`` is checked first. When it exceeds 2000,
    the user is asked for a start year and the request is narrowed with a
    ``PD=`` date clause, year by year up to the current year. Any year that
    still reports 2000 hits or more is narrowed per month, then per day, and
    as a last resort per IPC section (``IC=A`` .. ``IC=H``). Pieces reporting
    no hit are skipped.

    Every accepted piece is written to ``../RequestsAuto/<directory>/`` as a
    ``*Request.cql`` file. The file content is the template
    ``REQUESTS/requestModel.cql`` with its ``***requete***`` and
    ``***dataDir***`` placeholders substituted. A file whose name already
    exists in the target directory is left untouched.

    :param RequestOrig: the query expression to split.
    :param directory: name of the target sub-directory, also used to build
        the data directory put into each request file.
    :return: None. Progress and totals are printed.

    Side effects: sets ``REQUESTS_CA_BUNDLE``, rebinds the module globals
    ``key`` and ``secret``, and reads the start year from stdin when a split
    is needed.
    """
    import calendar  # function-scope import: only this splitter needs it

    # The credentials read below are published as module globals.
    global key
    global secret

    today = datetime.datetime.today()
    Request = RequestOrig + ' AND PD=date'

    DataDir = directory + '/segments/' + directory
    targetDirectory = '../RequestsAuto/' + directory

    with open("REQUESTS/requestModel.cql", 'r') as fic:
        data = fic.read()

    # exist_ok also covers a nested `directory`, which the former
    # os.listdir() membership test could not detect as already existing.
    os.makedirs(targetDirectory, exist_ok=True)
    lstFicOk = set(os.listdir(targetDirectory))

    # Fixed order, so a run always issues the same requests in the same order.
    IPC = ("A", "B", "C", "D", "E", "F", "G", "H")

    os.environ['REQUESTS_CA_BUNDLE'] = 'cacert.pem'

    # put your credentials from the EPO client in this file...
    c = OPSCredentials(credentials_file='../cles-epo.txt')
    key, secret = c.read()

    Total = 0
    nbFiles = 0

    def checkRequest(req):
        """Return the number of patents found for `req`, or 0 if the search fails."""
        ops_client = epo_ops.Client(key, secret)
        ops_client.accept_type = 'application/json'
        try:
            lstBrevets2, nbTrouves = PatentSearch(ops_client, req)
        except Exception:
            # Best effort: a failed search counts as "nothing found".
            # Narrowed from a bare `except` so Ctrl-C and sys.exit() still work.
            return 0
        return nbTrouves

    def writeRequest(request, dataSuffix, NameFic, Trouves):
        """Account for `Trouves` hits and write the request file unless it already exists."""
        nonlocal Total, nbFiles
        Total += Trouves
        if NameFic in lstFicOk:
            # Checked before opening: opening in "w" mode would truncate the file.
            return
        data2 = data.replace("***requete***", request)
        data2 = data2.replace("***dataDir***", DataDir + dataSuffix)
        path = targetDirectory + "/" + NameFic
        with open(path, "w") as ficRes:
            ficRes.write(data2)
        nbFiles += 1
        print(path, 'file written, ', Trouves, ' patents expected and ',
              Total, ' cumulative.')

    def splitByIpc(dayRequest, annee, mois, jour):
        """Last resort for one day: one request per IPC section."""
        for ipc in IPC:
            Request3 = dayRequest + " AND IC=" + ipc
            Trouves = checkRequest(Request3)
            if Trouves > 2000:
                print(
                    "thats bad... the request : " + Request3 +
                    " should be splitted and the limits of this script are reached"
                )
                # Skip only this section; the remaining ones are still usable.
                continue
            if Trouves <= 0:
                # nothing to gather for that section
                continue
            writeRequest(Request3, annee + mois + jour + ipc,
                         annee + mois + '-' + jour + '-' + ipc + 'Request.cql',
                         Trouves)

    def splitByDay(AN, month):
        """Split one month of year `AN` into one request per day."""
        annee = str(AN)
        mois = str(month).zfill(2)
        # monthrange gives the real month length (handles leap years)
        for day in range(1, calendar.monthrange(AN, month)[1] + 1):
            jour = str(day).zfill(2)
            Request2 = Request.replace('=date', '=' + annee + mois + jour)
            Trouves = checkRequest(Request2)
            if Trouves <= 0:
                continue
            if Trouves < 2000:
                writeRequest(Request2, annee + mois + jour,
                             annee + mois + jour + 'Request.cql', Trouves)
            else:
                # bad days for EPO... we need to split again, for that day only
                splitByIpc(Request2, annee, mois, jour)

    def splitByMonth(AN):
        """Split year `AN` into one request per month."""
        annee = str(AN)
        for month in range(1, 13):
            mois = str(month).zfill(2)
            Request2 = Request.replace('=date', '=' + annee + mois)
            Trouves = checkRequest(Request2)
            if Trouves <= 0:
                continue
            if Trouves < 2000:
                writeRequest(Request2, annee + mois,
                             annee + mois + 'Request.cql', Trouves)
            else:
                splitByDay(AN, month)

    toBeFound = checkRequest(RequestOrig)

    if toBeFound <= 2000:
        print("no need to split, gather directly your request '", RequestOrig,
              "' with p2n")
        # The start year is only needed for splitting, so it is not asked for here.
        return

    print(
        'wow ', toBeFound,
        ' (or more) patents to retreive... A good reason to use this script'
    )
    dateDeb = int(
        input("please enter the stardate year for gathering you request"))

    for AN in range(dateDeb, today.year + 1):
        Request2 = Request.replace('=date', '=' + str(AN))
        Trouves = checkRequest(Request2)
        if Trouves <= 0:
            continue
        if Trouves < 2000:
            # a single request for that year is ok
            writeRequest(Request2, str(AN), str(AN) + 'Request.cql', Trouves)
        else:
            splitByMonth(AN)

    print("request splitted in ", nbFiles, " files")

    print("Gathering with P2N all this request should lead to ", Total,
          " patents")