Exemplo n.º 1
0
        dateMini = datetime.date(3000,1,1)
        dateMaxi =  datetime.date(1000,1,1)
        NeededInfo .extend(mixNet)  # list of needed field for building the net
        G1 = nx.DiGraph()        # dynamic network for Gephi 
        attr_dict = dict()       # attributes for the net
        G2 = nx.DiGraph()        # flat net for gexf.js may be it is possible to use previous instead of this one...
        print network, ": loading data with ", " and ".join(mixNet), " fields."
        with open (BiblioPath+'//'+ndf, 'r') as fic:    
            DataBrevet = pickle.load(fic)
        
        # For every loaded patent: keep only the fields listed in NeededInfo,
        # clean placeholder values, expand the record with DecoupeOnTheFly and
        # register both the expanded records and the patent label.
        for brev in DataBrevet["brevets"]:
            pat = OrderedDict()
            for key in NeededInfo:
                if isinstance(brev[key], list):
                    # Drop placeholder entries. The previous condition chained
                    # `!=` tests with `or`, which is true for every value, so
                    # nothing was ever filtered out.
                    kept = [cont for cont in flatten(brev[key])
                            if cont not in ('empty', 'Empty', '')]
                    # A field left without any usable value is stored as '',
                    # the same representation this code base uses for empty
                    # fields elsewhere, rather than as an empty list.
                    brev[key] = kept if kept else ''
                elif isinstance(brev[key], (unicode, str)):
                    # Strings are immutable: replace() returns a new string,
                    # which used to be thrown away.
                    brev[key] = brev[key].replace('empty', '')
                pat[key] = brev[key]

            # Only record expanded entries that are not already known.
            for flatPat in DecoupeOnTheFly(pat, []):
                if flatPat not in ListeBrevet:
                    ListeBrevet.append(flatPat)
            if pat['label'] not in Patents:
                Patents.add(pat['label'])
        for lab in Patents:
            temp = []
            for bre in [brev for brev in ListeBrevet if brev['label']==lab]:
                for cat in mixNet:
Exemplo n.º 2
0
                    tempFiltered = []
                    LabList = [pat['label'] for pat in temp]

                    for pat in LabList:
                        tempoPat = [
                            patent for patent in temp if patent['label'] == pat
                        ]  # gather the patent records that share this label so they can be merged
                        # The OPS model seems to save one entry per status document,
                        # whereas in the P2N model the label is a unique key, so
                        # properties become lists; building them is the job of the
                        # Update function called just below.

                        tempoRar = dict()
                        for pate in tempoPat:
                            tempoRar = Update(pate, tempoRar)
                            for clef in tempoRar.keys():
                                if isinstance(tempoRar[clef], list):
                                    tempoRar[clef] = flatten(tempoRar[clef])
                                    tempo = []
                                    for contenu in tempoRar[clef]:
                                        if contenu is not None:
                                            tempo.append(contenu)
                                        else:
                                            if '' not in tempo and len(
                                                    tempo) == 0:
                                                tempo.append('')
                                    tempoRar[clef] = tempo
                                else:
                                    pass  #should be good here
                        if pat not in YetIn:
                            tempFiltered.append(dictCleaner(tempoRar))
                            YetIn.append(pat)
                        else:
Exemplo n.º 3
0
            print "please use Comptatibilizer"

        # Status message: number of entries in DataBrevet["brevets"] and the
        # first command-line argument, which the message presents as the
        # network being pre-formatted.
        print "Nice, ", len(
            DataBrevet["brevets"]
        ), " patents found. Pre-formating ", sys.argv[1], " net."
        for brev in DataBrevet["brevets"]:
            #tempo = pickle.load(fic) # we only memorize needed nfo
            pat = OrderedDict()
            if "date" not in brev.keys():
                brev['date'] = '1-1-1'
            if isinstance(brev['label'], list):
                brev['label'] = brev['label'][0]
            for key in NeededInfo:

                if key.count('nice') > 0 and isinstance(brev[key], list):
                    brev[key] = flatten(brev[key])
                    machin = []
                    for truc in brev[key]:
                        Tt = Cleaning(truc)
                        if Tt is not None and len(Tt) > 0:
                            machin.append(Tt)
                    if len(machin) > 0:
                        pat[key] = machin  #can you do more ugly ?
                    else:
                        pat[key] = ''

                elif isinstance(brev[key], list):
                    pat[key] = flatten(brev[key])
                    if key.count('Date') == 0:
                        pat[key] = [
                            cont for cont in brev[key]
        dateMaxi = datetime.date(1000, 1, 1)
        NeededInfo.extend(mixNet)  # list of needed field for building the net
        G1 = nx.DiGraph()  # dynamic network for Gephi
        attr_dict = dict()  # attributes for the net
        G2 = nx.DiGraph(
        )  # flat net for gexf.js may be it is possible to use previous instead of this one...
        print network, ": loading data with ", " and ".join(mixNet), " fields."
        with open(BiblioPath + '//' + ndf, 'r') as fic:
            DataBrevet = pickle.load(fic)

        # For every loaded patent: keep only the fields listed in NeededInfo,
        # clean placeholder values, expand the record with DecoupeOnTheFly and
        # register both the expanded records and the patent label.
        for brev in DataBrevet["brevets"]:
            pat = OrderedDict()
            for key in NeededInfo:
                if isinstance(brev[key], list):
                    # Drop placeholder entries. The previous condition chained
                    # `!=` tests with `or`, which is true for every value, so
                    # nothing was ever filtered out.
                    kept = [
                        cont for cont in flatten(brev[key])
                        if cont not in ('empty', 'Empty', '')
                    ]
                    # A field left without any usable value is stored as '',
                    # the same representation this code base uses for empty
                    # fields elsewhere, rather than as an empty list.
                    brev[key] = kept if kept else ''
                elif isinstance(brev[key], (unicode, str)):
                    # Strings are immutable: replace() returns a new string,
                    # which used to be thrown away.
                    brev[key] = brev[key].replace('empty', '')
                pat[key] = brev[key]

            # Only record expanded entries that are not already known.
            for flatPat in DecoupeOnTheFly(pat, []):
                if flatPat not in ListeBrevet:
                    ListeBrevet.append(flatPat)
            if pat['label'] not in Patents:
                Patents.add(pat['label'])