def _irreducible_concepts(lattice, threshold):
    """Return the (intent, extent) pairs of `lattice` that are irreducible.

    A concept is kept when its intent has at least `threshold` attributes
    and is NOT fully covered by the union of the strictly smaller,
    above-threshold intents contained in it.

    lattice   -- iterable of (intent, extent) pairs (concept lattice)
    threshold -- minimum number of attributes an intent must have
    """
    kept = []
    for i, (intent, extent) in enumerate(lattice):
        if len(intent) < threshold:
            continue
        intent_set = set(intent)
        # Attributes of `intent` not yet covered by smaller sub-intents.
        remaining = set(intent)
        reducible = False
        for j, (other, _unused) in enumerate(lattice):
            # Only compare against distinct, above-threshold,
            # strictly smaller concepts.
            if j == i or len(other) < threshold or len(intent) <= len(other):
                continue
            other_set = set(other)
            if other_set <= intent_set:
                remaining -= other_set
                if not remaining:
                    # Every attribute is covered: the concept is reducible.
                    reducible = True
                    break
        if not reducible:
            kept.append((intent, extent))
    return kept


def build_iceberg_lattice(filename, lattice, threshold):
    """Write the iceberg lattice of `lattice` to 'iceberg.<filename>' (CSV).

    Keeps only the irreducible concepts whose intent has at least
    `threshold` attributes, then serialises them as a `concepts` Context:
    one object per kept concept (intent attributes joined by ';'), with
    the extent as its properties.
    """
    df = Definition()
    for intent, extent in _irreducible_concepts(lattice, threshold):
        obj_name = ';'.join(intent)
        df.add_object(obj_name, list(extent))
    conc = Context(*df)
    conc.tofile(filename='iceberg.' + filename, frmat='csv')
    # NOTE(review): tail of a function whose `def` lies before this chunk;
    # `res` is bound in the unseen part. Indentation level assumed — confirm.
    print res
    return res


if __name__ == '__main__':
    # Toy animal/property formal context: 7 objects x 9 attributes.
    animaux = ["Bat","Eagle","Monkey","Parrot fish","Penguin","Shark","Lantern fish"]
    proprietes = ["breathes in water","can fly","has beak","has hands","has skeleton","has wings","lives in water","is viviparous","produces light"]
    # Incidence matrix: matrix[i][j] is True when animal i has property j.
    matrix = [ (False, True, False, False, True, True, False, True, False), # Bat
               (False, True, True, False, True, True, False, False, False), # Eagle
               (False, False, False, True, True, False, False, True, False), # Monkey
               (True, False, True, False, True, False, True, False, False), # Parrot Fish
               (False, False, True, False, True, True, True, False, False), # Penguin
               (True, False, False, False, True, False, True, False, False), # Shark
               (True, False, False, False, True, False, True, False, True)] # Lantern Fish
    # Save the raw context, then build the `concepts` Context from it.
    exportContext(animaux,proprietes,matrix)
    c = Context(animaux, proprietes, matrix) # doctest: +ELLIPSIS
    # Alternative client/article example kept by the author as a
    # string-literal "comment" (never executed):
    '''clients = ['Anne','Basile','Carole']
    articles = ['fromage','vin','lait','lessive']
    matrix = [ (True, False, True, False), #A
    (True, True, False, True), #B
    (True,False,True,True)] #C
    c = Context(clients, articles, matrix)'''
    #print c
    #c.lattice.graphviz(view=True)
    #for intent, extent in c.lattice:
    #print intent, extent
    # Persist the context in Burmeister .cxt format, then dump its concepts.
    c.tofile('animaux.txt',frmat='cxt',encoding='utf-8')
    writeConcepts(c.lattice)
def buildLattice(pattern = True, inputFiles = "dec", inputAttributes = "arts"): if pattern == True: name = "WithPS" else: name = "WithoutPS" print inputFiles, inputAttributes, name #Le contexte a construire matrixAttribute = [] # listFiles = [] #Liste des fichiers lus pour construire le contexte if(inputFiles == "dec"): listAllFiles = fg.getAllDecisions() elif(inputFiles == "avis"): listAllFiles = fg.getAllAvis() elif(inputFiles == "all"): listAllFiles = fg.getAllFiles() else: print "choix non reconnu. Choix possibles : 'dec' 'avis' 'all'" listAllFiles = fg.getAllDecisions() #Nombre de fichiers lus lengthAllFiles = len(listAllFiles) #L'ensemble des attributs du contexte setOfAttributes = set() #L'ensemble des attributs modifiés du contexte setFormated = set() #L'expression régulière des attributs possibles des différents textes if (inputAttributes == "arts"): expre = expreAttribute() elif(inputAttributes == "artsdocs"): expre = expreAttribute()+'|'+regex.exprReguliereDecision() else: print "choix non reconnu. 
Choix possibles : 'arts' 'docs' 'artsdocs'" expre = expreAttribute() #Compteur de fichiers lus i = 0 #Lecture des fichiers pour lister les attributs for dfile in listAllFiles: f = open(dfile,'r') #Enlever les sauts de lignes dûs au copier/coller du pdf data = ' '.join([line.rstrip() for line in f]) #Pour chaque expression trouvée dans le texte for m in re.finditer(expre, data): #Expression réguliere attributFormated = m.group(0) #Lissage de l'expression : #Enlever les accents attributFormated = regex.removeAccent(attributFormated) #Corriger les erreurs potentielles attributFormated = correctSyntaxe(attributFormated) attributFormated = regex.supprNumero(attributFormated) setOfAttributes.add(attributFormated) i = i + 0.5 if i%100==0: print str(int(i))+' fichiers lus sur '+str(lengthAllFiles) #Modification des attributs pour éviter les doublons setOfAttributes = list(setOfAttributes) for item in setOfAttributes: setFormated.add(regex.formatArticle(item)) if pattern == True: developAttributes = buildAttributes(setFormated) setFormated = list(developAttributes) else: setFormated = list(setFormated) #Nombre d'attributs dans le contexte lenset = len(setFormated) print str(lenset) writeAttributes(setFormated,name) #Construction du contexte for dfile in listAllFiles: f = open(dfile, 'r') data = ' '.join([line.rstrip() for line in f]) #Lister les documents pour la construction du contexte listFiles.append(regex.nomDocument(dfile)) #Construction d'une ligne du contexte nuplet = (False,)*lenset listuple = list(nuplet) #Pour chaque expression for m in re.finditer(expre, data): attributFormated = m.group(0) #Formater l'expression régulière attributFormated = regex.removeAccent(attributFormated) attributFormated = correctSyntaxe(attributFormated) attributFormated = regex.supprNumero(attributFormated) attributFormated = regex.formatArticle(attributFormated) #Si pattern, on découpe chaque attribut if pattern == True: listAtt = developAttribute(attributFormated) for item in listAtt: 
#Trouver l'indice de l'attribut index = setFormated.index(item) #Mettre à jour la valeur listuple[index] = True #Sinon on cherche juste les attributs else: index = setFormated.index(attributFormated) listuple[index] = True i = i + 0.5 if i%100==0: print str(int(i))+' fichiers lus sur '+str(lengthAllFiles) nuplet = tuple(listuple) #Ajoute le nouvel objet au contexte matrixAttribute.append(nuplet) print str(int(i))+' fichiers lus sur '+str(lengthAllFiles) #Sauvegarde les attributs dans un txt #sauvegarde le contexte dans un json exportContext(listFiles,setFormated,matrixAttribute,name) c = Context(listFiles,setFormated,matrixAttribute) print "construction de la lattice. Cela peut prendre quelques instants" c.lattice.graphviz(view=True) #Sauvegarde le contexte dans un txt writeConcepts(c.lattice,name) c.tofile('latticeEtContext/saveLatticeWithPS.txt',frmat='cxt',encoding='utf-8')
# Creating and save context for implication rules X_train_one_hot['Class'] = y_train X_train_Class_split = pd.concat([ X_train_one_hot, pd.get_dummies(X_train_one_hot['Class'], prefix='Class') ], axis=1) X_train_Class_split = X_train_Class_split.drop(["Class"], axis=1).drop_duplicates() objects = X_train_Class_split.index.values objects = [str(oi) for oi in objects] properties = X_train_Class_split.columns.values bools = list( X_train_Class_split.astype(bool).itertuples(index=False, name=None)) cxt = Context(objects, properties, bools) cxt.tofile('diabetes_context.cxt', frmat='cxt', encoding='utf-8') ## Create concepts lattices for each class c = {} l = {} no_of_classes = 2 X_train_one_hot['Class'] = y_train X_train_one_hot = X_train_one_hot.drop_duplicates() for i in range(0, no_of_classes): X_temp = X_train_one_hot.copy(deep=True) X_temp = X_temp[X_temp['Class'] == i].drop(["Class"], axis=1) objects = X_temp.index.values objects = [str(oi) for oi in objects] properties = X_temp.columns.values bools = list(X_temp.astype(bool).itertuples(index=False, name=None))