def Save(self):
    """Persist the suffix-tag table to disk.

    Only ``self.suffissiTag`` is written, to ``self.suffissiTagFilename``,
    and the value returned by ``SaveLoad.SaveByte`` is printed.

    NOTE: earlier revisions also saved lemma, radiciList, lemmaRadice,
    lemmaSuffisso, lemmaTipo, lemmaTipoEsteso, lemmaTipoListLemma,
    lemmaTipoListVocabolo, tipoList and parole (the last one annotated as
    "safe to keep"), and called ``SalvaRadici()`` / ``SalvaAbrev()``. All of
    those calls were commented out and are not performed here.

    Returns:
        True if the save call completed without raising, False otherwise.
    """
    try:
        print(SaveLoad.SaveByte(self.suffissiTag, self.suffissiTagFilename))
        return True
    except Exception:
        # Best-effort save: report the failure to the caller instead of
        # raising, but let KeyboardInterrupt / SystemExit propagate.
        return False
def __Save(self):
    """Persist the ranking results.

    ``self.rankRes`` is written with ``SaveLoad.SaveByte`` and the returned
    value is printed. Then, for every (key, value) pair of ``self.rankRes``,
    the key and the value are each passed, with a trailing newline, to
    ``SaveLoad.SaveLinesA`` targeting 'tutti i dialoghi k.txt'.

    The legacy per-key export (one file per dialogue) that used to follow an
    unconditional ``return`` was unreachable and has been removed.

    NOTE(review): the first save uses the literal file name "self.rankRes";
    this looks like it may have been meant to be a filename attribute.
    It is left unchanged - confirm against the loader.
    """
    print(SaveLoad.SaveByte(self.rankRes, "self.rankRes"))
    for pair in self.rankRes.iteritems():
        # Each pair is a (key, value) tuple: both members are written out.
        for part in pair:
            SaveLoad.SaveLinesA(part + u"\n", 'tutti i dialoghi k.txt')
def allPosTag(self):
    """POS-tag every cleaned dialogue file and save the tagged result.

    Every path matching ``self.folderDialoghiClean + '*'`` is read with
    ``SaveLoad.LoadLines``. Blank lines are dropped; the remaining lines are
    converted to unicode, lower-cased and passed to ``self.pos.PosTag``.
    The list of tagged lines is saved with ``SaveLoad.SaveByte`` as
    ``<n>.txt`` inside ``self.folderDialoghiPosTag``, where ``n`` starts at
    the number of entries already present in that folder and is incremented
    after each save that returns a true value.

    A file that raises while being processed is skipped. One progress line
    is printed per input file.
    """
    # Glob the input folder once and reuse the list for the total and the loop.
    clean_files = glob.glob(self.folderDialoghiClean + u'*')
    tot = len(clean_files)
    ifile = len(glob.glob(self.folderDialoghiPosTag + '*'))
    prefix_len = len(self.folderDialoghiClean)

    # Kept as a float so the progress line prints exactly as before (1.0, 2.0, ...).
    i = float(1)
    for fil in clean_files:
        try:
            dlg = []
            for line in SaveLoad.LoadLines(fil):
                if line.strip():
                    dlg.append(self.pos.PosTag(unicode(line).lower()))

            # Save the tagged dialogue, without the blank lines.
            filename = self.folderDialoghiPosTag + (str(ifile) + u'.txt')
            if SaveLoad.SaveByte(dlg, filename):
                ifile += 1
        except Exception:
            # Input files may be broken in arbitrary ways: skip the file and
            # move on, but do not swallow KeyboardInterrupt / SystemExit.
            pass

        # The double spaces reproduce the original comma-separated print output.
        print("elaborazione file  %s  -  %s  /  %s" % (fil[prefix_len:], i, tot))
        i += 1
def __init__(self):
    """Set up the data paths, build the data set and save it.

    Creates the ``ParseCreaDatiSentimentAnalysis`` helper, calls
    ``self.AvviaCreazioneDati()`` and writes its result to
    "SentiTrainSet.list" under the data folder (presumably the sentiment
    training set, judging by the file name). The value returned by
    ``SaveLoad.SaveByte`` is printed.
    """
    base_dir = "risorse\\Dati\\"
    self.__folder = base_dir
    self.__folderdialoghi = base_dir + "\\dialoghi\\dialoghiRaw\\"

    parser_module = ParseCreaDatiSentimentAnalysis
    self.SentiAnaliz = parser_module.ParseCreaDatiSentimentAnalysis()

    self.__SentimTrainsetFilename = base_dir + "SentiTrainSet.list"

    created = self.AvviaCreazioneDati()
    outcome = SaveLoad.SaveByte(created, self.__SentimTrainsetFilename)
    print(outcome)
def __Save(self):
    """Save the sentence tokenizer.

    Writes ``self.__sentsTokenizer`` to ``self.__sentsTokenFileName`` via
    ``SaveLoad.SaveByte``.

    Returns:
        True if the save call completed without raising, False otherwise.
    """
    try:
        SaveLoad.SaveByte(self.__sentsTokenizer, self.__sentsTokenFileName)
    except Exception:
        # Report the failure through the return value; KeyboardInterrupt and
        # SystemExit are deliberately not intercepted.
        return False
    return True
def __Save(self, dlgname): print "file Saved:", SaveLoad.SaveByte( self.data, self.folder + dlgname + self.extdlgData) print "file Saved:", SaveLoad.SaveByte( self.vocab, self.folder + dlgname + self.extdlgVoc)
def __Save(self):
    """Save the word frequencies and the stop-word collection.

    Each object is written with ``SaveLoad.SaveByte`` to its matching
    ``*Filename`` attribute, and the value returned by each call is printed.
    """
    freq_outcome = SaveLoad.SaveByte(self.wordsFreq, self.wordsFreqFilename)
    print(freq_outcome)

    stop_outcome = SaveLoad.SaveByte(self.stopWords, self.stopWordsFilename)
    print(stop_outcome)
def Save(self):
    """Save the sentiment classifier.

    Returns:
        Whatever ``SaveLoad.SaveByte`` returns for writing
        ``self.classificatoreSenti`` to ``self.classificatoreSentiFilename``.
    """
    outcome = SaveLoad.SaveByte(self.classificatoreSenti,
                                self.classificatoreSentiFilename)
    return outcome
def __Save(self):
    """Save the unigram tagger, then the bigram tagger.

    The values returned by ``SaveLoad.SaveByte`` are ignored.

    Returns:
        Always True (when neither save call raises).
    """
    unigram_path = self.__unigram_taggerFileName
    SaveLoad.SaveByte(self.__unigram_tagger, unigram_path)

    bigram_path = self.__bigram_taggerFileName
    SaveLoad.SaveByte(self.__bigram_tagger, bigram_path)

    return True
def __init__(self, dlgname): self.extdlg = ".txt" self.extdlgData = ".dat" self.extdlgVoc = ".voc" self.extdictWordIndex = '.dictWordIndex' self.extdictWordScoreRow = '.dictWordsScoreRows' self.folder = "risorse\\Dati\\dialoghi\\" dlg = SaveLoad.LoadLines(self.folder + "\\dialoghiRaw\\" + dlgname + self.extdlg) #dlgfilename="C:\\Esame TTR\\risorse\\Dati\\dialoghi\\dialoghiRaw\\Black Mirror dvdrip.txt" dlgdata = SaveLoad.LoadByte(self.folder + dlgname + self.extdlgData) vocab = SaveLoad.LoadByte(self.folder + dlgname + self.extdlgVoc) try: dlgdata.mean() except: print "file .dat mancante\ncreazione in corso..." #creo il file con tdidft cosine ecc... tfidfDlgCreator.tfidfDataCreator(dlgname) #ricarico i dati dlgdata = SaveLoad.LoadByte(self.folder + dlgname + self.extdlgData) vocab = SaveLoad.LoadByte(self.folder + dlgname + self.extdlgVoc) dictIndexWord = dict() for i in xrange(len(vocab)): dictIndexWord[i] = vocab[i] #dictPosWordsReverse self.dictWordIndex = dict() #[word]=index in dictIndexWord for k, v in dictIndexWord.iteritems(): self.dictWordIndex[v] = k self.dictWordScoreRow = collections.defaultdict(list) #[word][score in row][Row] -> per max # print dlgdata.shape[0] # print dlgdata.shape[1] for row in xrange(dlgdata.shape[0]): for col in xrange(dlgdata.shape[1]): indice = tuple([row, col]) prob = dlgdata[indice] if prob != float(0): # print row, col, prob scoreRow = dict() word = dictIndexWord[col] count = dlg[row].split() count = count.count(word) count = count * prob #dlgdata[row][col] scoreRow[count] = row self.dictWordScoreRow[word].append(scoreRow) del dlgdata print "file Saved:", SaveLoad.SaveByte( self.dictWordScoreRow, self.folder + dlgname + self.extdictWordScoreRow) print "file Saved:", SaveLoad.SaveByte( self.dictWordIndex, self.folder + dlgname + self.extdictWordIndex)
def __Save(self): grammars = self.__defaultgrammar() grammars.extend(self.__vprules) grammars.extend(self.__nprules) print 'saved:', SaveLoad.SaveByte(grammars, self.__grammarsFilename)
import time

from sklearn.feature_extraction.text import TfidfVectorizer

# Raw dialogue used as input, and the two files produced from it.
filename = "C:\\Esame TTR\\risorse\\Dati\\dialoghi\\dialoghiRaw\\Burn_Notice.txt"
dat_filename = "C:\\Esame TTR\\risorse\\Dati\\dialoghi\\Burn_Notice.dat"
voc_filename = "C:\\Esame TTR\\risorse\\Dati\\dialoghi\\Burn_Notice.voc"

dlg = SaveLoad.LoadLines(filename)

print(time.asctime())
print('creo copia in analiz')
# The AnalizzatoreSents analysis step is disabled, so this simply binds a
# second name to the loaded lines; no copy is made.
dlgcopy = dlg
print(time.asctime())

tfidf = TfidfVectorizer()
print(time.asctime())
print('creo tfidf')

# Fit the vectorizer on the dialogue lines and keep its vocabulary.
tfidfdata = tfidf.fit_transform(dlgcopy)
vocab = tfidf.get_feature_names()
print(type(vocab))

# Save the matrix first, then the vocabulary, printing each outcome.
dat_outcome = SaveLoad.SaveByte(tfidfdata, dat_filename)
print(dat_outcome)
voc_outcome = SaveLoad.SaveByte(vocab, voc_filename)
print(voc_outcome)

print(time.asctime())
print('done')