Example #1
    def Save(self):
        try:
            #            SaveLoad.SaveByte(self.lemma, self.lemmaFilename)
            #            SaveLoad.SaveByte(self.radiciList, self.radiciListFilename)
            #
            #            SaveLoad.SaveByte(self.lemmaRadice, self.lemmaRadiceFilename)
            #            SaveLoad.SaveByte(self.lemmaSuffisso, self.lemmaSuffissoFilename)
            #
            #            SaveLoad.SaveByte(self.lemmaTipo, self.lemmaTipoFilename)
            #            SaveLoad.SaveByte(self.lemmaTipoEsteso, self.lemmaTipoEstesoFilename)
            #
            #            SaveLoad.SaveByte(self.lemmaTipoListLemma, self.lemmaTipolistLemmaFilename)
            #
            #            SaveLoad.SaveByte(self.lemmaTipoListVocabolo, self.lemmaTipoListVocaboloFilename)
            #
            #            SaveLoad.SaveByte(self.tipoList, self.tipoListFilename)
            #
            #
            #
            #            # safe to keep
            #            SaveLoad.SaveByte(self.parole,self.paroleFilename)
            #
            #            self.SalvaRadici()
            #
            self.SalvaAbrev()
            print SaveLoad.SaveByte(self.suffissiTag, self.suffissiTagFilename)

            return True
        except:

            return False
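All of these examples go through the same project-local SaveLoad helper module, which is not shown on this page. The snippet below is a minimal sketch of what SaveByte and LoadByte might look like, assuming they are thin pickle wrappers where SaveByte reports success with True/False; the names match the calls above, but the implementation is an assumption, not the original module.

import pickle

def SaveByte(obj, filename):
    # assumed implementation: pickle the object to disk, report success
    try:
        with open(filename, 'wb') as f:
            pickle.dump(obj, f)
        return True
    except (IOError, pickle.PicklingError):
        return False

def LoadByte(filename):
    # assumed implementation: load a previously pickled object
    with open(filename, 'rb') as f:
        return pickle.load(f)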
Example #2
    def __Save(self):
        print SaveLoad.SaveByte(self.rankRes, "self.rankRes")

        for i in self.rankRes.iteritems():
            for v in i:
                v = v + u"\n"
                SaveLoad.SaveLinesA(v, 'tutti i dialoghi k.txt')

        return

        # old version (unreachable: the return above always exits first)
        for k in self.rankRes.keys():
            i = 0
            try:
                for v in self.rankRes[k]:
                    try:
                        dati = k + self.__SEPATATORE + v
                        k = self.pulisci.PulisciFilename(k)

                        # to avoid problems with overly long filenames
                        if len(k) > 90:
                            k = k[:90]
                        filename = self.folderDialoghi + k + str(
                            i) + self.extFile
                        print "file: ", filename, "Saved:", SaveLoad.SaveLines(
                            dati, filename)

                        i += 1
                    except:
                        pass
            except:
                pass
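Example #2 also relies on SaveLoad.SaveLines and SaveLoad.SaveLinesA, which are not shown either. Judging by how they are called (one dialogue per file vs. appending every line to a single file), a plausible sketch is the pair below; the write/append split and the UTF-8 encoding are assumptions.

import codecs

def SaveLines(text, filename):
    # assumed: write text to a fresh UTF-8 file, report success
    try:
        with codecs.open(filename, 'w', encoding='utf-8') as f:
            f.write(text)
        return True
    except IOError:
        return False

def SaveLinesA(text, filename):
    # assumed: append text to an existing UTF-8 file
    try:
        with codecs.open(filename, 'a', encoding='utf-8') as f:
            f.write(text)
        return True
    except IOError:
        return False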
Example #3
    def allPosTag(self):
        i = float(1)
        tot = len(glob.glob(self.folderDialoghiClean + '*'))
        ifile = len(glob.glob(self.folderDialoghiPosTag + '*'))

        for fil in glob.glob(self.folderDialoghiClean + u'*'):
            try:
                dlg = []
                for line in SaveLoad.LoadLines(fil):
                    if len(line.strip()) > 0:
                        line = unicode(line)
                        line = line.lower()
                        l = self.pos.PosTag(line)
                        dlg.append(l)

                # save the POS-tagged file, without useless newlines
                filename = str(ifile) + u'.txt'
                filename = self.folderDialoghiPosTag + filename
                # save the dialogue file
                if SaveLoad.SaveByte(dlg, filename):
                    ifile += 1

            except:
                # files may contain errors of any kind; in that case skip the file and move on to the next one
                pass

            print "elaborazione file ", fil[len(self.folderDialoghiClean
                                                ):], " - ", i, " / ", tot
            i += 1
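Example #3 delegates tagging to self.pos.PosTag, which is not shown. A minimal stand-alone sketch of what that call might wrap, assuming NLTK's standard tokenizer and default POS tagger:

import nltk

def PosTag(line):
    # assumed stand-in for self.pos.PosTag: tokenize and tag one sentence
    tokens = nltk.word_tokenize(line)
    return nltk.pos_tag(tokens)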
Example #4
    def __init__(self):

        self.__folder = "risorse\\Dati\\"
        self.__folderdialoghi = self.__folder + "\\dialoghi\\dialoghiRaw\\"
        self.SentiAnaliz = ParseCreaDatiSentimentAnalysis.ParseCreaDatiSentimentAnalysis()
        self.__SentimTrainsetFilename = self.__folder + "SentiTrainSet.list"

        dati = self.AvviaCreazioneDati()

        print SaveLoad.SaveByte(dati, self.__SentimTrainsetFilename)
Example #5
    def __Save(self):
        """
            
            Save
            
            input: None
            hidden: salva il tokenize
            output: True se il processo termina correttamente
            
        """

        try:
            SaveLoad.SaveByte(self.__sentsTokenizer, self.__sentsTokenFileName)

            return True
        except:
            return False
Example #6
    def __Save(self, dlgname):
        print "file Saved:", SaveLoad.SaveByte(
            self.data, self.folder + dlgname + self.extdlgData)
        print "file Saved:", SaveLoad.SaveByte(
            self.vocab, self.folder + dlgname + self.extdlgVoc)
Example #7
    def __Save(self):
        print SaveLoad.SaveByte(self.wordsFreq, self.wordsFreqFilename)
        print SaveLoad.SaveByte(self.stopWords, self.stopWordsFilename)
Example #8
    def Save(self):
        return SaveLoad.SaveByte(self.classificatoreSenti, self.classificatoreSentiFilename)
Example #9
    def __Save(self):
        SaveLoad.SaveByte(self.__unigram_tagger, self.__unigram_taggerFileName)
        SaveLoad.SaveByte(self.__bigram_tagger, self.__bigram_taggerFileName)

        return True
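Example #9 only saves the two taggers; how they were trained is not shown. A minimal sketch with NLTK, assuming a treebank-style tagged corpus and a default/unigram/bigram backoff chain (all assumptions, not taken from the original class):

import nltk

train_sents = nltk.corpus.treebank.tagged_sents()   # assumed training corpus
default_tagger = nltk.DefaultTagger('NN')            # fallback tag
unigram_tagger = nltk.UnigramTagger(train_sents, backoff=default_tagger)
bigram_tagger = nltk.BigramTagger(train_sents, backoff=unigram_tagger)

print bigram_tagger.tag("the cat sat on the mat".split())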
Example #10
    def __init__(self, dlgname):
        self.extdlg = ".txt"
        self.extdlgData = ".dat"
        self.extdlgVoc = ".voc"
        self.extdictWordIndex = '.dictWordIndex'
        self.extdictWordScoreRow = '.dictWordsScoreRows'

        self.folder = "risorse\\Dati\\dialoghi\\"

        dlg = SaveLoad.LoadLines(self.folder + "\\dialoghiRaw\\" + dlgname +
                                 self.extdlg)
        #dlgfilename="C:\\Esame TTR\\risorse\\Dati\\dialoghi\\dialoghiRaw\\Black Mirror  dvdrip.txt"

        dlgdata = SaveLoad.LoadByte(self.folder + dlgname + self.extdlgData)
        vocab = SaveLoad.LoadByte(self.folder + dlgname + self.extdlgVoc)

        # if the .dat matrix was not loaded correctly, .mean() raises and the data is (re)created below
        try:
            dlgdata.mean()
        except:
            print "file .dat mancante\ncreazione in corso..."
            #creo il file con tdidft cosine ecc...
            tfidfDlgCreator.tfidfDataCreator(dlgname)

            # reload the data
            dlgdata = SaveLoad.LoadByte(self.folder + dlgname +
                                        self.extdlgData)
            vocab = SaveLoad.LoadByte(self.folder + dlgname + self.extdlgVoc)

        dictIndexWord = dict()
        for i in xrange(len(vocab)):
            dictIndexWord[i] = vocab[i]

        #dictPosWordsReverse
        self.dictWordIndex = dict()
        #[word]=index in dictIndexWord
        for k, v in dictIndexWord.iteritems():
            self.dictWordIndex[v] = k

        self.dictWordScoreRow = collections.defaultdict(list)
        # [word][score in row][row] -> used for taking the max

        #        print dlgdata.shape[0]
        #        print dlgdata.shape[1]

        for row in xrange(dlgdata.shape[0]):
            for col in xrange(dlgdata.shape[1]):
                indice = tuple([row, col])
                prob = dlgdata[indice]
                if prob != float(0):
                    #                    print row, col, prob

                    scoreRow = dict()

                    word = dictIndexWord[col]
                    count = dlg[row].split()
                    count = count.count(word)
                    count = count * prob  #dlgdata[row][col]

                    scoreRow[count] = row
                    self.dictWordScoreRow[word].append(scoreRow)

        del dlgdata

        print "file Saved:", SaveLoad.SaveByte(
            self.dictWordScoreRow,
            self.folder + dlgname + self.extdictWordScoreRow)
        print "file Saved:", SaveLoad.SaveByte(
            self.dictWordIndex, self.folder + dlgname + self.extdictWordIndex)
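The dictWordScoreRow structure built in Example #10 maps each word to a list of single-entry {score: row} dicts, intended "for taking the max". A hedged usage sketch (not part of the original class) for picking the dialogue line where a word scores highest:

def best_row_for_word(dictWordScoreRow, word):
    # each entry in the list is a one-item dict {score: row}
    best_score, best_row = None, None
    for scoreRow in dictWordScoreRow.get(word, []):
        for score, row in scoreRow.items():
            if best_score is None or score > best_score:
                best_score, best_row = score, row
    return best_row, best_score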
Example #11
    def __Save(self):
        grammars = self.__defaultgrammar()
        grammars.extend(self.__vprules)
        grammars.extend(self.__nprules)

        print 'saved:', SaveLoad.SaveByte(grammars, self.__grammarsFilename)
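Example #11 only collects and pickles the grammar rule strings. Assuming they are chunk rules in NLTK's regexp-grammar syntax, a small illustrative sketch of how such rules can be applied (the rule shown is made up, not taken from the saved lists):

import nltk

rule = "NP: {<DT>?<JJ>*<NN>}"                      # illustrative rule only
parser = nltk.RegexpParser(rule)
tagged = [("the", "DT"), ("little", "JJ"), ("dog", "NN")]
print parser.parse(tagged)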
Example #12
import time

import SaveLoad  # project-local persistence helper (assumed import path)
from sklearn.feature_extraction.text import TfidfVectorizer

filename = "C:\\Esame TTR\\risorse\\Dati\\dialoghi\\dialoghiRaw\\Burn_Notice.txt"

dlg = SaveLoad.LoadLines(filename)

#analiz=AnalizzatoreSents.AnalizzatoreSents()

print time.asctime()
print 'creating copy for analiz'
# copy for analiz
dlgcopy = dlg  #analiz.Analisi(dlg)
print time.asctime()

tfidf = TfidfVectorizer()
print time.asctime()
print 'building tfidf'
tfidfdata = tfidf.fit_transform(dlgcopy)
vocab = tfidf.get_feature_names()
print type(vocab)

print SaveLoad.SaveByte(
    tfidfdata, "C:\\Esame TTR\\risorse\\Dati\\dialoghi\\Burn_Notice.dat")
print SaveLoad.SaveByte(
    vocab, "C:\\Esame TTR\\risorse\\Dati\\dialoghi\\Burn_Notice.voc")
#   voc=SaveLoad.LoadByte("C:\\Esame TTR\\risorse\\Dati\\dialoghi\\whip it.voc")
print time.asctime()
print 'done'
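A hedged follow-up to Example #12: reloading the saved matrix and vocabulary and reading back one weight, assuming the pickle-style SaveLoad helpers sketched earlier are importable as SaveLoad. The term looked up is only an illustration and may not be in the vocabulary.

import SaveLoad

tfidfdata = SaveLoad.LoadByte("C:\\Esame TTR\\risorse\\Dati\\dialoghi\\Burn_Notice.dat")
vocab = SaveLoad.LoadByte("C:\\Esame TTR\\risorse\\Dati\\dialoghi\\Burn_Notice.voc")

term = u"notice"                                   # illustrative term
if term in vocab:
    col = vocab.index(term)
    print term, "weight in first line:", tfidfdata[0, col]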