def apply(self):
    """Extract letter n-gram features from the stored data and send them on.

    When ``self.data`` is truthy, the table is rebuilt from ``self.tmpDom``
    and ``self.tmpData``, ``orngText.extractLetterNGram`` is run on it with
    ``self.size + 2`` as its second argument (presumably the n-gram length;
    confirm against orngText), the feature-count label is refreshed and the
    resulting table is sent on the "Example Table" channel.  Otherwise
    ``None`` is sent on that channel.
    """
    if not self.data:
        self.send("Example Table", None)
        return

    # The progress bar is sized from the current table, before it is rebuilt.
    pb = OWGUI.ProgressBar(self, iterations=len(self.data))
    try:
        self.data = orange.ExampleTable(orange.Domain(self.tmpDom), self.tmpData)
        newdata = orngText.extractLetterNGram(
            self.data,
            self.size + 2,
            textAttribute=self.textAttributePos,
            callback=pb.advance
        )
        feature_count = len(newdata.domain.getmetas(orngText.TEXTMETAID))
        self.lblFeatureNo.setText("\nNo. of features: \n%d" % feature_count)
        self.send("Example Table", newdata)
    finally:
        # Close the progress bar even when extraction or sending fails, so
        # the widget is not left showing a stuck progress indicator.
        pb.finish()
Пример #2
0
if __name__ == "__main__":
    # Manual test harness: loads a corpus from a hard-coded XML path, extracts
    # letter n-gram features and displays them in an OWCorrAnalysis widget.
    #from orngTextCorpus import *
    # NOTE(review): pickle is not used below; kept in case other module code
    # relies on the name being bound when run as a script -- confirm and drop.
    import pickle
    import orngText
    ##    os.chdir("/home/mkolar/Docs/Diplomski/repository/orange/OrangeWidgets/Other/")
    appl = QApplication(sys.argv)
    ow = OWCorrAnalysis()

    #owb = OWBagofWords.OWBagofWords()
    t = orngText.loadFromXML(r'c:\test\orange\msnbc.xml')
    #owb.data = t
    #owb.show()
    # `stop` and `p` are only needed by the commented-out alternatives below.
    stop = orngText.loadWordSet(r'C:\tmtorange\common\en_stopwords.txt')
    p = orngText.Preprocess(language='hr')
    print('Done with loading')
    t1 = orngText.extractLetterNGram(t, 2)
    # Alternative feature extractors, kept for experimentation:
    #t1 = orngText.extractWordNGram(t, stopwords = stop, measure = 'MI', threshold = 7, n=2)
    #t1 = orngText.extractWordNGram(t1, stopwords = stop, measure = 'MI', threshold = 10, n=3)
    #t1 = orngText.extractNamedEntities(t, stopwords = stop)
    #t1 = orngText.bagOfWords(t1, stopwords = stop)
    print(len(t1.domain.getmetas(orngText.TEXTMETAID)))
    print('Done with extracting')
    #t2 = orngText.FSS(t1, 'TF', 'MIN', 0.98)
    #print len(t2.domain.getmetas())
    print('Done with feature selection')
    appl.setMainWidget(ow)
    #t3 = orngText.DSS(t2, 'WF', 'MIN', 1)
    #print 'Done with document selection'
    ow.dataset(t1)
    print('Done')
    ow.show()
    # Enter the Qt event loop; without it the script ends right after show()
    # and the window does not stay on screen.
    appl.exec_loop()
if __name__ == "__main__":
    # Manual test harness: loads a corpus from a hard-coded XML path, extracts
    # letter n-gram features and displays them in an OWCorrAnalysis widget.
    #from orngTextCorpus import *
    # NOTE(review): cPickle is not used below and exists only on Python 2;
    # kept in case other module code relies on it -- confirm and drop.
    import cPickle
    import orngText
    ##    os.chdir("/home/mkolar/Docs/Diplomski/repository/orange/OrangeWidgets/Other/")
    appl = QApplication(sys.argv)
    ow = OWCorrAnalysis()

    #owb = OWBagofWords.OWBagofWords()
    t = orngText.loadFromXML(r'c:\test\orange\msnbc.xml')
    #owb.data = t
    #owb.show()
    # `stop` and `p` are only needed by the commented-out alternatives below.
    stop = orngText.loadWordSet(r'C:\tmtorange\common\en_stopwords.txt')
    p = orngText.Preprocess(language='hr')
    # Single-argument print(...) prints the same text under Python 2 and also
    # parses under Python 3.
    print('Done with loading')
    t1 = orngText.extractLetterNGram(t, 2)
    # Alternative feature extractors, kept for experimentation:
    #t1 = orngText.extractWordNGram(t, stopwords = stop, measure = 'MI', threshold = 7, n=2)
    #t1 = orngText.extractWordNGram(t1, stopwords = stop, measure = 'MI', threshold = 10, n=3)
    #t1 = orngText.extractNamedEntities(t, stopwords = stop)
    #t1 = orngText.bagOfWords(t1, stopwords = stop)
    print(len(t1.domain.getmetas(orngText.TEXTMETAID)))
    print('Done with extracting')
    #t2 = orngText.FSS(t1, 'TF', 'MIN', 0.98)
    #print len(t2.domain.getmetas())
    print('Done with feature selection')
    appl.setMainWidget(ow)
    #t3 = orngText.DSS(t2, 'WF', 'MIN', 1)
    #print 'Done with document selection'
    ow.dataset(t1)
    print('Done')
    ow.show()
    # Enter the Qt event loop; without it the script ends right after show()
    # and the window does not stay on screen.
    appl.exec_loop()
                maxword = word
            if freq < min:
                min = freq
                minword = word
            sum += freq
            pb.advance()
        avg = sum / len(words)
        if min == ():
            min = 0
        self.lblFeatNo.setText("No. of features: %d" % len(words))
        self.lblMin.setText("Min: %d  Min word = %s" % (min, minword))
        self.lblMax.setText("Max: %d  Max word = %s" % (max,maxword))
        self.lblAvg.setText("Avg: %.3f" % avg)
        pb.finish()

    def selectionChanged(self):
        """Re-enable the apply button, provided the widget currently holds data."""
        if not self.data:
            # Nothing loaded: leave the button state untouched.
            return
        self.applyButton.setDisabled(False)


if __name__ == "__main__":
    # Stand-alone demo: load a corpus from a hard-coded XML path, extract
    # letter n-gram features with the default settings and feed them to an
    # OWTextFeatureSelection widget.
    app = QApplication(sys.argv)

    corpus = orngText.loadFromXML(r'c:\test\orange\msnbc.xml')
    ngram_table = orngText.extractLetterNGram(corpus)
    #print ngram_table.domain.getmetas().values()

    widget = OWTextFeatureSelection()
    app.setMainWidget(widget)
    widget.show()
    # The data is handed over only after the widget is visible.
    widget.dataset(ngram_table)

    # Block in the Qt event loop until the window is closed.
    app.exec_loop()