示例#1
0
def ilp_wordification(input_dict):
    """Run wordification on the supplied tables and return its outputs.

    Values read from ``input_dict``:
      * ``'target_table'``, ``'other_tables'``, ``'context'`` -- forwarded
        unchanged to ``Wordification`` (each defaults to ``None``).
      * ``'f_ngram_size'`` -- converted with ``int``; defaults to 1.

    Returns a dict holding the result of ``to_arff()`` under ``'arff'`` and
    the result of ``wordify()`` under ``'corpus'``.

    Note: pruning is not applied in this entry point.
    """
    table = input_dict.get('target_table')
    others = input_dict.get('other_tables')
    ctx = input_dict.get('context')
    ngram_size = int(input_dict.get('f_ngram_size', 1))

    worker = Wordification(table, others, ctx, ngram_size)
    # NOTE(review): the meaning of the literal arguments ``1`` and ``False``
    # is defined by Wordification.run / calculate_tf_idfs -- confirm there.
    worker.run(1)
    worker.calculate_tf_idfs(False)

    # The ARFF export is produced before the corpus string, as before.
    arff_output = worker.to_arff()
    corpus_output = worker.wordify()
    return {'arff': arff_output, 'corpus': corpus_output}
示例#2
0
def ilp_wordification(input_dict):
    """Run wordification with a selectable weighting measure.

    Values read from ``input_dict``:
      * ``'target_table'``, ``'other_tables'``, ``'context'`` -- forwarded
        unchanged to ``Wordification`` (each defaults to ``None``).
      * ``'weighting_measure'`` -- passed to ``calculate_tf_idfs``;
        defaults to ``'tfidf'``.
      * ``'f_ngram_size'`` -- converted with ``int``; defaults to 1.
      * ``'idf'`` -- forwarded to ``Wordification`` (defaults to ``None``).
        Presumably previously computed IDF values to reuse -- TODO confirm
        against the ``Wordification`` constructor.

    Returns a dict with the result of ``to_arff()`` under ``'arff'``, the
    result of ``wordify()`` under ``'corpus'`` and the ``idf`` attribute of
    the ``Wordification`` instance under ``'idf'``.
    """
    target_table = input_dict.get('target_table')
    other_tables = input_dict.get('other_tables')
    weighting_measure = input_dict.get('weighting_measure', 'tfidf')
    context = input_dict.get('context')
    word_att_length = int(input_dict.get('f_ngram_size', 1))
    idf = input_dict.get('idf')

    wordification = Wordification(
        target_table, other_tables, context, word_att_length, idf)
    # NOTE(review): the meaning of the literal ``1`` is defined by
    # Wordification.run -- confirm there.
    wordification.run(1)
    wordification.calculate_tf_idfs(weighting_measure)

    return {
        'arff': wordification.to_arff(),
        'corpus': wordification.wordify(),
        'idf': wordification.idf,
    }
示例#3
0
    def __init__(self,target_table,other_tables,context):
        """Evaluate wordification across pruning levels and word-item lengths.

        For every pruning percentage in ``(None, 20, 40)`` and every
        word-item length in ``range(1, self.max_witem_length)``, a fresh
        ``Wordification`` is built, run, weighted with ``"tfidf"`` and,
        when the percentage is truthy, pruned. Two measurements are then
        recorded: the number of words in the wordified string and the value
        returned by ``get_cross_validation_accuracy`` for the ARFF output.

        Attributes set on the instance:
          * ``max_witem_length`` -- exclusive upper bound for the length loop.
          * ``results`` -- initialised to an empty list; not filled here.
          * ``feature_counts`` -- list of ``[percentage, [count per length]]``.
          * ``accuracies`` -- list of ``[percentage, [accuracy per length]]``.

        Parameters ``target_table``, ``other_tables`` and ``context`` are
        forwarded unchanged to ``Wordification``.
        """
        self.max_witem_length = 6
        self.results = []
        self.feature_counts = []
        self.accuracies = []

        for prunning_percentage in [None, 20, 40]:
            accuracies_for_percentage = []
            counts_for_percentage = []
            # NOTE(review): range() excludes max_witem_length itself, so
            # lengths 1..5 are evaluated -- confirm that is intended.
            for word_att_length in range(1, self.max_witem_length):
                # A single parenthesised argument prints the same text under
                # the Python 2 print statement and the Python 3 print function.
                print("percentage: %s witem: %s"
                      % (prunning_percentage, word_att_length))

                pruned = Wordification(
                    target_table, other_tables, context, word_att_length)
                pruned.run()
                pruned.calculate_tf_idfs("tfidf")
                # None (no pruning) is falsy, so prune() is skipped for it.
                if prunning_percentage:
                    pruned.prune(prunning_percentage)

                wordification_string = pruned.wordify()
                counts_for_percentage.append(
                    len(split_string_to_words(wordification_string)))

                arff_output = pruned.to_arff()
                accuracies_for_percentage.append(
                    get_cross_validation_accuracy(arff_output))

            self.feature_counts.append(
                [prunning_percentage, counts_for_percentage])
            self.accuracies.append(
                [prunning_percentage, accuracies_for_percentage])