def ilp_wordification(input_dict):
    """Run wordification on the tables described by ``input_dict``.

    Keys read from ``input_dict``:
        target_table: passed to ``Wordification`` (``None`` if missing).
        other_tables: passed to ``Wordification`` (``None`` if missing).
        context: passed to ``Wordification`` (``None`` if missing).
        f_ngram_size: converted with ``int`` and passed as the fourth
            ``Wordification`` argument; defaults to ``1``.

    Returns:
        A dict with two entries: ``'arff'`` holds the result of
        ``wordification.to_arff()`` and ``'corpus'`` the result of
        ``wordification.wordify()``.

    NOTE(review): another function named ``ilp_wordification`` is defined
    later in this source; if both live in the same module the later
    definition replaces this one at import time -- confirm which is wanted.
    """
    target_table = input_dict.get('target_table')
    other_tables = input_dict.get('other_tables')
    context = input_dict.get('context')
    word_att_length = int(input_dict.get('f_ngram_size', 1))

    wordification = Wordification(
        target_table, other_tables, context, word_att_length
    )
    wordification.run(1)
    wordification.calculate_tf_idfs(False)

    # Keep the original evaluation order: ARFF export first, then the corpus.
    arff = wordification.to_arff()
    corpus = wordification.wordify()
    return {'arff': arff, 'corpus': corpus}
def ilp_wordification(input_dict):
    """Run wordification with a selectable weighting measure.

    Keys read from ``input_dict``:
        target_table: passed to ``Wordification`` (``None`` if missing).
        other_tables: passed to ``Wordification`` (``None`` if missing).
        weighting_measure: forwarded to ``calculate_tf_idfs``; defaults
            to ``'tfidf'``.
        context: passed to ``Wordification`` (``None`` if missing).
        f_ngram_size: converted with ``int`` and passed as the fourth
            ``Wordification`` argument; defaults to ``1``.
        idf: passed as the fifth ``Wordification`` argument (``None`` if
            missing) -- presumably precomputed IDF values to reuse;
            TODO confirm against ``Wordification``.

    Returns:
        A dict with three entries: ``'arff'`` (result of ``to_arff()``),
        ``'corpus'`` (result of ``wordify()``) and ``'idf'`` (the
        ``idf`` attribute of the ``Wordification`` object after the run).
    """
    target_table = input_dict.get('target_table')
    other_tables = input_dict.get('other_tables')
    weighting_measure = input_dict.get('weighting_measure', 'tfidf')
    context = input_dict.get('context')
    word_att_length = int(input_dict.get('f_ngram_size', 1))
    idf = input_dict.get('idf')

    wordification = Wordification(
        target_table, other_tables, context, word_att_length, idf
    )
    wordification.run(1)
    wordification.calculate_tf_idfs(weighting_measure)

    # Keep the original evaluation order: ARFF export, corpus, then idf.
    arff = wordification.to_arff()
    corpus = wordification.wordify()
    return {'arff': arff, 'corpus': corpus, 'idf': wordification.idf}
def __init__(self, target_table, other_tables, context):
    """Evaluate wordification over several pruning levels and word lengths.

    For each pruning percentage in ``(None, 20, 40)`` and each
    ``word_att_length`` in ``range(1, self.max_witem_length)`` a fresh
    ``Wordification`` is built from ``target_table``, ``other_tables`` and
    ``context``, run, weighted with ``calculate_tf_idfs("tfidf")`` and,
    when the percentage is truthy, pruned with ``prune(percentage)``.

    Results are stored on the instance:
        self.feature_counts: list of ``[percentage, counts]`` pairs where
            ``counts[i]`` is the number of items returned by
            ``split_string_to_words`` for the wordified corpus.
        self.accuracies: list of ``[percentage, scores]`` pairs where
            ``scores[i]`` is the value returned by
            ``get_cross_validation_accuracy`` for the ARFF output.
        self.results: initialised to an empty list and not filled here.
    """
    self.max_witem_length = 6
    self.results = []
    self.feature_counts = []
    self.accuracies = []

    # ``None`` means "do not prune"; it is skipped by the truthiness check.
    pruning_levels = (None, 20, 40)

    for pruning_percentage in pruning_levels:
        scores = []
        counts = []
        for word_att_length in range(1, self.max_witem_length):
            # A single pre-formatted string prints the same text under
            # both the Python 2 print statement and the Python 3 function.
            print("percentage: %s witem: %s"
                  % (pruning_percentage, word_att_length))

            pruned = Wordification(
                target_table, other_tables, context, word_att_length
            )
            pruned.run()
            pruned.calculate_tf_idfs("tfidf")
            if pruning_percentage:
                pruned.prune(pruning_percentage)

            corpus = pruned.wordify()
            counts.append(len(split_string_to_words(corpus)))

            arff = pruned.to_arff()
            scores.append(get_cross_validation_accuracy(arff))

        self.feature_counts.append([pruning_percentage, counts])
        self.accuracies.append([pruning_percentage, scores])