def n_gram_graph_model(data):
    """Build one bigram graph per class and score the whole data set against them.

    For every known ``OpenStatus`` value, the rows of ``data`` carrying that
    status are formatted with ``format_data`` and turned into a class graph
    via ``NGramGraphModel.bigram_graph_class``. The complete data set is then
    formatted and passed, together with the per-class graphs, to
    ``NGramGraphModel.calc_sim_metrics``.

    Args:
        data: Table supporting boolean-mask indexing with an ``"OpenStatus"``
            column (presumably a ``pandas.DataFrame`` -- confirm with callers).

    Returns:
        A ``pandas.DataFrame`` built from the mapping returned by
        ``calc_sim_metrics``, with the mapping's keys used as the row index.
    """
    status_values = [
        "too localized",
        "open",
        "off topic",
        "not constructive",
        "not a real question",
    ]

    # Build the graph model for each class from that class's rows only.
    per_class_graphs = {}
    for status in status_values:
        class_rows = data[data["OpenStatus"] == status]
        class_model = NGramGraphModel(format_data(class_rows))
        per_class_graphs[status] = class_model.bigram_graph_class()

    # Compare the full corpus against every class graph.
    full_corpus = format_data(data)
    metrics = NGramGraphModel(full_corpus).calc_sim_metrics(
        per_class_graphs, full_corpus
    )
    return pd.DataFrame.from_dict(metrics, orient="index")
def similarity_metrics(corpus):
    """Run the bigram-graph pipeline on ``corpus`` and return its similarity metrics.

    Progress messages are written to standard output before each stage.

    Args:
        corpus: Input handed unchanged to the ``NGramGraphModel`` constructor.

    Returns:
        Whatever ``NGramGraphModel.calc_sim_metrics()`` returns.
    """
    model = NGramGraphModel(corpus)

    # Parenthesised single-argument print emits the same text on Python 2
    # and is also valid syntax on Python 3.
    print("Bigram Graph Class")
    # The return value is discarded here; presumably the call is needed for
    # its effect on ``model`` -- confirm against NGramGraphModel.
    model.bigram_graph_class()

    print("Calculating Similarity Metrics")
    # NOTE(review): called without arguments here, whereas n_gram_graph_model
    # passes (graph_models, corpus) -- confirm the method supports both forms.
    metrics = model.calc_sim_metrics()
    return metrics