g = ThreadNetwork(CommonAuthor(m)) g(forum) return g def TextSimilarityNetwork(forum, n=0.3): g = ThreadNetwork(TextSimilarity(n, cosine_similarity)) g(forum) return g def PostAfterNetwork(forum, dist=1, count=3): g = DirectedAuthorNetwork(PostAfter(dist=dist, count=count)) g(forum) return g def ThreadParticipationNetwork(forum, k=5): g = AuthorNetwork(ThreadParticipation(k)) g(forum) return g ######## # Analysis ######## if __name__ == '__main__': from DataModel import load_xml, rbp_tokenize #f = load_xml('adcs.xml') f = load_xml('nabble.xml') #f.run_tokenizer(rbp_tokenize) g = ThreadParticipationNetwork(f, k=1) import pdb;pdb.set_trace()
def wanas_Post_features(post):
    """Return a dict of the eight per-post feature values for ``post``.

    If the post's thread has no token index yet (``token_index is None``),
    the tokenizer is first run over the whole forum with ``rbp_tokenize``.
    Each feature is produced by the helper of the same name, called with
    ``post``; the helpers are invoked in the order the keys are listed.

    Raises AssertionError if any feature value fails ``v >= 0.0``.
    """
    # Tokenize lazily: only when the thread has not been indexed so far.
    thread = post.thread
    if thread.token_index is None:
        thread.forum.run_tokenizer(rbp_tokenize)

    features = {
        'onThreadTopic': onThreadTopic(post),
        'overlapPrevious': overlapPrevious(post),
        'overlapDistance': overlapDistance(post),
        'timeliness': timeliness(post),
        'lengthiness': lengthiness(post),
        'formatEmoticons': formatEmoticons(post),
        'formatCapitals': formatCapitals(post),
        'weblinks': weblinks(post),
    }

    # Sanity check: every feature value must satisfy v >= 0.0.
    non_negative = all(value >= 0.0 for value in features.values())
    assert non_negative, "Ended up with a negative feature!"
    return features


# From wanas testing
#f.run_tokenizer(rbp_tokenize)
#for p in f.posts[:100]:
#    #print p, len(p), onThreadTopic(p), overlapPrevious(p), overlapDistance(p), timeliness(p), lengthiness(p), formatEmoticons(p)
#    print p, len(p), formatEmoticons(p), formatCapitals(p), weblinks(p)

from common import user_post_aggregate

if __name__ == '__main__':
    from DataModel import load_xml, rbp_tokenize

    f = load_xml('adcs.xml')
    # Alternative corpus kept for reference: f = load_xml('nabble.xml')
    user_post_aggregate(f, wanas_Post_features)

    # Stop in the debugger so that the loaded forum can be inspected by hand.
    import pdb
    pdb.set_trace()