g = ThreadNetwork(CommonAuthor(m))
  g(forum)
  return g

def TextSimilarityNetwork(forum, n=0.3):
  """Build a ThreadNetwork driven by a TextSimilarity relation and apply it.

  forum -- object the freshly built network is called with.
  n     -- first argument handed to TextSimilarity (presumably a similarity
           threshold -- TODO confirm against TextSimilarity).

  The relation is always constructed with cosine_similarity as its second
  argument. Returns the network object after it has been called on forum.
  """
  relation = TextSimilarity(n, cosine_similarity)
  network = ThreadNetwork(relation)
  network(forum)
  return network

def PostAfterNetwork(forum, dist=1, count=3):
  """Build a DirectedAuthorNetwork driven by a PostAfter relation and apply it.

  forum -- object the freshly built network is called with.
  dist  -- forwarded to PostAfter as its 'dist' keyword argument.
  count -- forwarded to PostAfter as its 'count' keyword argument.

  Returns the network object after it has been called on forum.
  """
  relation = PostAfter(dist=dist, count=count)
  network = DirectedAuthorNetwork(relation)
  network(forum)
  return network

def ThreadParticipationNetwork(forum, k=5):
  """Build an AuthorNetwork driven by a ThreadParticipation relation and apply it.

  forum -- object the freshly built network is called with.
  k     -- sole argument handed to ThreadParticipation (presumably a minimum
           participation count -- TODO confirm against ThreadParticipation).

  Returns the network object after it has been called on forum.
  """
  relation = ThreadParticipation(k)
  network = AuthorNetwork(relation)
  network(forum)
  return network

########
# Analysis
########

if __name__ == '__main__':
  # Ad-hoc analysis entry point: load one XML data set, build a single
  # network from it, then stop in the debugger for manual inspection.
  from DataModel import load_xml, rbp_tokenize
  #f = load_xml('adcs.xml')
  f = load_xml('nabble.xml')
  #f.run_tokenizer(rbp_tokenize)
  g = ThreadParticipationNetwork(f, k=1)
  # Deliberate breakpoint: f (loaded data) and g (built network) stay in
  # scope so they can be explored interactively.
  import pdb;pdb.set_trace()
# Example #2
0
def wanas_Post_features(post):
  """Collect the per-post feature values into a dictionary.

  post -- object exposing post.thread.token_index and post.thread.forum.

  If the thread's token_index is None, the tokenizer is first run on the
  thread's forum with rbp_tokenize (a side effect on the forum object).

  Returns a dict keyed by feature name, each value being the result of the
  feature function of the same name applied to post.

  Raises AssertionError (when assertions are enabled) if any feature value
  fails the `>= 0.0` check.
  """
  # Tokenize lazily: only when no token index has been built yet.
  needs_tokenizing = post.thread.token_index is None
  if needs_tokenizing:
    post.thread.forum.run_tokenizer(rbp_tokenize)

  # Feature functions are invoked in the listed order.
  features = {
    'onThreadTopic': onThreadTopic(post),
    'overlapPrevious': overlapPrevious(post),
    'overlapDistance': overlapDistance(post),
    'timeliness': timeliness(post),
    'lengthiness': lengthiness(post),
    'formatEmoticons': formatEmoticons(post),
    'formatCapitals': formatCapitals(post),
    'weblinks': weblinks(post),
  }

  # Sanity check: every feature value must satisfy value >= 0.0.
  for value in features.values():
    assert value >= 0.0, "Ended up with a negative feature!"
  return features

# From wanas testing
  #f.run_tokenizer(rbp_tokenize)
  #for p in f.posts[:100]:
  #  #print p, len(p), onThreadTopic(p), overlapPrevious(p), overlapDistance(p), timeliness(p), lengthiness(p), formatEmoticons(p)
  #  print p, len(p), formatEmoticons(p), formatCapitals(p), weblinks(p)
   
from common import user_post_aggregate
if __name__ == '__main__':
  # Ad-hoc driver: load the 'adcs.xml' data and hand it, together with the
  # per-post feature extractor, to user_post_aggregate. The return value of
  # that call is not kept.
  from DataModel import load_xml, rbp_tokenize
  f = load_xml('adcs.xml')
  #f = load_xml('nabble.xml')
  user_post_aggregate(f, wanas_Post_features)

  # Deliberate breakpoint for interactive inspection after the run.
  import pdb;pdb.set_trace()