Пример #1
0
def overlapPrevious(post):
  """Return the largest overlap score between `post` and any earlier post.

  Posts of `post.thread.posts` are visited in order until one compares
  equal to `post`; each visited post's `token_index` is scored against
  `post.token_index` with `overlap`. Returns 0.0 when no post precedes
  `post` in the thread.
  """
  check_tokenized(post)

  def preceding():
    # Lazily yield thread posts up to (but excluding) `post` itself.
    for candidate in post.thread.posts:
      if candidate == post:
        return
      yield candidate

  scores = [
    overlap(post.token_index, earlier.token_index)
    for earlier in preceding()
  ]
  if not scores:
    return 0.0
  return max(scores)
Пример #2
0
def overlapDistance(post):
  """Return how many positions back the best-overlapping earlier post is.

  Thread posts are scanned in order until one compares equal to `post`.
  Each earlier post is scored with `overlap` against `post.token_index`.
  The result is the index at which the scan stopped minus the index of
  the first post with the highest score. Returns 0 when no post precedes
  `post` in the thread.
  """
  check_tokenized(post)
  scores = []
  for stop_index, earlier in enumerate(post.thread.posts):
    if earlier == post:
      break
    scores.append(overlap(post.token_index, earlier.token_index))
  if not scores:
    return 0
  # list.index returns the first occurrence, i.e. the earliest best match.
  best_index = scores.index(max(scores))
  return stop_index - best_index
Пример #3
0
def mostSimilarTextRelative(post):
  """Return the positional distance to the most similar earlier post.

  Similarity is `cosine_similarity` between `post.token_index` and the
  candidate's `token_index`. Only posts before `post.position` in the
  thread are considered; on ties the earliest candidate wins. Returns 0
  for the post at position 0, which has no predecessors.
  """
  # NOTE(review): siblings pass `post` to check_tokenized; here the thread
  # is passed -- presumably intentional, confirm against the helper.
  check_tokenized(post.thread)
  own_position = post.position
  if own_position == 0:
    return 0
  earlier_posts = post.thread.posts[:own_position]
  closest = max(
    earlier_posts,
    key=lambda candidate: cosine_similarity(
      post.token_index, candidate.token_index),
  )
  return own_position - closest.position
Пример #4
0
def onThreadTopic(post):
  """Return the overlap score of `post` with the thread's topic.

  The first post of the thread is scored against the tokenized thread
  title (using `post.tokenizer`); every other post is scored against the
  first post's `token_index`.
  """
  check_tokenized(post)
  if post == post.thread.posts[0]:
    # The leading post has no earlier post to compare with, so the
    # thread title stands in as the topic reference.
    title_tokens = post.tokenizer(post.thread.title)
    return overlap(post.token_index, title_tokens)
  return overlap(post.token_index, post.thread.posts[0].token_index)