def parse_post_per_word_face(collect, parameters, RECENT, LIMIT, SKIP):
    """
    Return one page of posts whose message contains a given word.

    The word to look for is read from ``parameters[0]['$match']['word']``.
    That key is removed in place (the caller's pipeline is modified) before
    the pipeline is handed to ``collect.aggregate``.

    A post matches when any space-separated token of its message, after
    ``filtered`` and ``punct_tab`` translation, equals the ``filtered`` word.
    Matching posts are ordered by ``like_count``, highest first, and then
    sliced to ``[SKIP:SKIP + LIMIT]``.

    Args:
        collect: object exposing ``aggregate(pipeline)``.
        parameters: aggregation pipeline; its first stage must hold a
            ``'$match'`` dict containing the ``'word'`` key.
        RECENT: accepted but not used by this function.
        LIMIT: maximum number of posts returned.
        SKIP: number of leading matches to drop.

    Returns:
        list of the matching documents for the requested page.
    """
    first_match = parameters[0]['$match']
    word = first_match.pop('word')

    posts = collect.aggregate(parameters)
    print('\nData acquired.\n')

    matches = []
    for post in posts:
        # A missing/empty message value is treated as an empty string.
        message = post['message'] or ''
        tokens = filtered(message).translate(punct_tab).split(' ')

        for token in tokens:
            if filtered(word) == filtered(token):
                matches.append(post)
                break  # one matching token is enough for this post

    # Most liked first; ties keep their cursor order (stable sort).
    matches.sort(key=itemgetter('like_count'), reverse=True)

    # Pagination.
    return matches[SKIP:SKIP + LIMIT]
# Example #2
0
def parse_post_per_word_no_rt(collect, parameters):
    """
    Return every tweet whose own text contains a given word.

    The word is taken from ``parameters[0]['$match']['word']``; the key is
    removed in place (the caller's pipeline is modified) before the pipeline
    is passed to ``collect.aggregate``.

    A tweet matches when any space-separated token of
    ``doc['status']['text']``, after ``filtered`` and ``punct_tab``
    translation, equals the ``filtered`` word. No sorting or pagination is
    applied; matches are returned in cursor order.

    Args:
        collect: object exposing ``aggregate(pipeline)``.
        parameters: aggregation pipeline; its first stage must hold a
            ``'$match'`` dict containing the ``'word'`` key.

    Returns:
        list of the matching documents.
    """
    first_match = parameters[0]['$match']
    word = first_match.pop('word')

    tweets = collect.aggregate(parameters)
    print('\nData acquired.\n')

    hits = []
    for tweet in tweets:
        tokens = filtered(tweet['status']['text']).translate(punct_tab).split(' ')

        for token in tokens:
            if filtered(word) == filtered(token):
                hits.append(tweet)
                break  # one matching token is enough for this tweet

    return hits
# Example #3
0
def parse_word_face(collect, FILTER, SKIP, LIMIT, parameters, RECENT):
    """
    Return one page of the most frequent words found in post messages.

    Every document yielded by ``collect.aggregate(parameters)`` has its
    ``'message'`` split into words by ``clear_text``; each word is
    normalised with ``filtered`` and counted once per occurrence.

    Args:
        collect: object exposing ``aggregate(pipeline)``.
        FILTER: accepted but not used by this function.
        SKIP: number of leading (most frequent) words to drop.
        LIMIT: maximum number of words returned.
        parameters: aggregation pipeline passed through unchanged.
        RECENT: accepted but not used by this function.

    Returns:
        list of ``{'word': ..., 'count': ...}`` dicts, highest count first.
    """
    posts = collect.aggregate(parameters)

    print('\nPosts Acquired.\n')

    tally = {}
    for post in posts:
        # A missing/empty message value is treated as an empty string.
        message = post['message'] or ''
        for token in clear_text(message):
            key = filtered(token)
            tally[key] = tally.get(key, 0) + 1

    # Highest count first; ties keep first-seen order (stable sort).
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)

    # Pagination.
    page = ranked[SKIP:SKIP + LIMIT]

    return [{'word': key, 'count': total} for key, total in page]
# Example #4
0
def _text_has_word(text, word):
    """Return True when ``word`` equals one of the tokens of ``text``.

    ``text`` is passed through ``filtered``, translated with ``punct_tab``
    and split on single spaces; each token and ``word`` are compared after
    ``filtered``.
    """
    tokens = filtered(text).translate(punct_tab).split(' ')
    return any(filtered(word) == filtered(token) for token in tokens)


def parse_post_per_word(collect, FILTER, projection, SKIP, LIMIT, parameters, RECENT):
    """
    Return one page of tweets containing a given word, highest count first.

    Retweets come from ``collect.aggregate(parameters)`` and are matched on
    ``doc['status']['retweeted_status']['text']``. When they do not fill the
    requested page (fewer than ``LIMIT + SKIP`` matches) and ``RECENT`` is
    falsy, original tweets are also fetched with ``collect.find`` and
    matched on ``doc['status']['text']``; those documents get
    ``doc['count'] = 0`` and ``doc['status']['id'] = doc['status']['id_str']``
    so they sort and read like the aggregated ones.

    Args:
        collect: object exposing ``aggregate(pipeline)`` and
            ``find(filter, projection)``.
        FILTER: base query for original tweets. It is copied before the
            ``status.retweeted_status`` condition is added, so the caller's
            dict is left untouched.
        projection: projection passed to ``collect.find``.
        SKIP: number of leading matches to drop.
        LIMIT: maximum number of tweets returned.
        parameters: aggregation pipeline; its first stage must hold a
            ``'$match'`` dict containing the ``'word'`` key. That key is
            removed in place before the pipeline is run.
        RECENT: when truthy, original tweets are never queried.

    Returns:
        list of matching documents sorted by their ``'count'`` value in
        descending order, sliced to ``[SKIP:SKIP + LIMIT]``.
    """
    # 'word' travels inside the first $match stage; popping it here keeps it
    # out of the pipeline that is actually executed.
    word = parameters[0]['$match'].pop('word')

    db_cursor = collect.aggregate(parameters)
    print('\nRetweets acquired.\n')

    output = [
        doc for doc in db_cursor
        if _text_has_word(doc['status']['retweeted_status']['text'], word)
    ]

    # Fall back to original tweets only when retweets cannot fill the page.
    if len(output) < LIMIT + SKIP and not RECENT:
        # Work on a copy: the extra condition must not leak into the
        # caller's filter.
        tweet_filter = dict(FILTER)
        tweet_filter['status.retweeted_status'] = {'$exists': False}

        db_cursor = collect.find(tweet_filter, projection)
        print('\nTweets acquired.\n')

        try:
            for doc in db_cursor:
                if _text_has_word(doc['status']['text'], word):
                    doc['count'] = 0
                    doc['status']['id'] = doc['status']['id_str']
                    output.append(doc)
        finally:
            # Release the cursor even if a document is malformed.
            db_cursor.close()

    output = sorted(output, key=itemgetter('count'), reverse=True)
    return output[SKIP:SKIP + LIMIT]  # pagination
# Example #5
0
def _count_unique_words(word_count, text, weight):
    """Add ``weight`` to ``word_count`` once per distinct word of ``text``.

    Words come from ``clear_text(text)``. Repeats of the same raw word
    within one text are ignored; the counter key is ``filtered(word)``.
    Assumes the words are hashable (strings) -- TODO confirm against
    ``clear_text``.
    """
    seen = set()
    for word in clear_text(text):
        if word in seen:
            continue  # count each word only once per text
        seen.add(word)
        key = filtered(word)
        word_count[key] = word_count.get(key, 0) + weight


def parse_word(collect, FILTER, projection, SKIP, LIMIT, parameters, RECENT):
    """
    Return one page of the top words together with their counts.

    Retweeted texts come from ``collect.aggregate(parameters)``; each
    distinct word of ``doc['retweeted_status']['text']`` is credited with
    that document's ``doc['count']``. Unless ``RECENT`` is truthy, original
    tweets are then fetched with ``collect.find`` and each distinct word of
    ``doc['status']['text']`` is credited with 1.

    Args:
        collect: object exposing ``aggregate(pipeline)`` and
            ``find(filter, projection)``.
        FILTER: base query for original tweets. It is copied before the
            ``status.retweeted_status`` condition is added, so the caller's
            dict is left untouched.
        projection: projection passed to ``collect.find``.
        SKIP: number of leading (highest count) words to drop.
        LIMIT: maximum number of words returned.
        parameters: aggregation pipeline passed through unchanged.
        RECENT: when truthy, original tweets are not queried.

    Returns:
        list of ``{'word': ..., 'count': ...}`` dicts, highest count first.
    """
    word_count = {}

    db_cursor = collect.aggregate(parameters)

    print('\nRetweets\' texts acquired.\n')

    for doc in db_cursor:
        text = doc['retweeted_status']['text']
        rt_count = doc['count']
        _count_unique_words(word_count, text, rt_count)

    if not RECENT:
        # Work on a copy: the extra condition must not leak into the
        # caller's filter.
        tweet_filter = dict(FILTER)
        tweet_filter['status.retweeted_status'] = {'$exists': False}

        db_cursor = collect.find(tweet_filter, projection)

        print('\nTweets\' texts acquired.\n')

        try:
            for doc in db_cursor:
                _count_unique_words(word_count, doc['status']['text'], 1)
        finally:
            # Release the cursor even if a document is malformed.
            db_cursor.close()

    # Highest count first; ties keep first-seen order (stable sort).
    ranked = sorted(word_count.items(), key=lambda pair: pair[1], reverse=True)

    # Pagination.
    page = ranked[SKIP:SKIP + LIMIT]

    return [{'word': word, 'count': count} for word, count in page]