Python analyze_articles示例，analysis.analysis_core.analyze_articles Python示例

示例#1

0

显示文件

文件： analysis_app.py 项目： mingkim/ward

def analyze_hit_article(analyzer, group, article_set, is_post=False, is_comment=False):
    """
    Analyze every article of a set and store the extracted words.

    :param analyzer: analyzer for analysis
    :param group: group object
    :param article_set: list of articles
    :param is_post: must be exactly True when the articles are posts
    :param is_comment: must be exactly True when the articles are comments
                       (only consulted when is_post is not True)
    :return: "no_message" or "no_article" when processing stops early,
             otherwise None once every article has been handled
    """
    for article in article_set:
        # Pick the Attachment lookup that matches the kind of article.
        if is_post is True:
            attachments = Attachment.objects.filter(post=article)
        elif is_comment is True:
            attachments = Attachment.objects.filter(comment=article)
        else:
            return "no_article"

        if attachments:
            # Only the first attachment is used; its description wins over its title.
            first = attachments[0]
            if first.description is not None:
                text = first.description
            elif first.title is not None:
                text = first.title
            else:
                return "no_message"
        else:
            # No attachment: fall back to the article's own message.
            text = article.message
            if text is None:
                return "no_message"

        found_words = core.analyze_articles(analyzer, text)
        add_words_db(group, found_words, article.like_count, article.comment_count)

示例#2

0

显示文件

文件： analysis_app.py 项目： iitians/ward

def analyze_hit_article(group, article_set, is_post=False, is_comment=False):
    """
    Analyze every article of a set and store the extracted words.

    :param group: group object
    :param article_set: list of articles
    :param is_post: must be exactly True when the articles are posts
    :param is_comment: must be exactly True when the articles are comments
                       (only consulted when is_post is not True)
    :return: "no_message" or "no_article" when processing stops early,
             otherwise None once every article has been handled
    """
    for article in article_set:
        # Pick the Attachment lookup that matches the kind of article.
        if is_post is True:
            attachments = Attachment.objects.filter(post=article)
        elif is_comment is True:
            attachments = Attachment.objects.filter(comment=article)
        else:
            return "no_article"

        if attachments:
            # Only the first attachment is used; its description wins over its title.
            first = attachments[0]
            if first.description is not None:
                text = first.description
            elif first.title is not None:
                text = first.title
            else:
                return "no_message"
        else:
            # No attachment: fall back to the article's own message.
            text = article.message
            if text is None:
                return "no_message"

        found_words = core.analyze_articles(text)
        add_words_db(group, found_words, article.like_count, article.comment_count)

示例#3

0

显示文件

def run_app():
    """Test-only helper: print one group's name and the analysis of a sample message."""
    # Group ids noted by the original author:
    #   lifecoding - 174499879257223, node - 168705546563077
    # Usage hint: import analysis.spam_app as spam
    sample_message = '$아줌마$상대로섹알바하실분$1일수익80만홈피 $ w w w . y c c 6 6 . c o m !'

    target_group = Group.objects.filter(id=168705546563077)[0]
    print("name is " + target_group.name)

    analyzed = core.analyze_articles(analyzer, sample_message)
    print(analyzed)

示例#4

0

显示文件

def delete_spam_sequence(data_object):
    """
    Learn spam words from a deleted spam article.

    The article's message is analyzed, the resulting words are handed to
    ``add_spam_words`` for the article's group, and ``update_words_level``
    is then run for that group.

    :param data_object: deleted article object (its ``message`` and ``group`` are read)
    """
    spam_words = core.analyze_articles(analyzer, data_object.message)

    owner_group = data_object.group
    add_spam_words(owner_group, spam_words)
    update_words_level(owner_group)

示例#5

0

显示文件

文件： analysis_app.py 项目： mingkim/ward

def analyze_feed(analyzer, data_object, group):
    """
    Analyze a feed message against the heavily weighted archived words of a group.

    :param analyzer: analyzer for analysis
    :param data_object: the feed text; it is used directly as the message
    :param group: group whose ArchiveAnalysisWord rows with ``weigh >= 100``
                  form the reference word list
    :return: result of ``core.analysis_text_by_words(reference_words, feed_words)``
    """
    message = data_object
    # No attachment text is extracted in this variant, so it never contributes words.
    attach_message = ''

    # Compare strings by value: ``is not ''`` tests object identity, which is
    # implementation-dependent and raises a SyntaxWarning on Python 3.8+.
    if message != '':
        message_word_set = core.analyze_articles(analyzer, message)
    else:
        message_word_set = []

    if attach_message != '':
        attach_word_set = core.analyze_articles(analyzer, attach_message)
    else:
        attach_word_set = []

    # Merge both word lists and drop duplicates.
    word_set = list(set(message_word_set + attach_word_set))

    word_db = ArchiveAnalysisWord.objects.filter(group=group, weigh__gte=100)
    data_set = [entry.word for entry in word_db]

    return core.analysis_text_by_words(data_set, word_set)

示例#6

0

显示文件

文件： spam_app.py 项目： cbajs12/ward

def delete_spam_sequence(data_object):
    """
    Learn spam words from a deleted spam article.

    The article's message is analyzed, the resulting words are handed to
    ``add_spam_words`` for the article's group, and ``update_words_level``
    is then run for that group.

    :param data_object: deleted article object (its ``message`` and ``group`` are read)
    """
    spam_words = core.analyze_articles(analyzer, data_object.message)

    owner_group = data_object.group
    add_spam_words(owner_group, spam_words)
    update_words_level(owner_group)

示例#7

0

显示文件

文件： analysis_app.py 项目： iitians/ward

def analyze_feed(data_object):
    """
    Analyze a feed (message plus attachment text) against its group's MonthlyWords.

    :param data_object: feed object; ``message`` and ``group`` are read as
                        attributes, ``'attachment'`` through ``in`` / ``get``
    :return: result of ``core.analysis_text_by_words(reference_words, feed_words, 5)``
    """
    message = data_object.message if data_object.message is not None else ''

    # Attachment text: the description is preferred, the title is the fallback.
    attach_message = ''
    if 'attachment' in data_object:
        attach = data_object.get('attachment')
        if 'description' in attach:
            attach_message = attach.get('description')
        elif 'title' in attach:
            attach_message = attach.get('title')

    # Compare strings by value: ``is not ''`` tests object identity, which is
    # implementation-dependent and raises a SyntaxWarning on Python 3.8+.
    if message != '':
        message_word_set = core.analyze_articles(message)
    else:
        message_word_set = []

    if attach_message != '':
        attach_word_set = core.analyze_articles(attach_message)
    else:
        attach_word_set = []

    # Merge both word lists and drop duplicates.
    word_set = list(set(message_word_set + attach_word_set))

    word_db = MonthlyWords.objects.filter(group=data_object.group)
    data_set = [entry.word for entry in word_db]

    return core.analysis_text_by_words(data_set, word_set, 5)

示例#8

0

显示文件

文件： analysis_app.py 项目： pjwards/ward

def analyze_feed(data_object):
    """
    Analyze a feed (message plus attachment text) against its group's MonthlyWords.

    :param data_object: feed object; ``message`` and ``group`` are read as
                        attributes, ``"attachment"`` through ``in`` / ``get``
    :return: result of ``core.analysis_text_by_words(reference_words, feed_words, 5)``
    """
    message = data_object.message if data_object.message is not None else ""

    # Attachment text: the description is preferred, the title is the fallback.
    attach_message = ""
    if "attachment" in data_object:
        attach = data_object.get("attachment")
        if "description" in attach:
            attach_message = attach.get("description")
        elif "title" in attach:
            attach_message = attach.get("title")

    # Compare strings by value: ``is not ""`` tests object identity, which is
    # implementation-dependent and raises a SyntaxWarning on Python 3.8+.
    if message != "":
        message_word_set = core.analyze_articles(message)
    else:
        message_word_set = []

    if attach_message != "":
        attach_word_set = core.analyze_articles(attach_message)
    else:
        attach_word_set = []

    # Merge both word lists and drop duplicates.
    word_set = list(set(message_word_set + attach_word_set))

    word_db = MonthlyWords.objects.filter(group=data_object.group)
    data_set = [entry.word for entry in word_db]

    return core.analysis_text_by_words(data_set, word_set, 5)

示例#9

0

显示文件

文件： spam_app.py 项目： mingkim/ward

def analyze_feed_spam(analyzer, group, message):
    """
    Check a message against the spam words registered for a group.

    :param analyzer: analyzer for analysis
    :param group: group object
    :param message: message of post or comment
    :return: result of ``core.analysis_text_by_words`` for the group's spam
             words and the words of the message (documented upstream as true or false)
    """
    # Only spam words whose status is 'filter' or 'user' are taken into account.
    accepted_status = Q(status='filter') | Q(status='user')
    spam_entries = SpamWordList.objects.filter(Q(group=group), accepted_status)
    spam_words = [entry.word for entry in spam_entries]

    message_words = core.analyze_articles(analyzer, message)
    return core.analysis_text_by_words(spam_words, message_words)

示例#10

0

显示文件

文件： analysis_app.py 项目： iitians/ward

def run_app():
    """Manual smoke run: print a greeting, then analyze three sample messages in order."""
    print('hello')
    sample_messages = (
        '초보자들 헷갈리라고 넣는 겁니다 (웃음). 지금의 단계에서는 굳이 이해하려고 하실 필요가 없습니다. 꼭 알고 싶으시다면 "변수는 정보와 실체의 합으로서 존재하는데, test는 변수에 담긴 정보를 의미하고 &test는 변수의 실체를 의미한다" 정도로 생각해 두시면 되겠습니다. 더 쉽게 말하면, int b = 72; 이렇게 값이 72인 변수 b가 있을 때, 우리는 이것을 b라는 상자에 72라는 정보가 담겨 있는 것으로 이해할 수 있습니다. b라고 지정하면 72라는 값을 꺼내 주는 것이고, &b라고 지정하면 72가 담긴 상자를 통째로 건네주는 것입니다. 내용물을 꺼내서 주면 받는 쪽에서는 상자에 접근할 수 없으므로 상자의 내용물을 바꿀 수는 없습니다. 상자를 통째로 주면 받는 쪽에서 상자에 다른 물건을 담아놓을 수 있겠죠.',
        '우려했던 K-알파고가 곧 나올듯 합니다.... :(',
        '역시 ..LG',
    )
    # The results are discarded; the calls are made only to exercise the analyzer.
    for sample in sample_messages:
        core.analyze_articles(sample)

示例#11

0

显示文件

文件： analysis_app.py 项目： pjwards/ward

def run_app():
    """Manual smoke run: print a greeting, then analyze three sample messages in order."""
    print("hello")
    sample_messages = (
        '초보자들 헷갈리라고 넣는 겁니다 (웃음). 지금의 단계에서는 굳이 이해하려고 하실 필요가 없습니다. 꼭 알고 싶으시다면 "변수는 정보와 실체의 합으로서 존재하는데, test는 변수에 담긴 정보를 의미하고 &test는 변수의 실체를 의미한다" 정도로 생각해 두시면 되겠습니다. 더 쉽게 말하면, int b = 72; 이렇게 값이 72인 변수 b가 있을 때, 우리는 이것을 b라는 상자에 72라는 정보가 담겨 있는 것으로 이해할 수 있습니다. b라고 지정하면 72라는 값을 꺼내 주는 것이고, &b라고 지정하면 72가 담긴 상자를 통째로 건네주는 것입니다. 내용물을 꺼내서 주면 받는 쪽에서는 상자에 접근할 수 없으므로 상자의 내용물을 바꿀 수는 없습니다. 상자를 통째로 주면 받는 쪽에서 상자에 다른 물건을 담아놓을 수 있겠죠.',
        "우려했던 K-알파고가 곧 나올듯 합니다.... :(",
        "역시 ..LG",
    )
    # The results are discarded; the calls are made only to exercise the analyzer.
    for sample in sample_messages:
        core.analyze_articles(sample)

示例#12

0

显示文件

def analyze_feed_spam(group, message):
    """
    Check a message against the spam words registered for a group.

    Uses the module-level ``analyzer`` to split the message into words.

    :param group: group object
    :param message: message of post or comment
    :return: result of ``core.analysis_text_by_words`` for the group's spam
             words and the words of the message (documented upstream as true or false)
    """
    # Only spam words whose status is 'filter' or 'user' are taken into account.
    accepted_status = Q(status='filter') | Q(status='user')
    spam_entries = SpamWordList.objects.filter(Q(group=group), accepted_status)
    spam_words = [entry.word for entry in spam_entries]

    message_words = core.analyze_articles(analyzer, message)
    return core.analysis_text_by_words(spam_words, message_words, 0)

示例#13

0

显示文件

文件： analysis_app.py 项目： iitians/ward

def weekly_analyze_feed(group):
    """
    Rebuild the stored trend words (MonthlyWords) of a group from its recent feed.

    Posts and comments created during the last 40 days are analyzed. Words whose
    accumulated weight exceeds the threshold computed by
    ``_words_above_threshold`` are saved as MonthlyWords rows stamped with the
    current time.

    When the group already has stored words, they are rebuilt only if
    ``relativedelta(now, lastfeeddate).days`` of the first stored row is greater
    than 1. In that case the old rows are deleted before the new ones are computed.

    :param group: group object
    :return: True when the words were rebuilt; False when the stored words are
             still considered fresh, or when the group has neither posts nor
             comments inside the window
    """
    now = timezone.now()
    window = (now - timezone.timedelta(days=40), now)

    if MonthlyWords.objects.filter(group=group).exists():
        first_stored = MonthlyWords.objects.filter(group=group)[0]
        # NOTE(review): relativedelta.days is only the day component left over
        # after whole months/years - confirm this is the intended freshness test.
        age = rdelta.relativedelta(now, first_stored.lastfeeddate)
        if age.days <= 1:
            return False
        MonthlyWords.objects.filter(group=group).delete()

    posts = Post.objects.filter(group=group, created_time__range=window)
    comments = Comment.objects.filter(group=group, created_time__range=window)

    # Give up only when BOTH sources are empty. The previous condition
    # ``post and comment is None`` parsed as ``post and (comment is None)``,
    # which rejected groups that had posts but no comments.
    if not posts.exists() and not comments.exists():
        return False

    weights = {}
    _accumulate_word_weights(posts, weights)
    _accumulate_word_weights(comments, weights)

    for word, weight in _words_above_threshold(weights):
        MonthlyWords(group=group, lastfeeddate=now, word=word, weigh=weight).save()

    return True


def _accumulate_word_weights(articles, weights):
    """Add each article's like + comment count to every word it repeats in ``weights``."""
    for article in articles:
        for word in core.analyze_articles(article.message):
            if word in weights:
                weights[word] += article.like_count + article.comment_count
            else:
                # The first sighting only registers the word; weight is
                # accumulated from the second sighting onwards.
                weights[word] = 0


def _words_above_threshold(weights):
    """Return the (word, weight) pairs whose weight exceeds twice the smoothed mean."""
    scored = [value for value in weights.values() if value != 0]
    # Both sums start at 1, which also avoids dividing by zero when nothing scored.
    threshold = int((1 + sum(scored)) / (1 + len(scored)) * 2)
    return [(word, value) for word, value in weights.items() if value > threshold]

示例#14

0

显示文件

文件： analysis_app.py 项目： iitians/ward

def analyze_monthly_post(group, from_date, to_date):
    """
    Make trend words (MonthTrendWord) for a group from the posts of a period.

    If the group has no trend word dated inside the period, the words are
    computed from the posts created in that period. If it has one, the words
    are recomputed (after deleting the period's existing rows) only when both
    ``relativedelta(to_date, datedtime).days < 30`` and
    ``relativedelta(to_date, lastfeeddate).days > 1`` hold for the first
    existing row.

    :param group: group object
    :param from_date: collect data from this date
    :param to_date: collect data to this date
    :return: True when trend words were (re)computed; False when the existing
             words are kept or the period contains no posts
    """
    period = (from_date, to_date)

    # A group without any trend word also has none inside the period, so this
    # single lookup covers both the "never analyzed" and "not yet analyzed for
    # this period" cases.
    try:
        existing = MonthTrendWord.objects.filter(group=group, datedtime__range=period)[0]
    except IndexError:
        existing = None

    if existing is not None:
        # NOTE(review): relativedelta.days is only the day component left over
        # after whole months/years - confirm these two checks behave as intended.
        since_dated = rdelta.relativedelta(to_date, existing.datedtime)
        if not since_dated.days < 30:
            return False

        since_last_feed = rdelta.relativedelta(to_date, existing.lastfeeddate)
        if not since_last_feed.days > 1:
            return False

        MonthTrendWord.objects.filter(group=group, datedtime__range=period).delete()

    posts = Post.objects.filter(group=group, created_time__range=period)
    if not posts.exists():
        return False

    weights = {}
    _accumulate_word_weights(posts, weights)

    for word, weight in _words_above_threshold(weights):
        MonthTrendWord(group=group, datedtime=from_date, word=word, weigh=weight, lastfeeddate=to_date).save()

    return True


def _accumulate_word_weights(articles, weights):
    """Add each article's like + comment count to every word it repeats in ``weights``."""
    for article in articles:
        for word in core.analyze_articles(article.message):
            if word in weights:
                weights[word] += article.like_count + article.comment_count
            else:
                # The first sighting only registers the word; weight is
                # accumulated from the second sighting onwards.
                weights[word] = 0


def _words_above_threshold(weights):
    """Return the (word, weight) pairs whose weight exceeds twice the smoothed mean."""
    scored = [value for value in weights.values() if value != 0]
    # Both sums start at 1, which also avoids dividing by zero when nothing scored.
    threshold = int((1 + sum(scored)) / (1 + len(scored)) * 2)
    return [(word, value) for word, value in weights.items() if value > threshold]

示例#15

0

显示文件

文件： analysis_app.py 项目： pjwards/ward

def analyze_monthly_post(group, from_date, to_date):
    """
    Make trend words (MonthTrendWord) for a group from the posts of a period.

    If the group has no trend word dated inside the period, the words are
    computed from the posts created in that period. If it has one, the words
    are recomputed (after deleting the period's existing rows) only when both
    ``relativedelta(to_date, datedtime).days < 30`` and
    ``relativedelta(to_date, lastfeeddate).days > 1`` hold for the first
    existing row.

    :param group: group object
    :param from_date: collect data from this date
    :param to_date: collect data to this date
    :return: True when trend words were (re)computed; False when the existing
             words are kept or the period contains no posts
    """
    period = (from_date, to_date)

    # A group without any trend word also has none inside the period, so this
    # single lookup covers both the "never analyzed" and "not yet analyzed for
    # this period" cases.
    try:
        existing = MonthTrendWord.objects.filter(group=group, datedtime__range=period)[0]
    except IndexError:
        existing = None

    if existing is not None:
        # NOTE(review): relativedelta.days is only the day component left over
        # after whole months/years - confirm these two checks behave as intended.
        since_dated = rdelta.relativedelta(to_date, existing.datedtime)
        if not since_dated.days < 30:
            return False

        since_last_feed = rdelta.relativedelta(to_date, existing.lastfeeddate)
        if not since_last_feed.days > 1:
            return False

        MonthTrendWord.objects.filter(group=group, datedtime__range=period).delete()

    posts = Post.objects.filter(group=group, created_time__range=period)
    if not posts.exists():
        return False

    weights = {}
    _accumulate_word_weights(posts, weights)

    for word, weight in _words_above_threshold(weights):
        MonthTrendWord(
            group=group, datedtime=from_date, word=word, weigh=weight, lastfeeddate=to_date
        ).save()

    return True


def _accumulate_word_weights(articles, weights):
    """Add each article's like + comment count to every word it repeats in ``weights``."""
    for article in articles:
        for word in core.analyze_articles(article.message):
            if word in weights:
                weights[word] += article.like_count + article.comment_count
            else:
                # The first sighting only registers the word; weight is
                # accumulated from the second sighting onwards.
                weights[word] = 0


def _words_above_threshold(weights):
    """Return the (word, weight) pairs whose weight exceeds twice the smoothed mean."""
    scored = [value for value in weights.values() if value != 0]
    # Both sums start at 1, which also avoids dividing by zero when nothing scored.
    threshold = int((1 + sum(scored)) / (1 + len(scored)) * 2)
    return [(word, value) for word, value in weights.items() if value > threshold]

示例#16

0

显示文件

文件： analysis_app.py 项目： pjwards/ward

def weekly_analyze_feed(group):
    """
    Rebuild the stored trend words (MonthlyWords) of a group from its recent feed.

    Posts and comments created during the last 40 days are analyzed. Words whose
    accumulated weight exceeds the threshold computed by
    ``_words_above_threshold`` are saved as MonthlyWords rows stamped with the
    current time.

    When the group already has stored words, they are rebuilt only if
    ``relativedelta(now, lastfeeddate).days`` of the first stored row is greater
    than 1. In that case the old rows are deleted before the new ones are computed.

    :param group: group object
    :return: True when the words were rebuilt; False when the stored words are
             still considered fresh, or when the group has neither posts nor
             comments inside the window
    """
    now = timezone.now()
    window = (now - timezone.timedelta(days=40), now)

    if MonthlyWords.objects.filter(group=group).exists():
        first_stored = MonthlyWords.objects.filter(group=group)[0]
        # NOTE(review): relativedelta.days is only the day component left over
        # after whole months/years - confirm this is the intended freshness test.
        age = rdelta.relativedelta(now, first_stored.lastfeeddate)
        if age.days <= 1:
            return False
        MonthlyWords.objects.filter(group=group).delete()

    posts = Post.objects.filter(group=group, created_time__range=window)
    comments = Comment.objects.filter(group=group, created_time__range=window)

    # Give up only when BOTH sources are empty. The previous condition
    # ``post and comment is None`` parsed as ``post and (comment is None)``,
    # which rejected groups that had posts but no comments.
    if not posts.exists() and not comments.exists():
        return False

    weights = {}
    _accumulate_word_weights(posts, weights)
    _accumulate_word_weights(comments, weights)

    for word, weight in _words_above_threshold(weights):
        MonthlyWords(group=group, lastfeeddate=now, word=word, weigh=weight).save()

    return True


def _accumulate_word_weights(articles, weights):
    """Add each article's like + comment count to every word it repeats in ``weights``."""
    for article in articles:
        for word in core.analyze_articles(article.message):
            if word in weights:
                weights[word] += article.like_count + article.comment_count
            else:
                # The first sighting only registers the word; weight is
                # accumulated from the second sighting onwards.
                weights[word] = 0


def _words_above_threshold(weights):
    """Return the (word, weight) pairs whose weight exceeds twice the smoothed mean."""
    scored = [value for value in weights.values() if value != 0]
    # Both sums start at 1, which also avoids dividing by zero when nothing scored.
    threshold = int((1 + sum(scored)) / (1 + len(scored)) * 2)
    return [(word, value) for word, value in weights.items() if value > threshold]

示例#17

0

显示文件

文件： spam_app.py 项目： cbajs12/ward

def run_app():
    """Test-only helper: print one group's name and the analysis of a sample message."""
    # Group ids noted by the original author:
    #   lifecoding - 174499879257223, node - 168705546563077
    # Usage hint: import analysis.spam_app as spam
    sample_message = '$아줌마$상대로섹알바하실분$1일수익80만홈피 $ w w w . y c c 6 6 . c o m !'

    target_group = Group.objects.filter(id=168705546563077)[0]
    print("name is "+target_group.name)

    analyzed = core.analyze_articles(analyzer, sample_message)
    print(analyzed)