Пример #1
0
            comments = raw_comments.split("</comment>")
            courses += [[title, comments]]

    return courses


"""
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('maxent_treebank_pos_tagger')
"""

# Script setup: load the course data and build the sentiment analyzer.
# NOTE(review): "2014QComments" is presumably the name/path of the input file
# read by parse_course_file — confirm against that function.
courses = parse_course_file("2014QComments")
# Word lists handed to the analyzer as its two constructor arguments.
# NOTE(review): presumably seed words marking positive/negative sentiment —
# confirm in Analyze.SentimentAnalysis.
positive = ["doable"]
negative = ["difficult", "hard", "work"]
analyzer = Analyze.SentimentAnalysis(positive, negative)

# For every parsed course, set up the extraction parameters and ask
# key_phrases_for_course for that course's key phrases.
for course_num, course in enumerate(courses):
    # Nouns and adjectives, run nltk.help.upenn_tagset() to see all possible tags
    # pos = ["JJ", "JJR", "JJS", "NN", "NNP", "NNPS", "NNS"]
    # The active list below keeps only the NN* tags; the JJ* tags from the
    # commented-out list above are not included.
    pos = ["NN", "NNP", "NNPS", "NNS"]
    # NOTE(review): presumably a word-distance window used when grouping
    # keywords into phrases — confirm in key_phrases_for_course.
    window = 2
    # Re-created as an empty list on each iteration.
    sentences = []
    # Extra words passed to key_phrases_for_course in addition to the other
    # settings. NOTE(review): presumably treated as stop words to exclude —
    # verify against key_phrases_for_course.
    custom_stop = [
        "course", "class", "this", "will", "in", "you", "make", "sure",
        "expect"
    ]
    # NOTE(review): presumably the minimum keyword length in characters —
    # confirm in key_phrases_for_course.
    min_keyword_len = 4

    key_phrases = key_phrases_for_course(course, pos, window, custom_stop,
                                         min_keyword_len)