示例#1
0
    def db_objects_to_articles(db_objects):
        """
        Rebuild ArticleInfo instances from stored database rows.

        The rows are the dicts produced by database.py -> retrieve_articles().
        Each row's "Comments" value is a JSON string that is decoded and turned
        back into Comment objects before the article itself is created.

        :param db_objects: List of dicts, one per stored article
        :return: List of ArticleInfo instances, in the order of the input rows
        """

        def build_comment(raw):
            # Comment() is created empty and populated attribute by attribute.
            restored = Comment()
            restored.author = raw["Author"]
            restored.city = raw["City"]
            restored.text = raw["Text"]
            restored.likes = raw["Likes"]
            restored.dislikes = raw["Dislikes"]
            # Only the ratio is converted; the other fields are copied as stored.
            restored.ratio = float(raw["Ratio"])
            restored.time = raw["Time"]
            return restored

        result = []
        for row in db_objects:
            # Comments are decoded first, before any other column is read.
            row_comments = [
                build_comment(raw) for raw in json.loads(row["Comments"])
            ]
            article = ArticleInfo(
                row["Link"],
                row["Header"],
                row["Description"],
                row["Category"],
                row["Author"],
                row["Published_at"],
                row["Modified_at"],
                row["Paragraphs"],
                row_comments,
            )
            result.append(article)
        return result
示例#2
0
def grab_best_comments():
    """
    Return the best comments of the newest articles as a JSON response.

    Comments are ranked by ratio and then by likes, both descending.

    The optional ``len`` query parameter limits how many comments are
    returned (default 5). A value that is not an integer falls back to the
    default instead of failing the request, and a negative value is treated
    as 0 so that it cannot be misread as a "drop the last N" slice.

    :return: jsonify() response built from Comment.as_dicts() of the top
             comments
    """
    default_count = 5

    requested = request.args.get("len")
    if requested is None:
        comment_count = default_count
    else:
        try:
            # Clamp at 0: a negative bound would slice from the end instead.
            comment_count = max(int(requested), 0)
        except ValueError:
            comment_count = default_count

    articles = retrieve_newest_articles()

    # finds all comments and sorts them, best first
    comments = Comment.find_comments(articles)
    sorted_comments = sorted(comments,
                             key=lambda obj: (obj.ratio, obj.likes),
                             reverse=True)

    return jsonify(Comment.as_dicts(sorted_comments[:comment_count]))
示例#3
0
def parse_comments(response):
    """
    Scrape the comments from a comments page and attach them to the article.

    The article is read from ``response.meta`` under "article_object" and its
    ``comments`` attribute is replaced by the parsed list. Nothing is
    returned.

    :param response: Scrapy response
    """
    article = response.meta.get("article_object")

    def extract(query):
        # Every match of the XPath query, as a list.
        return response.xpath(query).getall()

    # xpath is needed here to get better access to the elements; all paths are
    # anchored on the "souhlasim" / "nesouhlasim" links of each comment
    author_texts = extract(
        "//a[@data-dot='souhlasim']/../../div/div/div/text()")
    texts = extract(
        "//a[@data-dot='souhlasim']/../../../../div/div/text()")
    likes = extract("//a[@data-dot='souhlasim']/span/text()")
    dislikes = extract("//a[@data-dot='nesouhlasim']/span/text()")
    times = extract(
        "//a[@data-dot='souhlasim']/../../div/div/div/span/text()")

    # The number of like counters decides how many comments are built; the
    # other lists are read at the same positions.
    article.comments = [
        Comment(author_texts[idx], texts[idx], likes[idx], dislikes[idx],
                times[idx])
        for idx in range(len(likes))
    ]
示例#4
0
 def as_dict(self):
     """
     Return the article as a flat dict.

     'Paragraphs_count' is derived from the paragraphs, and 'Comments' holds
     the comments as a JSON string (non-ASCII characters are kept as-is).
     """
     record = {
         'Link': self.link,
         'Header': self.header,
         'Description': self.description,
         'Category': self.category,
         'Author': self.author,
         'Published_at': self.published_at,
         'Modified_at': self.modified_at,
         'Paragraphs': self.paragraphs,
     }
     # Derived fields go last so the key order stays the same.
     record['Paragraphs_count'] = len(self.paragraphs)
     record['Comments'] = json.dumps(Comment.as_dicts(self.comments),
                                     ensure_ascii=False)
     return record
示例#5
0
# First pass: build one Thread (with its Question and Comments) per entry of
# threadsOrigin, and feed every comment body into a single shared corpus.
threads = []
corpus = Corpus("")
for thread in threadsOrigin:
    threadId = thread['THREAD_SEQUENCE']
    t = Thread(threadId)
    # NOTE(review): attribute-style child access, `.string` and find_all()
    # look like BeautifulSoup tags - confirm against where threadsOrigin is built.
    qSub = thread.RelQuestion.RelQSubject.string
    qBody = thread.RelQuestion.RelQBody.string
    q = Question(qSub, qBody)
    # 1-based position of the comment inside its thread, passed to Comment().
    i = 1
    comments = thread.find_all('RelComment')
    for comment in comments:
        commentId = comment['RELC_ID']
        username = comment['RELC_USERNAME']
        relevance = comment['RELC_RELEVANCE2RELQ']
        body = comment.RelCText.string
        c = Comment(commentId, username, relevance, body, i)
        q.add_comment(c)
        i += 1
        # The same body is also added to the corpus shared by all threads.
        corpus.addCorpusText(body)
    # Called once per question, after all of its comments have been added.
    obo.setQuestionDictionaries(q)
    t.set_question(q)
    threads.append(t)

# Second pass: starts only after the corpus has received the comment bodies of
# every thread.
obo.setCorpusDictionary(corpus)
for t in threads:
    q = t.question
    freq.percentageQuestion(corpus, q)
    # `filter` here must be a project object that shadows the builtin; the
    # builtin filter has no commentRang attribute.
    filter.commentRang(q)
    # print("question: " + str(t.id) + ", percentage: " + str(q.percentage))

示例#6
0
 def parse(self, soup, indent=0):
     """
     Collect the comments of one nesting level, recursing into their replies.

     :param soup: parsed markup that is searched for comment blocks
     :param indent: ``data-indent`` value to match; also stored as the
                    comment level of every comment found
     :return: list of Comment objects, each holding its own subcomments
     """

     def first(node, tag, attrs):
         # First match below the node; raises IndexError when nothing matches.
         return node.findAll(tag, attrs)[0]

     level_filter = {"class": "comment", "data-indent": indent}
     parsed = []
     for node in soup.findAll("div", level_filter):
         item = Comment()
         item.set_author_name(
             first(node, "div", {"class": "comment__user"})['data-name'])
         item.set_author_url(
             self.main_url + first(node, "a", {"class": "user"})['href'])
         item.set_datetime(
             first(node, "time", {"class": "comment__datetime"})['datetime'])
         item.set_link(
             first(node, "a", {
                 "class": "comment__tool",
                 "data-role": "link"
             })['href'])
         item.set_rating(
             first(node, "div", {"class": "comment__rating-count"}).text)
         item.set_formatted_text(
             self._clean_text(
                 first(node, "div", {"class": "comment__content"}).text))

         # The pluses are the first integer token of the rating's aria-label.
         # A disabled line at this spot read the second integer of the same
         # label for set_minuses.
         rating_label = first(
             node, "div", {"class": "comment__rating-count"})['aria-label']
         numbers = [
             int(token) for token in rating_label.split() if token.isdigit()
         ]
         item.set_pluses(numbers[0])

         # Replies sit one indent level deeper, inside "comment__children".
         if self._has_subcomments(node):
             children = first(node, "div", {"class": "comment__children"})
             replies = self.parse(children, indent + 1)
         else:
             replies = []
         item.set_subcomments(replies)

         item.set_comment_level(indent)
         parsed.append(item)
     return parsed
 def test_stops_parsing(self):
     """parse_to_end() returns the text that follows the closing ``-->``."""
     source = "<!-- A comment which should end here--> And more text"
     remainder = Comment(source).parse_to_end({})
     self.assertEqual(" And more text", remainder)
 def test_triple_hyphen(self):
     """A comment that ends with ``--->`` must raise XMLError."""
     # Plain literal: the text has no placeholders, so no f-string is needed.
     comment = Comment("<!--A comment with ---> Text <end/>")
     with self.assertRaises(XMLError):
         comment.parse_to_end({})
 def test_forbidden_characters(self):
     """Each listed character inside a comment must raise XMLError."""
     forbidden = ("\u0001", "\u0003", "\u0010", "\ufffe", "\uffff")
     for char in forbidden:
         # One sub-test per character, so a failure names the offending one.
         with self.subTest(f"Char: {char}"):
             subject = Comment(f"<!--A comment with {char} --> Text <end/>")
             with self.assertRaises(XMLError):
                 subject.parse_to_end({})