Пример #1
0
def get_readtime_from_url(url):
    """Fetch a news item and return a dict of read-time estimates.

    The endpoint is expected to return JSON with a ``summary`` HTML string
    and a ``body.html`` HTML string; empty sections fall back to
    ``EmptyResult`` so they contribute zero seconds.
    """
    payload = requests.get(url, headers=AUTH_HEADER).json()
    summary_html = payload['summary']
    body_html = payload['body']['html']

    summary_time = readtime.of_html(summary_html) if summary_html else EmptyResult
    body_time = readtime.of_html(body_html) if body_html else EmptyResult
    combined_seconds = summary_time.seconds + body_time.seconds

    return {
        "total": {
            "seconds": combined_seconds,
            # Round up so short articles never show "0 minutes".
            "minutes": math.ceil(combined_seconds / 60),
        },
        "sections": {
            "summary": {
                "seconds": summary_time.seconds,
                "minutes": summary_time.minutes,
            },
            "body": {
                "seconds": body_time.seconds,
                "minutes": body_time.minutes,
            },
        },
    }
Пример #2
0
def _set_read_time(request, page, is_post_creation=False):
    """Estimate how long *page* takes to consume and store it on the page.

    Renders the page's own template, measures the reading time of its
    ``<main>`` section plus the declared duration of any embedded videos,
    and writes the total (as a timedelta) into ``estimated_read_duration``
    via ``_update_data_for_appropriate_version``.

    :param request: Current request, passed through to template rendering.
    :param page: Page object; skipped unless it has an
        ``estimated_read_duration`` attribute.
    :param is_post_creation: When True, forces a direct page update
        (presumably bypassing draft/revision handling — confirm against
        ``_update_data_for_appropriate_version``).
    """
    if hasattr(page, 'estimated_read_duration'):
        # Render the page exactly as it would be served.
        html = render_to_string(page.template, {
            'page': page,
            'request': request
        })
        soup = BeautifulSoup(html, 'html.parser')
        # Drop non-content tags up front so they cannot inflate the estimate.
        for tag in soup.body.find_all(
            ['script', 'noscript', 'link', 'style', 'meta', 'header']):
            tag.decompose()
        # Get the readtime of the main content section of the page (excluding header/footer)
        reading_seconds = readtime.of_html(str(soup.find('main'))).seconds
        # Videos advertise their length through a data attribute; sum those
        # durations (seconds) on top of the reading estimate.
        video_nodes = soup.find_all(
            'video', attrs={constants.VIDEO_DURATION_DATA_ATTR_NAME: True})
        watching_seconds = sum([
            int(node.get(constants.VIDEO_DURATION_DATA_ATTR_NAME, 0))
            for node in video_nodes
        ])
        seconds = reading_seconds + watching_seconds

        _update_data_for_appropriate_version(
            page=page,
            force_page_update=is_post_creation,
            data_to_update={
                'estimated_read_duration': datetime.timedelta(seconds=seconds)
            },
        )
Пример #3
0
def read_one(id):
    """
    This function responds to a request for /api/story/{id}
    with one matching blog from story

    :param id:      id of story to find
    :return:        dict describing the matching story, including a
                    human-readable ``timeToRead`` computed from the body;
                    aborts with HTTP 404 when no story has this id
    """

    story = get(id)

    # Does the story exist?
    if story:
        # Estimate reading time of the HTML body; convert() turns the raw
        # seconds into the display format used by the API.
        readtime_result = readtime.of_html(story['body'])
        return {
            "id": story['id'],
            "title": story['title'],
            "description": story['description'],
            "body": story['body'],
            "timeToRead": convert(readtime_result.seconds),
            "tags": story['tags'],
            "createdAt": story['created'],
            "published": story['published']
        }
    else:
        abort(404, "Record not found")
Пример #4
0
def processURL(url):
    """Scrape *url* and assemble a document dict describing the page.

    The dict bundles page metadata (title, language, preview picture,
    estimated reading time, domain name, HTTP status, ...) together with
    the full HTML and plain-text content, plus an ``all`` list containing
    every value stringified for crude full-text matching.

    :param url: Address of the page to scrape.
    :return: dict of document fields, keyed like a Wallabag entry.
    """
    # Scrape the page using requests
    page = requests.get(url)
    # Pull the mimetype and http status code
    mimetype = page.headers['content-type']
    http_status = str(page.status_code)
    # Transform the url into a unique id (md5 hex digest of the url).
    # Renamed local from ``id`` so it no longer shadows the builtin.
    doc_id = hashlib.md5(url.encode()).hexdigest()
    # Set default document fields; tags and slugs default to empty
    is_archived = 1
    is_starred = 0
    user_name = 'admin'
    user_email = '*****@*****.**'
    user_id = str(1)
    is_public = str(False)
    created_at = str(datetime.now())[:-3]
    updated_at = str(datetime.now())[:-3]
    links = ["api/entries/" + str(doc_id)]
    tags = str([])
    slugs = tags

    # Shorten given url to obtain domain name.  Raw string fixes the invalid
    # '\/' escape sequences of the original (a DeprecationWarning today,
    # a SyntaxError in future Pythons); '\/' and '/' match identically.
    domain_name = re.search(r'https?://[^#?/]+', url).group(0)

    # Load scraped page into beautiful soup parser
    bs = BeautifulSoup(page.content, 'html.parser')
    # Use the first .jpg image as the preview image; if there are none,
    # generate a placeholder carrying the title as text.  r'\.jpg' matches a
    # literal dot — the original '.jpg' matched ANY character before "jpg".
    images = bs.find_all('img', {'src': re.compile(r'\.jpg')})
    title = str(bs.title.string)
    if not images:
        preview_picture = ('https://dummyimage.com/170/000/ffffff&text='
                           + title.replace(' ', '%20'))
    else:
        preview_picture = images[0]['src']
    # Read the language from the <html lang="..."> attribute; default to
    # English when absent.  (The original used ``bs.lang``, which searches
    # for a <lang> TAG — that almost never exists, so the language was
    # effectively always 'en'.)
    language = bs.html.get('lang') if bs.html else None
    if language is None:
        language = 'en'
    # Pull the entire html content of the page, as well as text-only content
    content = str(bs)
    content_text = bs.text
    # Use readtime module to estimate reading time (whole minutes)
    reading_time = str(readtime.of_html(str(bs)).minutes)
    # Collect all data into a dictionary.  The original literal listed the
    # 'id' key twice; the duplicate silently overwrote the first and is
    # removed here.
    result = {
        'is_archived': is_archived, 'is_starred': is_starred,
        'user_name': user_name, 'user_email': user_email, 'user_id': user_id,
        'tags': tags, 'slugs': slugs, 'is_public': is_public, 'id': doc_id,
        'title': title, 'url': url, 'content_text': content_text,
        'created_at': created_at, 'updated_at': updated_at,
        'mimetype': mimetype, 'language': language,
        'reading_time': reading_time, 'domain_name': domain_name,
        'preview_picture': preview_picture, 'http_status': http_status,
        'links': links, 'content': content,
    }
    # Stringify every value and store the list under 'all' (local renamed
    # from ``all`` so it no longer shadows the builtin).
    all_values = [str(value) for value in result.values()]
    result['all'] = all_values
    return result
 def save(self, *args, **kwargs):
     """Persist the post, refreshing its derived fields first.

     Before delegating to the default ``save``, recomputes the URL slug
     from the title and the estimated read time of the HTML content
     (the field that is editable with summernote).

     :param args: Positional arguments forwarded to the default save.
     :param kwargs: Keyword arguments forwarded to the default save.
     :return: None.
     """
     self.post_readtime = readtime.of_html(self.content)
     self.slug = slugify(self.title)
     super().save(*args, **kwargs)
Пример #6
0
    def estimate_link_read_time(self, msg, args):
        """Listen to messages containing a link, and if so, fetch the page
        and estimate the reading time based on the HTML.

        If the bot is not active in the room, it simply does nothing.
        """
        if not self.is_active_in_room(msg.to):
            return

        link = re.search(URL_REGEX, msg.body).group(0)
        page_html = get_page_html(link)
        minutes = readtime.of_html(page_html).minutes
        return 'Estimated time: {} min.'.format(minutes)
Пример #7
0
def find_posts() -> List[Post]:
    """Load every post under ``blog/`` and return them newest-first."""
    posts: List[Post] = []

    for entry in os.listdir('blog'):
        meta = frontmatter.load(f'blog/{entry}/index.md')
        rendered = render_markdown(meta.content)
        posts.append(Post(
            meta["title"],
            meta["created_date"],
            meta.get("updated_date"),  # optional — not every post has one
            meta["slug"],
            entry,
            rendered,
            readtime.of_html(rendered).text,
        ))

    posts.sort(key=lambda p: p.created_date, reverse=True)
    return posts
Пример #8
0
 def read_time(self):
     """Return the estimated read time of the body as a display string."""
     body_html = str(self.body)
     return str(readtime.of_html(body_html))
Пример #9
0
def read_time(value):
    """Estimate the reading time of the given HTML fragment."""
    estimate = readtime.of_html(value)
    return estimate
Пример #10
0
def read(html):
    """Return the readtime estimate for an HTML string."""
    estimate = readtime.of_html(html)
    return estimate
Пример #11
0
 def get_readtime(self):
     """Estimate how long this object's content takes to read."""
     html = self.content
     return readtime.of_html(html)
Пример #12
0
 def reading_time(self):
     """Human-readable reading-time estimate for the content."""
     estimate = readtime.of_html(self.content)
     return estimate.text
Пример #13
0
 def get_read_time(self, instance):
     """Serializer method: read-time estimate of *instance*'s body."""
     estimate = readtime.of_html(instance.body)
     return str(estimate)
Пример #14
0
def get_article_read_time_from_html(html_text: str):
    """Estimate the read time of *html_text*.

    Best-effort: returns the readtime result object, or an empty string
    when the estimate fails for any reason, so callers can render the
    value directly without crashing.
    """
    try:
        return readtime.of_html(html_text)
    except Exception:
        # Deliberate broad fallback — an empty string renders as "no
        # estimate" rather than raising.
        return ''
Пример #15
0
def template(output_path: str):
    """The main template engine to generate the site's static content.

    Pipeline (each step prints a progress line):
      1. Empty *output_path* and copy the static directory into it.
      2. Load templates from ./config.json into the global TEMPLATES.
      3. Render every blog article (markdown -> HTML) plus an archive
         listing, writing one file per article and recording its URL in
         the global ROUTEMAP.
      4. Render every configured page, also recorded in ROUTEMAP.
      5. Copy any extra files listed under config["copy"].
      6. Emit sitemap.xml from ROUTEMAP.

    :param output_path: Directory to (re)build the site into; its previous
        contents are removed.
    """
    global TEMPLATES
    global ROUTEMAP
    print("[template] emptying working directory")
    directory_empty(output_path)

    print("[template] reading config file at ./config.json")
    config = json.loads(file_read("config.json"))

    print("[template] copying static directory")
    output_file = os.path.join(output_path, "static")
    shutil.copytree(config["static_directory"], output_file)

    print("[template] loading templates from config")
    TEMPLATES = templates_load(config["templates"])

    print("[template] running blog article generator")
    blog_article_listings = ""
    for article in config["articles"]:
        article_url = f"/blog/{article['identifier']}"
        print(
            f"[template/blog] creating article '{article['title']}' at {article_url}"
        )

        # Render markdown once; the read-time string is derived from the
        # rendered HTML and reused in both the listing and the article page.
        content = markdown2.markdown(file_read(article["markdown"]))
        content_time = str(readtime.of_html(content))

        # Create a new listing for the blog archive page
        blog_article_listings += template_fill(
            TEMPLATES["blog-listing"],
            {
                "title": article["title"],
                "datestring": article["datestring"],
                "readtime": content_time,
                "banner": article["banner"],
                "description": article["description"],
                "permalink": article_url,
            },
        )

        # Create blog article from template
        blog_article = template_fill(
            TEMPLATES["blog-article"],
            {
                "title": article["title"],
                "datestring": article["datestring"],
                "readtime": content_time,
                "banner": article["banner"],
                "description": article["description"],
                "permalink": article_url,
                "content": content,
            },
        )
        # NOTE: written as blog-<id>.html even though the permalink is
        # /blog/<id> — presumably the web server rewrites this; confirm.
        output_file = os.path.join(output_path,
                                   f"blog-{article['identifier']}.html")
        file_write(output_file, blog_article)
        # Articles get a fixed sitemap priority of 0.7.
        ROUTEMAP[f"{config['domain']}{article_url}"] = 0.7

    # Expose the accumulated listings to later template fills.
    TEMPLATES["@blog-listings"] = blog_article_listings

    print("[template] running page generator")
    for page in config["pages"]:
        page_url = page["location"]
        print(f"[template/page] creating page '{page['title']}' at {page_url}")
        content = template_fill(
            file_read(page["file"]),
            {
                "title": page["title"],
                "description": page["description"],
                "permalink": page_url,
            },
        )
        output_file = os.path.join(output_path, page["destination"])
        file_write(output_file, content)
        # Pages carry their own sitemap priority, unlike articles.
        ROUTEMAP[f"{config['domain']}{page_url}"] = page["priority"]

    print("[template] copying custom static files")
    for copy in config["copy"]:
        print(
            f"[template/copy] copying file '{copy['file']}' to '{copy['location']}'"
        )
        output_file = os.path.join(output_path, copy["location"])
        shutil.copy(copy["file"], output_file)

    print("[template] compiling sitemap XML")
    # The "sitemap" template supplies the XML prologue and opening <urlset>.
    sitemap = TEMPLATES["sitemap"]
    for route in ROUTEMAP:
        sitemap += (
            f"<url><loc>{route}</loc><priority>{ROUTEMAP[route]}</priority></url>"
        )
    sitemap += "</urlset>"
    output_file = os.path.join(output_path, "sitemap.xml")
    file_write(output_file, sitemap)

    print("[template] finished")
Пример #16
0
 def get_context(self, request):
     """Extend the template context with the page's estimated read time."""
     context = super(ArticlePage, self).get_context(request)
     context["time_to_read"] = readtime.of_html(self.content.__html__())
     return context
Пример #17
0
 def get_read_time(self):
     """Return the read time of the HTML body."""
     return readtime.of_html(str(self.main_content))
Пример #18
0
 def test_html(self):
     """of_html on the HTML sample yields 236 seconds / '4 min'."""
     # ``with`` closes the sample file; the original leaked the handle.
     with open('tests/samples/html.html') as sample:
         inp = sample.read()
     result = readtime.of_html(inp)
     # assertEqual: assertEquals is a deprecated alias, removed in 3.12.
     self.assertEqual(result.seconds, 236)
     self.assertEqual(result.text, u('4 min'))
     self.assertEqual(u(result), u('4 min read'))
Пример #19
0
 def tempo_de_leitura(self):
     """Return the human-readable reading-time estimate of the content."""
     estimate = of_html(self.conteudo)
     return estimate.text
Пример #20
0
 def get_read_time(self):
     """Return the read time of the Content body."""
     return readtime.of_html(str(self.body))
Пример #21
0
def new(*args, **kwargs):
    """Create a new post for the authenticated user.

    Expects the caller's identity in ``kwargs['token']['name']`` and a JSON
    payload in ``request.form['data']`` with at least ``title``, ``content``,
    ``tags``, and optionally ``image``.  On success the post is stored,
    every follower of the author receives a notification, and the post's
    link is returned as JSON.  Errors (missing permission / missing data)
    return a JSON error with status 401.
    """
    currentUser = User.get().filter_by(
        name=kwargs['token']['name']).first_or_404()

    # Only users whose role grants add_post may publish.
    if not currentUser.role.permissions.add_post:
        return make_response(
            jsonify({
                'operation': 'error',
                'error': 'Missing permissions'
            }), 401)

    # NOTE(review): raises KeyError when 'data' is absent from the form;
    # only an empty value reaches the error response below.
    if not request.form['data']:
        return make_response(
            jsonify({
                'operation': 'error',
                'error': 'Missing data'
            }), 401)

    data = json.loads(str(request.form['data']))

    if not data['title'] or not data['content'] or not data['tags']:
        return make_response(
            jsonify({
                'operation': 'error',
                'error': 'Missing data'
            }), 401)

    # Reserve the next post id from the DB sequence up front: it is needed
    # to name the thumbnail and to build the post link before insertion.
    index = str(db.session.execute(Sequence('post_id_seq')))
    thumbnail_link = None
    if data['image']:
        thumbnail = SaveImage(index)
        thumbnail_link = url_for('static',
                                 filename='thumbail_post/{}'.format(thumbnail))
    else:
        thumbnail_link = 'none'

    # Detect the post's language from its plain-text content.
    lang = translate.getLanguageForText(
        str(cleanhtml(data['content'])).encode('utf-8-sig'))

    langQuery = Languages.get().filter_by(code=lang.iso_tag).first()

    # First post in a new language creates the Languages row on the fly.
    if langQuery is None:
        new_lang = Languages(name=lang.language, code=lang.iso_tag)
        new_lang.add()
        langQuery = new_lang

    tags_ids = []
    tags = []

    # Create missing tags, bump usage counts on existing ones.
    for tag in data['tags']:
        check = Post_Tag.get().filter_by(name=tag).first()

        if check is None:
            new_tag = Post_Tag(name=tag, count=1)
            new_tag.add()
            check = new_tag
        else:
            setattr(check, 'count', Post_Tag.count + 1)
            check.save()

        tags_ids.append(check.id)

    for tag_id in tags_ids:
        tags.append({"post": index, "tag_id": tag_id})

    # Assemble the post; read_time is estimated from the HTML content and
    # the link slug is the title with spaces dashed and '?' stripped.
    nPost = NewPostSchema().load({
        "id":
        int(index),
        "title":
        data['title'],
        "read_time":
        str(readtime.of_html(data['content'])),
        "author_id":
        currentUser.id,
        "language_id":
        langQuery.id,
        "info": {
            "thumbnail": thumbnail_link,
            "text": data['content'],
            "tags": tags
        },
        "link":
        '/post/' + (str(data['title']).replace(' ', '-')).replace('?', '') +
        '-' + str(index)
    })

    nPost.add()

    # Fan out an in-app + push notification to every follower.
    for user in currentUser.followed:
        not_id = str(db.session.execute(Sequence('notification_id_seq')))
        notification = Notification(
            id=int(not_id),
            author=currentUser.id,
            user=user.user,
            type=5,
            title=nPost.title,
            body='{} shared a new post'.format(currentUser.name),
            link=nPost.link + '?notification_id=' + str(not_id))
        send_notification(
            user.user, {
                'text': '{} shared a new post'.format(currentUser.name),
                'link': nPost.link + '?notification_id=' + str(not_id),
                'icon': currentUser.info.avatar_img,
                'id': int(not_id)
            })
        notification.add()

    return make_response(jsonify({
        'operation': 'success',
        'link': nPost.link
    }), 200)
Пример #22
0
def get_article_read_time_from_file(request, file_name: str):
    """Estimate the read time of the article stored in *file_name*.

    Best-effort: returns the readtime result object, or an empty string
    when reading or estimating fails for any reason.
    """
    try:
        article_html = read_article_text(request, file_name)
        return readtime.of_html(article_html)
    except Exception:
        # Deliberate broad fallback — render "no estimate" instead of raising.
        return ''