def save_website(url, file_name):
    """Print the page at ``url`` to a PDF through the configured puppeteer command.

    The output directory is the ``pdf_target_path`` config entry (created when
    missing). The command line is the ``puppeteer_template`` config entry,
    filled positionally with the URL and the output path, then split on single
    spaces.

    :param url: address of the page to print
    :param file_name: name of the PDF inside the target directory
    :returns: whatever ``call`` returns for the command; ``None`` when any
        step raised (the exception is logged, not propagated)
    """
    try:
        LOGGER.info("puppeteer print for {}".format(url))

        pdf_dir = config.get_config()['pdf_target_path']
        os.makedirs(pdf_dir, exist_ok=True)
        destination = os.path.join(pdf_dir, file_name)
        LOGGER.info("generating PDF file with name {}".format(file_name))

        # First placeholder receives the URL, second the output path.
        command_template = config.get_config()['puppeteer_template']
        argv = command_template.format(url, str(destination)).split(' ')
        LOGGER.info("calling puppeteer script as -- {}".format(argv))

        return call(argv)
    except Exception as err:
        LOGGER.error(err)
def save_pdf(html, file_name):
    """Render an HTML string to a PDF inside the configured PDF directory.

    The directory is the ``pdf_target_path`` config entry and is created when
    missing.

    :param html: markup handed to ``pdfkit.from_string``
    :param file_name: name of the PDF inside the target directory
    :returns: ``None``; any exception is logged and swallowed
    """
    try:
        target_path = config.get_config()['pdf_target_path']
        os.makedirs(target_path, exist_ok=True)
        file_path = os.path.join(target_path, file_name)
        LOGGER.info("generating PDF file with name {}".format(file_name))
        # Write to the joined path: the bare file name would place the PDF in
        # the current working directory rather than the target directory.
        pdfkit.from_string(html, file_path)
    except Exception as e:
        LOGGER.error(e)
def save_html(html: str, file_name: str):
    """Write ``html`` unchanged to a file inside the configured HTML directory.

    The directory is the ``html_target_path`` config entry and is created when
    missing.

    :param html: markup to store
    :param file_name: name of the file inside the target directory
    :returns: ``None``; any exception is logged and swallowed
    """
    try:
        target_path = config.get_config()['html_target_path']
        os.makedirs(target_path, exist_ok=True)
        file_path = os.path.join(target_path, file_name)
        LOGGER.info("saving html file with name {}".format(file_name))
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent characters that appear in the markup.
        with open(file_path, mode='w', encoding='utf-8') as f:
            f.write(html)
    except Exception as e:
        LOGGER.error(e)
def save_pdf_directly(response):
    """Store the body of ``response`` as a PDF file in the PDF directory.

    The file name comes from ``get_fn_from_header``; when that returns ``None``
    it falls back to the response URL with every non-word character removed,
    plus ``.pdf``. The URL's host is prepended to the name. A file that
    already exists at the resulting path is left untouched.

    :param response: object exposing ``url`` (text) and ``body`` (bytes)
    :returns: ``None``
    """
    target_path = config.get_config()['pdf_target_path']
    os.makedirs(target_path, exist_ok=True)

    file_name = get_fn_from_header(response)
    if file_name is None:
        # Raw string: "\W" in a plain literal is an invalid escape sequence.
        title = re.sub(r"\W", "", response.url.strip())
        file_name = "{}.pdf".format(title)

    # making it host specific
    host = urlparse(response.url).netloc
    # NOTE(review): a header-derived name is joined into the path as-is;
    # confirm get_fn_from_header strips path separators.
    file_name = host + file_name
    file_path = os.path.join(target_path, file_name)

    if os.path.exists(file_path):
        return

    print('Saving PDF {}'.format(file_path))
    with open(file_path, 'wb') as f:
        f.write(response.body)
示例#5
0
def try_parse_url(url):
    """Request a parsed summary of ``url`` from the mercury service.

    The URL is quoted into ``url_template`` and sent with the ``mercury``
    config entry as the ``x-api-key`` header.

    :param url: address of the page to parse
    :returns: the decoded JSON reply, or ``None`` when anything raised; in
        that case the error is logged and the call pauses for 120 seconds
        before returning
    """
    try:
        req_url = url_template.format(parse.quote_plus(url))
        LOGGER.info("mercury outgoing request -- {}".format(req_url))
        req = request.Request(req_url)
        req.add_header('x-api-key', config.get_config()['mercury'])
        req.add_header('Content-Type', 'application/json')
        # The context manager closes the HTTP response even if reading fails.
        with request.urlopen(req) as response:
            raw_content = response.read()
        LOGGER.info("mercury parsing complete for {}".format(url))
        return json.loads(raw_content.decode('utf-8'))
    except Exception as e:
        LOGGER.error(e)
        LOGGER.warning("might have reached a timeout. waiting a bit")
        LOGGER.warning(url)
        # Back off before the caller tries the next URL.
        time.sleep(120)
        return None
示例#6
0
from components import telegram_conn, mercury, website, pdf_maker, config
import json
import logging

# Configure logging for the process: DEBUG level, each record prefixed with
# timestamp, logger name and level name.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')



def startup():
    """Start polling through the ``telegram_conn`` module."""
    telegram_conn.start_polling()


# Script entry point: initialise the telegram connection with the 'telegram'
# config entry, then start polling (blocks until disconnect).
if __name__ == "__main__":
    telegram_conn.init(config.get_config()['telegram'])
    startup()
示例#7
0
from urllib import request, parse
import json
import logging
from components import config, website, mercury

# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)

# Value of the 'pocket' config entry; sent as `consumer_key` in requests.
consumer_key = config.get_config()['pocket']

# Pocket v3 API endpoints, keyed by the step they serve.
urls = {
    "auth_post": "https://getpocket.com/v3/oauth/request",
    "get_articles": "https://getpocket.com/v3/get",
    "get_access_token": "https://getpocket.com/v3/oauth/authorize"
}
# Headers sent with requests: form-encoded body, JSON reply requested via
# X-Accept.
headers = {
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF8",
    "X-Accept": "application/json"
}


def get_request_token() -> str:
    """Obtain a request token from the Pocket ``oauth/request`` endpoint.

    POSTs the module's ``consumer_key`` and a fixed redirect URI as a
    form-encoded body using the module-level ``headers``.

    :returns: the ``code`` field of the JSON reply
    :raises: whatever ``urlopen`` or JSON decoding raises; a reply without a
        ``code`` field raises ``KeyError``
    """
    params = [('consumer_key', consumer_key),
              ('redirect_uri', 'https://pascalbrokmeier.de')]
    params_bytes = parse.urlencode(params).encode()
    req = request.Request(urls['auth_post'],
                          method="POST",
                          data=params_bytes,
                          headers=headers)
    # The context manager closes the HTTP response once the body is read.
    with request.urlopen(req) as response:
        json_string = response.read().decode('utf-8')
    return json.loads(json_string)['code']
示例#8
0
import json
import os
from datetime import date

from components import config
# Value of the 'website_root' config entry, read once at import time.
web_root = config.get_config()['website_root']

import logging
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)

def add_json_summary(summary):
    """Add a mercury JSON summary to the website's list of read things.

    The summary first goes through ``process_summary``; when that returns
    ``None`` nothing is added.
    """
    processed = process_summary(summary)
    if processed is not None:
        _add_summary(processed)

def process_summary(summary: dict):
    """TODO: Docstring for process_summary.

    :summary: dict: TODO
    :returns: TODO

    """
    try:
        del summary['content']
        summary['date_read'] = date.today().isoformat()
    except Exception as e:
        LOGGER.error(e)
        LOGGER.error(summary)