def config_page():
    """Show the per-script configuration page (GET) or save one script's settings (POST).

    GET: every entry of ``SCRIPT_CONFIGS`` is copied, the values stored in
    ``<script_name>.yml`` under the config directory are overlaid onto the
    matching ``config_name`` entries, and ``config.html`` is rendered.

    POST: the form must carry ``script_name``. For every form field named
    ``<name>_check`` whose companion field ``<name>`` is also present, the
    companion's value is written to ``<script_name>.yml``. Fields without a
    ``_check`` marker are not saved.

    Returns:
        The rendered template for GET; an empty 200 response after a
        successful save; a 400 response when ``script_name`` is missing or
        contains a path separator; a 405 response for any other method.
    """
    config_path = './web2kindle/config'

    if request.method == 'GET':
        # Work on a copy so the shared SCRIPT_CONFIGS template is never mutated.
        configs = deepcopy(SCRIPT_CONFIGS)
        # Overlay the values currently stored on disk onto the template.
        for each_script in configs:
            path = os.path.join(config_path, each_script['script_name'] + '.yml')
            stored = load_config(path)
            for each_config in each_script['configs']:
                config_name = each_config['config_name']
                if config_name in stored:
                    each_config['value'] = stored[config_name]
        return render_template('config.html', configs=configs)

    if request.method == 'POST':
        form_data = request.form.to_dict()

        # script_name is client-controlled and becomes part of a file path:
        # refuse anything that could escape the config directory.
        script_name = form_data.get('script_name', '')
        if not script_name or '/' in script_name or '\\' in script_name:
            return Response(status=400)

        marker = '_check'
        new_config = {}
        for field in form_data:
            # Only a trailing marker counts; strip exactly that suffix so a
            # config name that itself contains "_check" is left intact.
            if not field.endswith(marker):
                continue
            config_name = field[:-len(marker)]
            # A marker without its value field has nothing to save.
            if config_name in form_data:
                new_config[config_name] = form_data[config_name]

        write_config(os.path.join(config_path, script_name + '.yml'), new_config)
        return Response()

    # Any other HTTP method: answer explicitly instead of returning None.
    return Response(status=405)
def __init__(self):
    """Prepare the sender from the main configuration file.

    Loads ``./web2kindle/config/config.yml`` and copies the mail account
    name, password, SMTP address and Kindle address onto the instance. If
    any of those keys is missing, an error is logged and the process is
    terminated with exit status 1. An ``smtplib.SMTP`` client is created
    without connecting to a host.
    """
    self.CONFIG = load_config('./web2kindle/config/config.yml')
    self.log = Log('SendEmail2Kindle')

    # (instance attribute, config key) pairs, looked up in this order.
    required_settings = (
        ('username', 'EMAIL_USERNAME'),
        ('password', 'PASSWORD'),
        ('smtp_addr', 'SMTP_ADDR'),
        ('kindle_addr', 'KINDLE_ADDR'),
    )
    try:
        for attr_name, config_key in required_settings:
            setattr(self, attr_name, self.CONFIG[config_key])
    except KeyError:
        self.log.log_it("无法实例化SendEmail2Kindle,请确保config.yml配置完整", 'ERROR')
        # Imported here because only this failure path needs it.
        import os
        # Hard exit: ends the process immediately, without raising SystemExit.
        os._exit(1)

    # The sender address is the configured account name.
    self.sender = self.username
    self.sended = []
    self.client = smtplib.SMTP()
# !/usr/bin/env python # coding: utf-8 import logging import sys import os from logging.handlers import WatchedFileHandler from functools import partial from web2kindle.libs.utils import load_config config = load_config('./web2kindle/config/config.yml') class BaseLog(object): logger_dict = {} @staticmethod def log(logger_name, message, level): if level == 'INFO': BaseLog.get_logger(logger_name).info(message) elif level == 'DEBUG': BaseLog.get_logger(logger_name).debug(message) elif level == 'ERROR': BaseLog.get_logger(logger_name).error(message) elif level == 'WARN': BaseLog.get_logger(logger_name).warning(message) @staticmethod def get_logger(logger_name): if logger_name not in BaseLog.logger_dict: logger = logging.getLogger(logger_name)
import re import time from copy import deepcopy from queue import Queue, PriorityQueue from urllib.parse import urlparse from bs4 import BeautifulSoup from web2kindle import MAIN_CONFIG from web2kindle.libs.crawler import Crawler, RetryDownload, Task from web2kindle.libs.db import ArticleDB from web2kindle.libs.html2kindle import HTML2Kindle from web2kindle.libs.send_email import SendEmail2Kindle from web2kindle.libs.utils import write, load_config, check_config, md5string from web2kindle.libs.log import Log SCRIPT_CONFIG = load_config('./web2kindle/config/guoke_scientific.yml') LOG = Log("guoke_scientific") API_URL = "http://www.guokr.com/apis/minisite/article.json?retrieve_type=by_subject&limit=20&offset={}&_=1508757235776" DEFAULT_HEADERS = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/' '61.0.3163.100 Safari/537.36' } check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG) ARTICLE_ID_SET = set() def main(start, end, kw): iq = PriorityQueue() oq = PriorityQueue() result_q = Queue()
import re
import time
from copy import deepcopy
from queue import Queue, PriorityQueue
from urllib.parse import urlparse, unquote

from bs4 import BeautifulSoup

from web2kindle import MAIN_CONFIG
from web2kindle.libs.crawler import Crawler, RetryDownload, Task
from web2kindle.libs.db import ArticleDB
from web2kindle.libs.html2kindle import HTML2Kindle
from web2kindle.libs.send_email import SendEmail2Kindle
from web2kindle.libs.utils import write, md5string, load_config, check_config, format_file_name
from web2kindle.libs.log import Log

# Script-specific settings, loaded once when this module is imported.
# The path is relative, so it presumably resolves against the working directory.
SCRIPT_CONFIG = load_config('./web2kindle/config/jianshu_user.yml')
# Logger for this script, named "jianshu_user".
LOG = Log("jianshu_user")
# Request headers carrying a desktop Chrome User-Agent string.
DEFAULT_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                  '61.0.3163.100 Safari/537.36'
}
# Runs at import time. NOTE(review): presumably verifies that SAVE_PATH is
# available from the main or script config - confirm against check_config.
check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG)
# Module-wide set, initially empty; presumably holds ids of articles already
# handled - verify against its users.
ARTICLE_ID_SET = set()

# Sort-order keywords; these look like values for the order_by parameter of
# API_URL - confirm at the call site.
ORDER_TOP = 'top'
ORDER_COMMENT = 'commented_at'
ORDER_ADD = 'added_at'
# URL templates with positional placeholders: API_URL takes the /u/ path
# segment, the order_by value and the page value; BASE_URL takes only the
# /u/ path segment.
API_URL = 'https://www.jianshu.com/u/{}?order_by={}&page={}'
BASE_URL = 'https://www.jianshu.com/u/{}'
# Created on 2017/10/10 14:05 import os import re import time from copy import deepcopy from queue import Queue, PriorityQueue, Empty from urllib.parse import urlparse, unquote from web2kindle.libs.crawler import Crawler, RetryDownload, Task from web2kindle.libs.db import ArticleDB from web2kindle.libs.html2kindle import HTML2Kindle from web2kindle.libs.send_email import SendEmail2Kindle from web2kindle.libs.utils import write, md5string, load_config, check_config from web2kindle.libs.log import Log from bs4 import BeautifulSoup SCRIPT_CONFIG = load_config('./web2kindle/config/zhihu_zhuanlan_config.yml') MAIN_CONFIG = load_config('./web2kindle/config/config.yml') LOG = Log("zhihu_zhuanlan") DEFAULT_HEADERS = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/' '61.0.3163.100 Safari/537.36' } check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG) def main(zhuanlan_name_list, start, end, kw): iq = PriorityQueue() oq = PriorityQueue() result_q = Queue()
# !/usr/bin/env python # -*- encoding: utf-8 -*- # vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8: # Author: Vincent<*****@*****.**> # http://wax8280.github.io # Created on 2017/10/11 7:48 from web2kindle.libs.utils import load_config CRAWLER_CONFIG = load_config('./web2kindle/config/config.yml')
import datetime
import traceback
import time
from copy import deepcopy
from queue import Queue, PriorityQueue
from urllib.parse import urlparse

from web2kindle.libs.crawler import Crawler, RetryDownload, Task
from web2kindle.libs.db import ArticleDB
from web2kindle.libs.html2kindle import HTML2Kindle
from web2kindle.libs.send_email import SendEmail2Kindle
from web2kindle.libs.utils import write, format_file_name, load_config, check_config, md5string
from web2kindle.libs.log import Log
from bs4 import BeautifulSoup

# Script-specific and main settings, both loaded when this module is imported.
# The paths are relative, so they presumably resolve against the working directory.
SCRIPT_CONFIG = load_config('./web2kindle/config/qdaily_config.yml')
# NOTE(review): this re-reads config.yml locally; consider sharing one
# package-level MAIN_CONFIG if the project provides one.
MAIN_CONFIG = load_config('./web2kindle/config/config.yml')
# Logger for this script, named "qdaily_home".
LOG = Log("qdaily_home")
# Home-feed endpoint template; the single placeholder is filled by the caller
# (presumably a paging key - confirm at the call site).
API_URL = 'https://www.qdaily.com/homes/articlemore/{}.json'
# Request headers carrying a desktop Chrome User-Agent string.
DEFAULT_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                  '61.0.3163.100 Safari/537.36'
}
# Runs at import time. NOTE(review): presumably verifies that SAVE_PATH is
# available from the main or script config - confirm against check_config.
check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG)

# Per-category endpoint templates: each differs only in the numeric category
# segment of the path and has one placeholder, like API_URL.
API_BUSINESS = 'https://www.qdaily.com/categories/categorymore/18/{}.json'
API_INTELLIGENT = 'https://www.qdaily.com/categories/categorymore/4/{}.json'
API_DESIGN = 'https://www.qdaily.com/categories/categorymore/17/{}.json'
API_FASHION = 'https://www.qdaily.com/categories/categorymore/19/{}.json'
API_ENTERTAINMENT = 'https://www.qdaily.com/categories/categorymore/3/{}.json'
API_CITY = 'https://www.qdaily.com/categories/categorymore/5/{}.json'
import re
import time
from copy import deepcopy
from queue import Queue, PriorityQueue
from urllib.parse import urlparse, unquote

from bs4 import BeautifulSoup

from web2kindle import MAIN_CONFIG
from web2kindle.libs.crawler import Crawler, RetryDownload, Task
from web2kindle.libs.db import ArticleDB
from web2kindle.libs.html2kindle import HTML2Kindle
from web2kindle.libs.send_email import SendEmail2Kindle
from web2kindle.libs.utils import write, md5string, load_config, check_config, format_file_name
from web2kindle.libs.log import Log

# Script-specific settings, loaded once when this module is imported.
# The path is relative, so it presumably resolves against the working directory.
SCRIPT_CONFIG = load_config('./web2kindle/config/jianshu_zhuanti.yml')
# Logger for this script, named "jianshu_zhuanti".
LOG = Log("jianshu_zhuanti")
# Request headers carrying a desktop Chrome User-Agent string.
DEFAULT_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                  '61.0.3163.100 Safari/537.36'
}
# Runs at import time. NOTE(review): presumably verifies that SAVE_PATH is
# available from the main or script config - confirm against check_config.
check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG)
# Module-wide set, initially empty; presumably holds ids of articles already
# handled - verify against its users.
ARTICLE_ID_SET = set()

# Sort-order keywords; these look like values for the order_by parameter of
# API_URL - confirm at the call site.
ORDER_TOP = 'top'
ORDER_COMMENT = 'commented_at'
ORDER_ADD = 'added_at'
# URL templates with positional placeholders: API_URL takes the /c/ path
# segment, the order_by value and the page value; BASE_URL takes only the
# /c/ path segment.
API_URL = 'https://www.jianshu.com/c/{}?order_by={}&page={}'
BASE_URL = 'https://www.jianshu.com/c/{}'
import time
from copy import deepcopy
from queue import Queue, PriorityQueue
from urllib.parse import urlparse, unquote

from bs4 import BeautifulSoup

from web2kindle import MAIN_CONFIG
from web2kindle.libs.crawler import Crawler, RetryDownload, Task
from web2kindle.libs.db import ArticleDB
from web2kindle.libs.html2kindle import HTML2Kindle
from web2kindle.libs.send_email import SendEmail2Kindle
from web2kindle.libs.utils import write, md5string, load_config, check_config, get_next_datetime_string, \
    compare_datetime_string, get_datetime_string
from web2kindle.libs.log import Log

# Script-specific settings, loaded once when this module is imported.
# The path is relative, so it presumably resolves against the working directory.
SCRIPT_CONFIG = load_config('./web2kindle/config/zhihu_daily.yml')
# Logger for this script, named "zhihu_daily".
LOG = Log("zhihu_daily")
# Request headers; the User-Agent here is a mobile API-client string rather
# than a browser one.
DEFAULT_HEADERS = {
    'User-Agent': 'DailyApi/4 (Linux; Android 4.4.2; SM-T525 Build/samsung/picassoltezs/picassolte/KOT49H/zh_CN) '
                  'Google-HTTP-Java-Client/1.22.0 (gzip) Google-HTTP-Java-Client/1.22.0 (gzip)'
}
# Runs at import time. NOTE(review): presumably verifies that SAVE_PATH is
# available from the main or script config - confirm against check_config.
check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG)
# Module-wide set, initially empty; presumably holds ids of articles already
# handled - verify against its users.
ARTICLE_ID_SET = set()

# Endpoint for the latest stories.
TODAY_URL = 'http://news-at.zhihu.com/api/4/stories/latest'
# Endpoint template for earlier stories; the placeholder takes a date string,
# e.g. http://news-at.zhihu.com/api/4/stories/before/20180212
YESTERDAY_URL = 'http://news-at.zhihu.com/api/4/stories/before/{}'
# Module-level flag, initially True; presumably marks whether TODAY_URL is
# the endpoint currently in use - confirm where it is toggled.
IS_TODAY_URL = True
from copy import deepcopy from queue import Queue, PriorityQueue from threading import current_thread, active_count from urllib.parse import urlparse, unquote import time from bs4 import BeautifulSoup from web2kindle import MAIN_CONFIG from web2kindle.libs.crawler import Crawler, md5string, RetryDownload, Task from web2kindle.libs.db import ArticleDB from web2kindle.libs.utils import write, load_config, check_config from web2kindle.libs.html2kindle import HTML2Kindle from web2kindle.libs.log import Log from web2kindle.libs.send_email import SendEmail2Kindle SCRIPT_CONFIG = load_config('./web2kindle/config/zhihu_collection.yml') GET_BOOK_NAME_FLAG = False LOG = Log('zhihu_collection') DEFAULT_HEADERS = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/' '61.0.3163.100 Safari/537.36' } check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG) ARTICLE_ID_SET = set() def main(collection_num_list, start, end, kw): iq = PriorityQueue() oq = PriorityQueue() result_q = Queue()
# !/usr/bin/env python # -*- encoding: utf-8 -*- # vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8: # Author: Vincent<*****@*****.**> # http://wax8280.github.io # Created on 2017/10/11 7:48 from web2kindle.libs.utils import load_config MAIN_CONFIG = load_config('./web2kindle/config/config.yml')