Пример #1
0
def config_page():
    """Show the per-script configuration form (GET) or save one script's settings (POST).

    GET: ``SCRIPT_CONFIGS`` is deep-copied, each script's YAML file
    ``<config_path>/<script_name>.yml`` is loaded, and every stored value is
    attached under ``'value'`` to the config item whose ``'config_name'``
    matches, before ``config.html`` is rendered with the result.

    POST: for every form field whose name contains ``_check``, the value of
    the field with ``_check`` removed is collected, and the collection is
    written to the YAML file of the script named by the ``script_name`` field.

    Returns:
        The rendered template for GET, an empty ``Response`` after a
        successful POST, or a ``Response`` with status 400 when
        ``script_name`` is missing or does not name a known script.
    """
    config_path = './web2kindle/config'
    configs = deepcopy(SCRIPT_CONFIGS)

    if request.method == 'GET':
        # Load the stored values so the form shows them as defaults.
        for each_script in configs:
            path = os.path.join(config_path,
                                each_script['script_name'] + '.yml')
            stored = load_config(path)
            for each_config in each_script['configs']:
                config_name = each_config['config_name']
                if config_name in stored:
                    each_config['value'] = stored[config_name]
        return render_template('config.html', configs=configs)
    elif request.method == 'POST':
        form_data = request.form.to_dict()

        # ``script_name`` comes straight from the client and ends up in a
        # file path that is written to.  Only accept names of scripts we
        # actually know, so values such as '../../x' cannot escape
        # ``config_path``.
        script_name = form_data.get('script_name')
        known_scripts = {
            each_script['script_name'] for each_script in configs
        }
        if script_name not in known_scripts:
            return Response(status=400)

        new_config = {}
        for field_name in form_data:
            if '_check' in field_name:
                config_name = field_name.replace('_check', '')
                # Skip marker fields whose value field was not submitted
                # instead of failing the whole request with a KeyError.
                if config_name in form_data:
                    new_config[config_name] = form_data[config_name]

        write_config(
            os.path.join(config_path, script_name + '.yml'),
            new_config)
        return Response()
Пример #2
0
    def __init__(self):
        """Read the mail settings from config.yml and create an SMTP client.

        If any of the required keys is missing from the loaded configuration,
        an error is logged and the process is terminated with exit status 1.
        """
        self.CONFIG = load_config('./web2kindle/config/config.yml')
        self.log = Log('SendEmail2Kindle')

        # (attribute to set, key that must exist in the configuration)
        required_settings = (
            ('username', 'EMAIL_USERNAME'),
            ('password', 'PASSWORD'),
            ('smtp_addr', 'SMTP_ADDR'),
            ('kindle_addr', 'KINDLE_ADDR'),
        )
        try:
            for attribute, config_key in required_settings:
                setattr(self, attribute, self.CONFIG[config_key])
        except KeyError:
            self.log.log_it("无法实例化SendEmail2Kindle,请确保config.yml配置完整", 'ERROR')
            import os
            os._exit(1)

        self.sended = []
        self.sender = self.username
        self.client = smtplib.SMTP()
Пример #3
0
# !/usr/bin/env python
# coding: utf-8

import logging
import sys
import os
from logging.handlers import WatchedFileHandler
from functools import partial
from web2kindle.libs.utils import load_config

# Settings read from config.yml once, when this module is imported.
# NOTE(review): the path is relative, presumably to the working directory --
# confirm how load_config resolves it.
config = load_config('./web2kindle/config/config.yml')


class BaseLog(object):
    logger_dict = {}

    @staticmethod
    def log(logger_name, message, level):
        if level == 'INFO':
            BaseLog.get_logger(logger_name).info(message)
        elif level == 'DEBUG':
            BaseLog.get_logger(logger_name).debug(message)
        elif level == 'ERROR':
            BaseLog.get_logger(logger_name).error(message)
        elif level == 'WARN':
            BaseLog.get_logger(logger_name).warning(message)

    @staticmethod
    def get_logger(logger_name):
        if logger_name not in BaseLog.logger_dict:
            logger = logging.getLogger(logger_name)
Пример #4
0
import re
import time
from copy import deepcopy
from queue import Queue, PriorityQueue
from urllib.parse import urlparse
from bs4 import BeautifulSoup

from web2kindle import MAIN_CONFIG
from web2kindle.libs.crawler import Crawler, RetryDownload, Task
from web2kindle.libs.db import ArticleDB
from web2kindle.libs.html2kindle import HTML2Kindle
from web2kindle.libs.send_email import SendEmail2Kindle
from web2kindle.libs.utils import write, load_config, check_config, md5string
from web2kindle.libs.log import Log

# Script-specific settings, read from guoke_scientific.yml at import time.
SCRIPT_CONFIG = load_config('./web2kindle/config/guoke_scientific.yml')
# Logger for this script.
LOG = Log("guoke_scientific")
# Article-list endpoint; `{}` is the offset placeholder, the page size is fixed at 20.
API_URL = "http://www.guokr.com/apis/minisite/article.json?retrieve_type=by_subject&limit=20&offset={}&_=1508757235776"
# Request headers carrying a desktop Chrome User-Agent string.
DEFAULT_HEADERS = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
    '61.0.3163.100 Safari/537.36'
}
# NOTE(review): presumably verifies that 'SAVE_PATH' is configured -- confirm in check_config.
check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG)
# Starts empty; presumably collects the ids of articles already handled -- verify against usage.
ARTICLE_ID_SET = set()


def main(start, end, kw):
    iq = PriorityQueue()
    oq = PriorityQueue()
    result_q = Queue()
Пример #5
0
import re
import time
from copy import deepcopy
from queue import Queue, PriorityQueue
from urllib.parse import urlparse, unquote
from bs4 import BeautifulSoup

from web2kindle import MAIN_CONFIG
from web2kindle.libs.crawler import Crawler, RetryDownload, Task
from web2kindle.libs.db import ArticleDB
from web2kindle.libs.html2kindle import HTML2Kindle
from web2kindle.libs.send_email import SendEmail2Kindle
from web2kindle.libs.utils import write, md5string, load_config, check_config, format_file_name
from web2kindle.libs.log import Log

# Script-specific settings, read from jianshu_user.yml at import time.
SCRIPT_CONFIG = load_config('./web2kindle/config/jianshu_user.yml')
# Logger for this script.
LOG = Log("jianshu_user")
# Request headers carrying a desktop Chrome User-Agent string.
DEFAULT_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                  '61.0.3163.100 Safari/537.36'
}

# NOTE(review): presumably verifies that 'SAVE_PATH' is configured -- confirm in check_config.
check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG)
# Starts empty; presumably collects the ids of articles already handled -- verify against usage.
ARTICLE_ID_SET = set()

# Sort keys; presumably the accepted values for `order_by` in API_URL -- confirm against callers.
ORDER_TOP = 'top'
ORDER_COMMENT = 'commented_at'
ORDER_ADD = 'added_at'
# Listing URL with three placeholders: the /u/ path segment, `order_by` and `page`.
API_URL = 'https://www.jianshu.com/u/{}?order_by={}&page={}'
# Same /u/ URL without the query string.
BASE_URL = 'https://www.jianshu.com/u/{}'
Пример #6
0
# Created on 2017/10/10 14:05
import os
import re
import time
from copy import deepcopy
from queue import Queue, PriorityQueue, Empty
from urllib.parse import urlparse, unquote
from web2kindle.libs.crawler import Crawler, RetryDownload, Task
from web2kindle.libs.db import ArticleDB
from web2kindle.libs.html2kindle import HTML2Kindle
from web2kindle.libs.send_email import SendEmail2Kindle
from web2kindle.libs.utils import write, md5string, load_config, check_config
from web2kindle.libs.log import Log
from bs4 import BeautifulSoup

# Script-specific settings, read from zhihu_zhuanlan_config.yml at import time.
SCRIPT_CONFIG = load_config('./web2kindle/config/zhihu_zhuanlan_config.yml')
# Settings from config.yml, loaded directly by this module.
MAIN_CONFIG = load_config('./web2kindle/config/config.yml')
# Logger for this script.
LOG = Log("zhihu_zhuanlan")
# Request headers carrying a desktop Chrome User-Agent string.
DEFAULT_HEADERS = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
    '61.0.3163.100 Safari/537.36'
}

# NOTE(review): presumably verifies that 'SAVE_PATH' is configured -- confirm in check_config.
check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG)


def main(zhuanlan_name_list, start, end, kw):
    iq = PriorityQueue()
    oq = PriorityQueue()
    result_q = Queue()
Пример #7
0
# !/usr/bin/env python
# -*- encoding: utf-8 -*-
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
# Author: Vincent<*****@*****.**>
#         http://wax8280.github.io
# Created on 2017/10/11 7:48
from web2kindle.libs.utils import load_config

# Settings read from config.yml once, when this module is imported.
# NOTE(review): the path is relative, presumably to the working directory --
# confirm how load_config resolves it.
CRAWLER_CONFIG = load_config('./web2kindle/config/config.yml')
Пример #8
0
import datetime
import traceback
import time
from copy import deepcopy
from queue import Queue, PriorityQueue
from urllib.parse import urlparse

from web2kindle.libs.crawler import Crawler, RetryDownload, Task
from web2kindle.libs.db import ArticleDB
from web2kindle.libs.html2kindle import HTML2Kindle
from web2kindle.libs.send_email import SendEmail2Kindle
from web2kindle.libs.utils import write, format_file_name, load_config, check_config, md5string
from web2kindle.libs.log import Log
from bs4 import BeautifulSoup

# Script-specific settings, read from qdaily_config.yml at import time.
SCRIPT_CONFIG = load_config('./web2kindle/config/qdaily_config.yml')
# Settings from config.yml, loaded directly by this module.
MAIN_CONFIG = load_config('./web2kindle/config/config.yml')
# Logger for this script.
LOG = Log("qdaily_home")
# Home-page "articlemore" JSON endpoint with a single `{}` placeholder.
API_URL = 'https://www.qdaily.com/homes/articlemore/{}.json'
# Request headers carrying a desktop Chrome User-Agent string.
DEFAULT_HEADERS = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
    '61.0.3163.100 Safari/537.36'
}
# NOTE(review): presumably verifies that 'SAVE_PATH' is configured -- confirm in check_config.
check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG)
# Per-category "categorymore" JSON endpoints; they differ only in the numeric
# category id in the path, and each keeps one `{}` placeholder.
API_BUSINESS = 'https://www.qdaily.com/categories/categorymore/18/{}.json'
API_INTELLIGENT = 'https://www.qdaily.com/categories/categorymore/4/{}.json'
API_DESIGN = 'https://www.qdaily.com/categories/categorymore/17/{}.json'
API_FASHION = 'https://www.qdaily.com/categories/categorymore/19/{}.json'
API_ENTERTAINMENT = 'https://www.qdaily.com/categories/categorymore/3/{}.json'
API_CITY = 'https://www.qdaily.com/categories/categorymore/5/{}.json'
Пример #9
0
import re
import time
from copy import deepcopy
from queue import Queue, PriorityQueue
from urllib.parse import urlparse, unquote
from bs4 import BeautifulSoup

from web2kindle import MAIN_CONFIG
from web2kindle.libs.crawler import Crawler, RetryDownload, Task
from web2kindle.libs.db import ArticleDB
from web2kindle.libs.html2kindle import HTML2Kindle
from web2kindle.libs.send_email import SendEmail2Kindle
from web2kindle.libs.utils import write, md5string, load_config, check_config, format_file_name
from web2kindle.libs.log import Log

# Script-specific settings, read from jianshu_zhuanti.yml at import time.
SCRIPT_CONFIG = load_config('./web2kindle/config/jianshu_zhuanti.yml')
# Logger for this script.
LOG = Log("jianshu_zhuanti")
# Request headers carrying a desktop Chrome User-Agent string.
DEFAULT_HEADERS = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
    '61.0.3163.100 Safari/537.36'
}

# NOTE(review): presumably verifies that 'SAVE_PATH' is configured -- confirm in check_config.
check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG)
# Starts empty; presumably collects the ids of articles already handled -- verify against usage.
ARTICLE_ID_SET = set()

# Sort keys; presumably the accepted values for `order_by` in API_URL -- confirm against callers.
ORDER_TOP = 'top'
ORDER_COMMENT = 'commented_at'
ORDER_ADD = 'added_at'
# Listing URL with three placeholders: the /c/ path segment, `order_by` and `page`.
API_URL = 'https://www.jianshu.com/c/{}?order_by={}&page={}'
# Same /c/ URL without the query string.
BASE_URL = 'https://www.jianshu.com/c/{}'
Пример #10
0
import time
from copy import deepcopy
from queue import Queue, PriorityQueue
from urllib.parse import urlparse, unquote
from bs4 import BeautifulSoup

from web2kindle import MAIN_CONFIG
from web2kindle.libs.crawler import Crawler, RetryDownload, Task
from web2kindle.libs.db import ArticleDB
from web2kindle.libs.html2kindle import HTML2Kindle
from web2kindle.libs.send_email import SendEmail2Kindle
from web2kindle.libs.utils import write, md5string, load_config, check_config, get_next_datetime_string, \
    compare_datetime_string, get_datetime_string
from web2kindle.libs.log import Log

# Script-specific settings, read from zhihu_daily.yml at import time.
SCRIPT_CONFIG = load_config('./web2kindle/config/zhihu_daily.yml')
# Logger for this script.
LOG = Log("zhihu_daily")
# Request headers carrying the "DailyApi/4" Android client User-Agent string.
DEFAULT_HEADERS = {
    'User-Agent':
    'DailyApi/4 (Linux; Android 4.4.2; SM-T525 Build/samsung/picassoltezs/picassolte/KOT49H/zh_CN) '
    'Google-HTTP-Java-Client/1.22.0 (gzip) Google-HTTP-Java-Client/1.22.0 (gzip)'
}

# NOTE(review): presumably verifies that 'SAVE_PATH' is configured -- confirm in check_config.
check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG)
# Starts empty; presumably collects the ids of articles already handled -- verify against usage.
ARTICLE_ID_SET = set()

# Endpoint for the latest stories.
TODAY_URL = 'http://news-at.zhihu.com/api/4/stories/latest'
# Endpoint for stories before a given date; `{}` is the date placeholder,
# e.g. http://news-at.zhihu.com/api/4/stories/before/20180212
YESTERDAY_URL = 'http://news-at.zhihu.com/api/4/stories/before/{}'
# Module-level flag, initially True; presumably tracks whether TODAY_URL is
# the one currently in use -- confirm against the code that toggles it.
IS_TODAY_URL = True
Пример #11
0
from copy import deepcopy
from queue import Queue, PriorityQueue
from threading import current_thread, active_count
from urllib.parse import urlparse, unquote
import time
from bs4 import BeautifulSoup

from web2kindle import MAIN_CONFIG
from web2kindle.libs.crawler import Crawler, md5string, RetryDownload, Task
from web2kindle.libs.db import ArticleDB
from web2kindle.libs.utils import write, load_config, check_config
from web2kindle.libs.html2kindle import HTML2Kindle
from web2kindle.libs.log import Log
from web2kindle.libs.send_email import SendEmail2Kindle

# Script-specific settings, read from zhihu_collection.yml at import time.
SCRIPT_CONFIG = load_config('./web2kindle/config/zhihu_collection.yml')
# Module-level flag, initially False; presumably set once the book name has
# been obtained -- confirm against the code that updates it.
GET_BOOK_NAME_FLAG = False
# Logger for this script.
LOG = Log('zhihu_collection')
# Request headers carrying a desktop Chrome User-Agent string.
DEFAULT_HEADERS = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
    '61.0.3163.100 Safari/537.36'
}
# NOTE(review): presumably verifies that 'SAVE_PATH' is configured -- confirm in check_config.
check_config(MAIN_CONFIG, SCRIPT_CONFIG, 'SAVE_PATH', LOG)
# Starts empty; presumably collects the ids of articles already handled -- verify against usage.
ARTICLE_ID_SET = set()


def main(collection_num_list, start, end, kw):
    iq = PriorityQueue()
    oq = PriorityQueue()
    result_q = Queue()
Пример #12
0
# !/usr/bin/env python
# -*- encoding: utf-8 -*-
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
# Author: Vincent<*****@*****.**>
#         http://wax8280.github.io
# Created on 2017/10/11 7:48
from web2kindle.libs.utils import load_config

# Settings read from config.yml once, when this module is imported.
# NOTE(review): the path is relative, presumably to the working directory --
# confirm how load_config resolves it.
MAIN_CONFIG = load_config('./web2kindle/config/config.yml')