Exemplo n.º 1
0
    def __init__(self):
        """Create the storage handles, an empty URL set and the request headers.

        ``MongoUrl`` and ``MongoArticle`` are defined outside this excerpt.
        NOTE(review): presumably they wrap MongoDB collections -- confirm
        against the module that provides them.
        """
        # Desktop Chrome (Windows 10) User-Agent value stored in ``self.headers``.
        chrome_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.12 Safari/537.36"

        # Storage handles are created first, in the original order.
        self.dburl = MongoUrl()
        self.dbarticle = MongoArticle()

        # Starts empty; nothing in this method populates it.
        self.url_set = set()

        # NOTE(review): presumably sent with outgoing HTTP requests -- confirm.
        self.headers = {"User-Agent": chrome_user_agent}
Exemplo n.º 2
0
    def __init__(self, master):
        """Size and centre the window, create shared state, then start the app.

        Args:
            master: Window object stored as ``self.window``. It must provide
                ``winfo_screenwidth``, ``winfo_screenheight`` and ``geometry``.
        """
        self.window = master

        # Fixed window size, centred on the screen reported by the window.
        win_w, win_h = 1400, 650
        screen_w = self.window.winfo_screenwidth()
        screen_h = self.window.winfo_screenheight()
        left = (screen_w - win_w) / 2
        top = (screen_h - win_h) / 2
        # Size of the parent container; ``%d`` truncates the float offsets.
        geometry_spec = '%dx%d+%d+%d' % (win_w, win_h, left, top)
        self.window.geometry(geometry_spec)

        # Tk variables.
        # NOTE(review): presumably bound to widgets built by create_page().
        self.threadnumVar = tk.IntVar()
        self.timeVar = tk.IntVar()
        self.save_pathVar = tk.StringVar()

        # Queues for log and error messages.
        # NOTE(review): producers and consumers are outside this excerpt.
        self.logMessage = JoinableQueue()
        self.errMessage = JoinableQueue()

        # Storage/config handles; their classes are defined elsewhere.
        self.dbconf = MongoConfig()
        self.dburl = MongoUrl()
        self.dbarticle = MongoArticle()

        # Start-up calls, kept in the original order: page, logs, crawler.
        self.create_page()
        self.show_logs()

        self.asyCraler()
Exemplo n.º 3
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__author__ = 'AJay'
__mtime__ = '2019/5/9 0009'

"""

from db import MongoArticle, MongoUrl
# Module-level handles, instantiated once when this module is imported.
# NOTE(review): MongoUrl/MongoArticle come from the project's ``db`` module;
# presumably constructing them opens a MongoDB connection -- confirm.
mu = MongoUrl()
ma = MongoArticle()

import time
from shortuuid import uuid
import os
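# TODO: Export each day's news, under random file names, into a folder named for that day.
'''
Takes an input path: if the path exists it is used; otherwise the directory of this file is used.
Outputs the location where the files are written.
Exports the current day's news.
'''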


class EexportTxt():
    def __init__(self):
        self.base_path = os.path.abspath(os.path.dirname(__file__))
        self.ds = 0
        self.length_p = 30
        self.file_size = 300 * 1024  # 300k

    def _is_input_path(self, input_path):
        if not os.path.exists(input_path):  # 路径函数
Exemplo n.º 4
0
 def __init__(self):
     """Create the storage handles and URL containers, then call ``init_set``.

     ``MongoUrl``, ``MongoArticle`` and ``init_set`` are defined outside this
     excerpt. NOTE(review): presumably ``init_set`` fills ``url_set`` from
     storage -- confirm against its definition.
     """
     # Storage handles are created first, in the original order.
     self.dburl = MongoUrl()
     self.dbarticle = MongoArticle()

     # Both containers start empty; queue.Queue is an unbounded FIFO.
     empty_set = set()
     fifo = queue.Queue()
     self.url_set = empty_set
     self.url_queue = fifo

     # Runs last, after every attribute above exists.
     self.init_set()