Example #1
import json
import os
# import xlsxwriter
import xlrd
import xlwt
from xlutils.copy import copy  # supports reading and writing an existing workbook
import requests

from platform_crawler.utils.utils import Util  # provides record_log (same helper as in Example #2)

ask_sql_url = 'http://erp.btomorrow.cn/adminjson/adminjson/ERP_GetCrawlerTaskStatus'  # unused
post_res_url = 'http://erp.btomorrow.cn/adminjson/ERP_ReportPythonCrawlerTask'
fscapture = r'D:\fscapture\FSCapture.exe'

u = Util()
log_path = os.path.abspath('./logs/AliosExcel')
if not os.path.exists(log_path):
    os.makedirs(log_path)
logger = u.record_log(log_path, __name__)

real_ip = '139.224.116.116'
serv_parm = {
    'ip': real_ip,
    'user': '******',
    'pwd': 'hhmt@pwd@123',
    'dst_path': ''
}


class AliyunExcelSpider:
    def __init__(self, user_info):
        self.dst_path = '/data/python/%s/' % user_info['platform']  # per-platform destination directory on the server
        self.dir_path = None
        self.line_path = None
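
As the comment on the xlutils.copy import in Example #1 notes, it supports editing a workbook that already exists on disk. A minimal sketch of that xlrd + xlutils.copy round-trip, using a hypothetical file name report.xls:

rb = xlrd.open_workbook('report.xls', formatting_info=True)  # open the existing .xls for reading
wb = copy(rb)              # convert it to a writable xlwt workbook
ws = wb.get_sheet(0)       # grab the first sheet of the copy
ws.write(0, 0, 'updated')  # overwrite cell A1
wb.save('report.xls')      # write the modified workbook back to disk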
Example #2
import json
import os
from threading import Thread
from time import time

from platform_crawler.utils.post_get import post
from platform_crawler.utils.utils import Util
from platform_crawler.spiders.pylib.kill_sth import stop_thread, kill_chrome_fscapture  # , clean_desk
from platform_crawler.spiders.CPA.qq_finacial_spider import QQFinancialSpider

spider_type = {}

get_task_url = 'http://erp.btomorrow.cn/adminjson/ERP_PubishCrawlerTask'
u = Util()
sd_path = os.path.abspath('./save_data')
log_path = os.path.abspath('./logs')
logger = u.record_log(log_path, 'YYBHLCPD')

# record the process id
pid = os.getpid()
with open('cm_main.pid', 'w') as pd:
    pd.write(str(pid))


# Run the task in a separate thread so it can be forcibly killed if it exceeds its timeout
def run_process(task_name, args=None):
    args = args if args else ()
    task_object = task_name()  # instantiate the task class
    task_func = task_object.run_task  # entry point to execute
    p = Thread(target=task_func, args=(args,))
    start_time = time()
    kill_chrome_fscapture()
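    # Hedged sketch of how this kill-on-timeout pattern typically continues.
    # Assumptions: TIMEOUT is a module-level constant (hypothetical), and
    # stop_thread() accepts the Thread object, per the kill_sth helper import.
    p.start()
    while p.is_alive():
        p.join(1)  # poll roughly once per second
        if time() - start_time > TIMEOUT:  # TIMEOUT: hypothetical seconds limit
            logger.warning('%s timed out; force-stopping thread', task_name.__name__)
            stop_thread(p)           # forcibly stop the worker thread
            kill_chrome_fscapture()  # clean up Chrome/FSCapture child processes
            break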