def __init__(self, config, seed):
    """Initialise the spider: logging, config, socket timeout and seed URLs.

    Args:
        config: path to an INI-style config file; its [spider] section
            must provide 'crawl_timeout' (seconds, parsed as float).
        seed: currently unused — the seed list below is hard-coded.
            NOTE(review): confirm whether `seed` should populate
            self.seeds; the callers' expectations are not visible here.
    """
    log.init_log('./log/MiniSpider')
    cp = ConfigParser.ConfigParser()
    # Close the config file deterministically instead of leaking the
    # handle opened inline (original: cp.readfp(open(config))).
    with open(config) as conf_file:
        cp.readfp(conf_file)
    # Flatten ConfigParser's private section storage into plain dicts,
    # merging defaults and dropping the internal '__name__' key.
    self.conf_dic = dict(cp._sections)
    for key in self.conf_dic:
        self.conf_dic[key] = dict(cp._defaults, **self.conf_dic[key])
        self.conf_dic[key].pop('__name__', None)
    # Process-wide socket timeout for every crawl request (Python 2 urllib2).
    urllib2.socket.setdefaulttimeout(float(self.conf_dic['spider']['crawl_timeout']))
    self.seeds = ["http://pycm.baidu.com:8081/page3.html"]
    # self.seeds = ["http://www.sina.com.cn/"]
    self.urls = []      # Url objects pending crawl
    self.urlpool = {}   # link -> 1, marks URLs already queued
    # Iterate the seeds directly instead of indexing via range(len(...)).
    for link in self.seeds:
        u = url.Url(link, "")
        self.urls.append(u)
        self.urlpool[u.link] = 1
    logging.info("init")
import string import time import os from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from email.header import Header from util.log import init_log from core.SMTP import SendMailDealer from config import * rstr = string.ascii_letters + string.digits RSTR = list(map(lambda x: x.encode(), rstr)) # str --> byte BASE_DIR = os.path.dirname(os.path.abspath(__file__)) LOG_FILE = BASE_DIR + '/log/smtp.log' logger = init_log(LOG_FILE) template = """--- INFO: This is an evaluation email sent by EmailTestTool to help email administrators to evaluate and strengthen their security. If you see this email, it means that you may are vulnerable to the email spoofing attacks. This email uses the {attack_name}({number}). ---------------------------------------------------------------------------------------------------- How to fix it:
# initiate evil nodes for i in range(honest_node_num, honest_node_num + evil_node_num): nodes.append(threading.Thread(target=attacking, args=(i, ))) i = 1 # start honest nodes for n in nodes: n.start() logging.info("node %d starts mining." % (i + 1)) i = i + 1 for n in nodes: n.join() if chain.check(): chain.print() logging.info("block chain length is: %d", chain.length()) logging.info("total evil blocks is: %d" % get_evil_node_block(honest_node_num)) test_growth_rate() print_miner_account() else: logging.warning("THE BLOCK CHAIN IS NOT VALID.") if __name__ == "__main__": init_log(honest_node_num=15, evil_node_num=0, difficulty="0000") simulate_pow(honest_node_num=15, evil_node_num=0, difficulty="0000") # change the arguments
# NOTE(review): fragment of a crawler method (references `self`); the
# enclosing `def` is outside this view.
response = Html_Downloader.download(url)
if response is not None:
    proxy_list = html_parser.parse(response, parser)
    if proxy_list is not None:
        # check the crawled proxies: count total vs. newly seen entries
        count, new = 0, 0
        for proxy in proxy_list:
            count += 1
            proxy_str = '%s:%s' % (proxy['ip'], proxy['port'])
            # de-duplicate via the in-memory set before persisting
            if proxy_str not in self.proxies_set:
                self.proxies_set.add(proxy_str)
                new += 1
                self.sqlhelper.insert(proxy)
        self.url_count += 1
        logger.info(
            '%d/%d -- <%s> 获取%d, 未记录的%d' % (self.url_count, self.url_total, url, count, new))
    else:
        # parsing failed for this page
        self.url_count += 1
        logger.warning('%d/%d -- <%s> 解析数据错误' % (self.url_count, self.url_total, url))
else:
    # download failed for this page
    self.url_count += 1
    logger.warning('%d/%d -- <%s> 下载页面错误' % (self.url_count, self.url_total, url))

if __name__ == "__main__":
    init_log()
    start_proxy_crawl()
def start_proxy_crawl():
    """Set up the 'ip_proxy' log, then build and run a proxy crawler."""
    init_log('ip_proxy')
    ProxyCrawl().run()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from util import log
from util import module_b

if __name__ == "__main__":
    # Configure a module-scoped logger, log a greeting, then exercise ClassB.
    logger = log.init_log("./log/test", __name__)
    logger.info("Hello main")
    module_b.ClassB().method()
""" import requests import json import os import warnings import datetime import time import sys import traceback import commands from util import log from datetime import datetime warnings.filterwarnings("ignore") LOG = log.init_log("./log/download", __name__) """ QQ音乐自己找的接口 歌单的接口 (参数:disstid 歌单) https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&disstid=2973463430 歌手的接口 (参数:singermid) https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg?format=json&singermid=000foEm54CJUqL&order=listen&begin=0&num=1000 歌手的热门歌曲: https://c.y.qq.com/rsc/fcgi-bin/fcg_order_singer_getnum.fcg?format=json&singermid=001oEyQf4Ub6s7" 搜索接口 https://c.y.qq.com/soso/fcgi-bin/client_search_cp?t=0&aggr=1&cr=1&catZhida=1&lossless=0&flag_qc=0&p=1&n=60&w=%E5%91%A8%E6%9D%B0%E4%BC%A6&format=json
def init_log():
    """Initialise application logging.

    Delegates to util.log.init_log() with its defaults and returns its
    result so callers may keep the configured logger (previously the
    return value was silently discarded; callers that ignore it are
    unaffected).
    """
    return log.init_log()
def __init__(self):
    """Load config.json and initialise logging with the configured file.

    Reads the 'log'/'file' path from the JSON configuration and hands it
    to util.log.init_log.
    """
    with open('config.json', 'rb') as cfg_file:
        cfg = json.load(cfg_file)
    log.init_log(cfg.get('log').get('file'))
# -*- coding:utf-8 -*-
import logging
from functools import partial

import tornado.httpserver
import tornado.ioloop
import tornado.options
import tornado.web

from util import log
from webRequest.docOcrTaskHandler import docOcrTaskHandler
from webRequest.docTypeHandler import docTypeHandler

# One application-wide log, plus a dedicated file for tornado's access logger.
log.init_log('./log/app')
log.init_log('./log/access', logging.getLogger("tornado.access"))


class Application(tornado.web.Application):
    """Tornado application exposing the document OCR endpoints."""

    def __init__(self):
        routes = [
            (r"/v1/doc", docOcrTaskHandler),
            (r"/v1/docType", docTypeHandler),
        ]
        options = dict(
            autoescape=None,
            compress_whitespace=False,
            autoreload=False,
            debug=False,
            decompress_request=True,
            compress_response=True,
        )
        tornado.web.Application.__init__(self, routes, **options)