示例#1
0
    def __init__(self, user_list, cookie_list=None):
        """
        Initialise fetchers, the HTML parser and the empty result stores.

        If cookie_list is given it is stored as self.cookies and
        self.get_fetchers_by_cookie() is called. Otherwise user_list is
        stored as self.users, self.get_fetchers_by_user() is called, and a
        Database instance is created as well.

        :param user_list: a list of users; only stored when cookie_list is None
        :param cookie_list: a list of cookies, default to be None
        :return:
        """
        # Both paths start from an empty fetcher list.
        self.fetchers = []

        if cookie_list is None:
            # No cookies supplied: need login by users.
            self.users = user_list
            self.get_fetchers_by_user()
            self.parser = HtmlParser()
            # NOTE(review): self.database is only created on this path.
            self.database = Database()
        else:
            self.cookies = cookie_list
            self.get_fetchers_by_cookie()
            self.parser = HtmlParser()

        # Index of the fetcher currently in use.
        self.main_fetcher = 0

        # Result stores, all empty to begin with.
        self.follower_list = []  # followers
        self.followee_list = []  # followees
        self.timeline_list = []  # timelines
        self.profile_list = []  # profiles

        # Timing: the start is recorded now; the end stays unset here.
        self.start_time = datetime.now()
        self.end_time = None
示例#2
0
def run(key):
    """Fetch the page built from ``host`` and ``key``, parse "fund" and print it.

    The page is requested with the module-level ``BASEHEADERS``. If
    ``verify`` rejects the response, the "acw_tc" and "PHPSESSID" cookie
    string is appended to ``BASEHEADERS["Cookie"]`` and
    ``proxieser.proxies()`` is called.

    :param key: value passed to ``set_url`` together with ``host``
    :return: None; the parsed data is printed
    """
    target = set_url(host, key)
    # The instance is discarded -- presumably built for a side effect; confirm.
    Cookies()
    page = Spider(target).spider(BASEHEADERS)

    if not verify(page):
        current_cookie = BASEHEADERS["Cookie"]
        extra_cookie = Cookies.cookie_str(["acw_tc", "PHPSESSID"])
        BASEHEADERS["Cookie"] = current_cookie + extra_cookie
        proxieser.proxies()
        # NOTE(review): the page is not fetched again after this refresh, so
        # the response that failed verification is what gets parsed below.

    data = HtmlParser(page).parser("fund")

    print(data)
示例#3
0
    def __init__(self):
        """Connect to the manager server and fetch the shared queues.

        Registers the four queue accessor names on BaseManager, connects to
        127.0.0.1:8001, stores the task, result, page and data queue proxies
        on the instance, and creates the HtmlDownloader and HtmlParser.
        """
        # Initialise the connection for this distributed worker node.
        # Step 1: register with BaseManager the method names used to obtain
        # the queues.
        for accessor in ('get_task_queue', 'get_result_queue',
                         'get_page_queue', 'get_data_queue'):
            BaseManager.register(accessor)

        server_host = '127.0.0.1'
        print('Connect to sever %s...' % server_host)

        manager = BaseManager(address=(server_host, 8001), authkey=b'yuan')
        self.m = manager
        manager.connect()

        # Queue proxies exposed by the manager.
        self.task = manager.get_task_queue()
        self.result = manager.get_result_queue()
        self.page = manager.get_page_queue()
        self.data = manager.get_data_queue()

        self.downloader = HtmlDownloader()
        self.parser = HtmlParser()

        banner = '*--------------------------------------------*'
        print(banner)
        print('初始化完成')
        print(banner)
示例#4
0
def get_title(response):
    """Return what ``HtmlParser(response).parser("title")`` produces.

    :param response: passed unchanged to the ``HtmlParser`` constructor
    :return: the value returned by the parser for "title"
    """
    html_parser = HtmlParser(response)
    return html_parser.parser("title")
示例#5
0
 def __init__(self):
     """Create the HtmlDownloader, HtmlParser and DataOutput helpers."""
     # The right-hand side is evaluated left to right, so the downloader is
     # constructed first, then the parser, then the output.
     self.downloader, self.parser, self.output = (
         HtmlDownloader(),
         HtmlParser(),
         DataOutput(),
     )