Exemplo n.º 1
0
    def __UA_IP(self, signal):
        '''
        Request fake UA/IP data and schedule a delayed "get task" request.

        Two ``SockCUA_IP`` packages are sent back to back, the first with
        ``state = 1`` and the second with ``state = 2`` (both with
        ``num = 5``).  Unless a timer is already stored in
        ``self.task_timer``, a 10 second ``threading.Timer`` is then started
        that sends a ``SockHead`` package with ``type = 1`` and
        ``opcode = 20000``.

        :param signal: the signal that triggered this handler; not read here.
        '''
        ua_ip = SockCUA_IP()
        ua_ip.num = 5
        # The same object is serialised twice, once per state value.
        for state in (1, 2):
            ua_ip.state = state
            self.__send(ua_ip.make_package())
        if self.heart_cnt % 3 == 2:
            print_plus('send UA IP')

        def task():
            # Timer callback: ask the server for a task.
            package = SockHead()
            package.type = 1
            package.opcode = 20000
            self.__send(package.make_package())
            print_plus('send get task')

        if self.task_timer:
            # A timer has already been armed; do not schedule another one.
            return
        self.task_timer = threading.Timer(10, task)
        # The flag and the name both belong to the timer thread.  Previously
        # ``True`` was passed to setName() and the name was applied to
        # ``self`` instead of the timer.
        self.task_timer.setDaemon(True)
        self.task_timer.setName('get_task_timer')
        self.task_timer.start()
Exemplo n.º 2
0
 def __cookies(self, package):
     '''
     Parse a received cookies package and queue it for the crawler model.

     :param package: raw package handed to ``SockSCookies``.
     '''
     parsed = SockSCookies(package)
     self.__add_signal(
         KidSignal(sub_model_opcode=SubModelOpcode.crawler_model,
                   opcode=CrawlerOpcode.cookies,
                   data=parsed))
     print_plus('recv SockSCookies')
Exemplo n.º 3
0
 def __storage(self, dic):
     '''
     Persist one crawled item and queue a storage record for reporting.

     :param dic: mapping with the keys ``'task'`` and ``'item'``.

     State codes passed to ``__task_state_feedback``: 10 when there is no
     item or the write fails, 5 just before the write, 6 after a successful
     write.  Once ``self.storage_list`` holds 10 or more records
     ``__send_msg`` is called.
     '''
     task, item = dic['task'], dic['item']
     if item is None:
         self.__task_state_feedback(task, 10)
         return

     key = item['basic']['key']
     self.__task_state_feedback(task, 5)

     table_name = str(task.attr_id)
     pid = str(task.pid)
     if kid_setting.CRAWLER_TYPE == 2:
         table_name = table_name + '_detail'

     written = storage_manager.write_data(item, table_name, pid, task.storage)
     if not written:
         print_plus('Storage result failed', level=2)
         self.__task_state_feedback(task, 10)
         return

     self.__task_state_feedback(task, 6)
     if task.storage == 2:  # FTP
         # For this backend the reported table is "<table>/<pid>".
         table_name = '/'.join((table_name, pid))

     record = {
         'job_id': task.job_id,
         'attr_id': task.attr_id,
         'table': table_name,
         'key': key,
         'depth': task.depth,
         'cur_depth': task.cur_depth,
         'storage': task.storage
     }
     self.storage_list.append(record)
     if len(self.storage_list) >= 10:
         self.__send_msg()
Exemplo n.º 4
0
 def __get_cookies(self, signal):
     '''
     Ask the server for cookies.

     :param signal: signal whose ``data`` is used as the request's
         ``attr_id``; the request's ``amount`` is fixed at 5.
     '''
     request = SockCCookies()
     request.amount = 5
     request.attr_id = signal.data
     payload = request.make_package()
     # The log line is emitted before the package is sent.
     print_plus('send SockCCookies:%d' % request.attr_id)
     self.__send(payload)
Exemplo n.º 5
0
 def __manager_registting(self, signal):
     '''
     Send the manager registration request (``SockCManagerREG``).

     :param signal: triggering signal; not read here.
     '''
     self.__send(SockCManagerREG().make_package())
     print_plus('send SockCManagerREG')
Exemplo n.º 6
0
 def __device_info(self, signal):
     '''
     Send a ``SockCDeviceInfo`` package to the server.

     :param signal: triggering signal; not read here.
     '''
     self.__send(SockCDeviceInfo().make_package())
     print_plus('send SockCDeviceInfo')
Exemplo n.º 7
0
 def __task_num(self, signal):
     '''
     Report a task count to the server.

     :param signal: signal whose ``data`` is sent as ``task_num``.
     '''
     report = SockCTaskNum()
     report.task_num = signal.data
     payload = report.make_package()
     self.__send(payload)
     print_plus('send SockCTaskNum: %d' % report.task_num)
Exemplo n.º 8
0
 def __task_amount(self, signal):
     '''
     Send a ``SockCCrawlingAmount`` package to the server.

     :param signal: triggering signal; not read here.
     '''
     self.__send(SockCCrawlingAmount().make_package())
     print_plus('send SockCCrawlingAmount')
Exemplo n.º 9
0
 def __heart(self, signal):
     '''
     Forward a heartbeat package, logging one heartbeat out of every three.

     :param signal: signal whose ``data`` is the package to send.
     '''
     log_this_beat = (self.heart_cnt % 3 == 2)
     if log_this_beat:
         print_plus('send heart')
     self.heart_cnt = self.heart_cnt + 1
     self.__send(signal.data)
Exemplo n.º 10
0
 def __manager_reg_success(self, package):
     '''
     Handle the server's reply to a manager registration.

     The manager id and token from the reply are stored on
     ``current_device_manager``, then ``__getting_UA_IP`` is called.

     :param package: raw package handed to ``SockSManagerREG``.
     '''
     reply = SockSManagerREG(package)
     manager = current_device_manager
     manager.set_manager_id(reply.manager_id)
     manager.set_token(reply.token)
     print_plus('recv SockSManagerREG')
     self.__getting_UA_IP()
Exemplo n.º 11
0
 def __task_process(self, package):
     '''
     Handle a crawling-amount reply from the server.

     The opcode, manager id and job id of the parsed package are printed.

     :param package: raw package handed to ``SockSCrawlingAmount``.
     '''
     package_info = SockSCrawlingAmount(package)
     # A parenthesised single-argument print behaves the same on Python 2
     # and is also valid Python 3 syntax.
     print(package_info.opcode)
     print(package_info.manager_id)
     print(package_info.job_id)
     print_plus('recv SockSCrawlingAmount')
Exemplo n.º 12
0
 def __sock_work(self, signal):
     '''
     Dispatch an outgoing signal to the sender registered for its opcode.

     Logs ``'sock_err'`` and returns when the opcode has no entry in
     ``self.send_selector``.  An entry whose value is falsy is skipped
     silently.

     :param signal: signal whose ``opcode`` selects the send method; the
         signal itself is passed to that method.
     '''
     # Membership is tested on the mapping itself; ``.keys()`` builds a
     # throwaway list on Python 2.
     if signal.opcode not in self.send_selector:
         print_plus('sock_err', file_line=True, level=2)
         return
     send_method = self.send_selector[signal.opcode]
     if send_method:
         send_method(signal)
Exemplo n.º 13
0
 def __send(self, package):
     '''
     Send a serialised package to the server.

     Nothing is sent when ``self.caller.caller`` or ``package`` is falsy.
     Any exception raised while sending is logged and swallowed.

     :param package: the data passed to ``sendall`` on the transport handle.
     '''
     if not (self.caller.caller and package):
         return
     try:
         self.caller.caller.transport.getHandle().sendall(package)
     except Exception as e:  # ``as`` form is valid on Python 2.6+ and 3.x
         err = 'send except:%s' % e
         print_plus(err, file_line=True, level=2)
Exemplo n.º 14
0
 def __distribute_task(self, package):
     '''
     Parse a task-distribution package and queue it for the crawler model.

     :param package: raw package handed to ``SockSDistributeTask``.
     '''
     parsed = SockSDistributeTask(package)
     self.__add_signal(
         KidSignal(sub_model_opcode=SubModelOpcode.crawler_model,
                   opcode=CrawlerOpcode.task_distribute,
                   data=parsed))
     print_plus('recv SockSDistributeTask')
Exemplo n.º 15
0
 def __hbase_daemon(self):
     cnt = 0
     while True:
         if cnt >= 5:
             try:
                 self.client.getTableNames()
             except Exception, e:
                 print_plus(content='Hbase Check Alive Failed', level=1)
                 print_plus(content=e, level=1)
                 self.__reconnect_hbase()
             finally:
Exemplo n.º 16
0
 def get_row(self, table=None, rowkey=None):
     '''
     Fetch one row from HBase.

     :param table: table name.
     :param rowkey: key of the row to read.
     :returns: the result of ``client.getRow``; ``None`` when
         ``self.state`` is falsy, when ``table`` or ``rowkey`` is missing,
         or when the lookup raises (the error is logged).
     '''
     if not self.state:
         return None
     if not (table and rowkey):
         return None
     try:
         return self.client.getRow(table, rowkey, None)
     except Exception as e:  # ``as`` form is valid on Python 2.6+ and 3.x
         print_plus('GetRowExcept: table:%s  %s' % (table, e), level=2)
     return None
Exemplo n.º 17
0
 def __getting_device_info(self, package):
     '''
     Handle a device-info package from the server.

     The parsed package is printed, then a ``c_device_info`` signal is
     queued for the socket send model.

     :param package: raw package handed to ``SockSDeviceInfo``.
     '''
     package_info = SockSDeviceInfo(package)
     # A parenthesised single-argument print behaves the same on Python 2
     # and is also valid Python 3 syntax.
     print(package_info)
     print_plus('recv SockSDeviceInfo')
     # TODO: send the device's current usage
     signal = KidSignal(sub_model_opcode=SubModelOpcode.sock_send_model,
                        opcode=KidSockOpcode.c_device_info)
     self.__add_signal(signal)
Exemplo n.º 18
0
 def get(self, table=None, rowkey=None):
     '''
     Fetch one row from HBase.

     :param table: table name.
     :param rowkey: key of the row to read.
     :returns: the result of ``client.getRow`` on success;
         ``HbaseManager.ConnectError`` when ``self.state`` is falsy;
         ``HbaseManager.GetError`` when the lookup raises (the error is
         logged); ``None`` when ``table`` or ``rowkey`` is missing.
     '''
     if not self.state:
         return HbaseManager.ConnectError
     if not (table and rowkey):
         return None
     try:
         return self.client.getRow(table, rowkey, None)
     except Exception as e:  # ``as`` form is valid on Python 2.6+ and 3.x
         print_plus(content='GetRowExcept: table:%s  %s' % (table, e),
                    level=1)
         return HbaseManager.GetError
Exemplo n.º 19
0
 def __write(self, table, key, value):
     '''
     Write one row to HBase.

     :param table: table name.
     :param key: row key.
     :param value: two-level mapping; every ``value[outer][inner]`` leaf
         becomes one ``Mutation`` on column ``'outer:inner'``.

     Exceptions raised by ``mutateRow`` are logged and swallowed.
     '''
     mutations = [
         Mutation(column='%s:%s' % (family, qualifier), value=cell)
         for family, columns in value.items()
         for qualifier, cell in columns.items()
     ]
     try:
         self.client.mutateRow(table, key, mutations, None)
         print_plus('write to hbase success: %s<=>%s' % (table, key))
     except Exception as e:  # ``as`` form is valid on Python 2.6+ and 3.x
         print_plus(e, level=2)
Exemplo n.º 20
0
 def __init__(self, host=None, port=None, timeout=15000):
     '''
     Build the Thrift client stack and try to open the connection.

     :param host: passed to ``TSocket.TSocket``.
     :param port: passed to ``TSocket.TSocket``.
     :param timeout: passed to ``setTimeout`` on the socket.

     ``self.state`` is 1 once the transport has been opened and 0
     otherwise.  A failed open is logged, not raised.
     '''
     self.state = 0
     self.transport = TSocket.TSocket(host, port)
     self.transport.setTimeout(timeout)
     # The raw socket is wrapped; from here on ``self.transport`` is the
     # buffered transport.
     self.transport = TTransport.TBufferedTransport(self.transport)
     self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
     self.client = Hbase.Client(self.protocol)
     try:
         self.transport.open()
         self.state = 1
         print_plus('HBase Init Succssed')
     except Exception as e:
         # ``except Exception`` rather than a bare ``except`` so that
         # KeyboardInterrupt / SystemExit are not swallowed; the reason for
         # the failure is logged as well.
         print_plus('HBase Init Failed', level=1)
         print_plus(e, level=1)
Exemplo n.º 21
0
def main():
    '''
    Start the crawler client.

    Sets the process title, connects a ``KidClientFactory`` to the
    configured server over TCP, sets ``SO_SNDBUF`` on the connection's
    socket to ``4096 * 100``, logs the settings in use and runs the reactor.
    '''
    setproctitle.setproctitle(kid_setting.CLIENT_PROC_NAME)
    factory = KidClientFactory()
    reactor.__init__()  # @UndefinedVariable
    reactor.suggestThreadPoolSize(25)  # @UndefinedVariable

    server_ip = kid_setting.SERVER_IP
    server_port = kid_setting.SERVER_PORT
    connector = reactor.connectTCP(  # @UndefinedVariable
        server_ip, server_port, factory)
    handle = connector.transport.getHandle()
    handle.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, 4096 * 100)

    summary = 'IP:%s\tPort:%s\tHBase IP:%s\tCrawler Type:%d' % (
        server_ip, server_port,
        kid_setting.HBASE_HOST, kid_setting.CRAWLER_TYPE)
    print_plus(summary)
    reactor.run()  # @UndefinedVariable
Exemplo n.º 22
0
 def __manage_task(self, signal):
     '''
     Accept a batch of distributed tasks and start executing them.

     Tasks whose ``job_id`` is already in ``self.task_id_list`` are not
     queued again.  State 2 is fed back for every task in the package,
     including those skipped as duplicates.  Exceptions raised by
     ``__exec_task`` are logged and swallowed.

     :param signal: signal whose ``data`` exposes a ``tasks`` iterable.
     '''
     package = signal.data
     for task in package.tasks:
         if task.job_id not in self.task_id_list:
             self.task_pool.append(task)
             self.task_id_list.add(task.job_id)
     for task in package.tasks:
         self.__task_state_feedback(task, 2)
     # NOTE(review): this adds the size of the whole pool, not only the
     # tasks queued above -- confirm that is the intended counter.
     self.task_cnt += len(self.task_pool)
     try:
         self.__exec_task()
     except Exception as e:  # ``as`` form is valid on Python 2.6+ and 3.x
         print_plus('%s' % e, True, True, 2)
Exemplo n.º 23
0
    def __timer_manager(self):
        '''
        Run periodic housekeeping while connected and alive.

        Ticks are counted from 1, with a one second sleep per tick:

        * every 59th tick a liveness message is logged;
        * every 29th tick ``self.cookies_request_list`` is reset;
        * every 4th tick a non-empty ``self.storage_list`` is flushed
          through ``__send_msg``;
        * every 14th tick the length of the scheduler queue stored under
          key 0 is reported with a ``c_task_num`` signal, followed by a
          ``c_achieve_UA_IP`` signal.
        '''
        tick = 1
        while kid_setting.CONNECT and self.is_alive():
            if tick % 59 == 0:
                print_plus('__timer_manager is alive')
            if tick % 29 == 0:
                self.cookies_request_list = []
            if tick % 4 == 0 and len(self.storage_list) > 0:
                self.__send_msg()
            if tick % 14 == 0:
                scheduler = self.spider.crawler.engine.slot.scheduler
                mqs = scheduler.mqs.queues.get(0, None)
                # Report the current number of queued tasks.  (An earlier
                # variant reported the free capacity instead, i.e.
                # kid_setting.DEVICE_MAX_TASK minus the queue length.)
                task_cnt = len(mqs.q) if mqs else 0

                self.caller.add_signal(KidSignal(
                    sub_model_opcode=SubModelOpcode.sock_send_model,
                    opcode=KidSockOpcode.c_task_num,
                    data=task_cnt))
                self.caller.add_signal(KidSignal(
                    sub_model_opcode=SubModelOpcode.sock_send_model,
                    opcode=KidSockOpcode.c_achieve_UA_IP))
            time.sleep(1)
            tick += 1
Exemplo n.º 24
0
 def __create_table(self, table, columns_name):
     '''
     Create ``table`` if HBase does not have it yet.

     The cached ``self.tables`` is refreshed from the server only when
     ``table`` is not found in it.

     :param table: table name.
     :param columns_name: iterable of names; each becomes a
         ``ColumnDescriptor`` named ``'<name>:'`` with ``maxVersions=1``.
     :returns: ``None`` when the table already exists or was created;
         ``HbaseManager.CreateError`` when ``createTable`` raises;
         ``HbaseManager.ConnectError`` when any other step raises.
     '''
     try:
         if table not in self.tables:
             self.tables = self.client.getTableNames()
         if table in self.tables:
             return None
         cols = [ColumnDescriptor(name='%s:' % column_name, maxVersions=1)
                 for column_name in columns_name]
         try:
             self.client.createTable(table, cols)
         except Exception as e:  # valid on Python 2.6+ and 3.x
             print_plus(content=e, level=1)
             return HbaseManager.CreateError
     except Exception as e:
         print_plus(content=e, level=1)
         return HbaseManager.ConnectError
     return None
Exemplo n.º 25
0
 def __data_saved_info(self, signal):
     '''
     Report stored items to the server in one ``SockCDataSaveInfo`` package.

     :param signal: signal whose ``data`` is an iterable of dicts with the
         keys ``'key'``, ``'table'``, ``'job_id'``, ``'attr_id'``,
         ``'depth'``, ``'cur_depth'`` and ``'storage'``.

     Nothing is sent when ``signal.data`` is empty or ``None``.
     '''
     package = SockCDataSaveInfo()
     last_info = None
     for info in signal.data or ():
         storage_info = StorageInfo()
         storage_info.key = info['key']
         storage_info.name = info['table']
         storage_info.job_id = info['job_id']
         storage_info.attr_id = info['attr_id']
         storage_info.depth = info['depth']
         storage_info.cur_depth = info['cur_depth']
         package.storages.append(storage_info)
         last_info = info
     if last_info is None:
         # Empty batch.  Previously this fell through to a read of the
         # unbound loop variable and raised; there is nothing to report.
         return
     # The opcode for the whole batch is decided by its last entry.
     # NOTE(review): confirm that storage code 3 denotes the FTP backend
     # and that a batch never mixes storage kinds.
     if last_info['storage'] == 3:
         package.opcode = KidSockOpcode.c_ftp_save_info
     package_info = package.make_package()
     self.__send(package_info)
     print_plus('send SockCDataSaveInfo [%d][%d]' %
                (len(package.storages), len(package_info)))
Exemplo n.º 26
0
 def __init__(self, host=None, port=None, timeout=15000):
     '''
     Build the Thrift client stack, try to connect and start the watchdog.

     :param host: passed to ``TSocket.TSocket``.
     :param port: passed to ``TSocket.TSocket``.
     :param timeout: passed to ``setTimeout`` on the socket.

     On success ``self.state`` is 1 and ``self.tables`` holds the result of
     ``getTableNames``.  On failure the error is logged, ``self.state`` is
     0 and ``self.tables`` is an empty list.  The ``hbase_daemon`` thread
     is started in either case.
     '''
     # Defaults for a failed connect, so that both attributes always exist.
     # Without this, ``self.tables`` stayed undefined after a failed first
     # connect and later membership checks on it raised AttributeError.
     self.state = 0
     self.tables = []
     self.sock = TSocket.TSocket(host, port)
     self.sock.setTimeout(timeout)
     self.transport = TTransport.TBufferedTransport(self.sock)
     self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
     self.client = Hbase.Client(self.protocol)
     try:
         self.transport.open()
         self.state = 1
         self.tables = self.client.getTableNames()
         print_plus(content='HBase Init Succssed')
     except Exception as e:
         # ``except Exception`` rather than a bare ``except`` so that
         # KeyboardInterrupt / SystemExit are not swallowed.
         self.state = 0
         print_plus(content='HBase Init Failed', level=1)
         print_plus(content=e, level=1)
     finally:
         self.connection_check_thread = threading.Thread(
             target=self.__hbase_daemon, name='hbase_daemon')
         self.connection_check_thread.setDaemon(True)
         self.connection_check_thread.start()
Exemplo n.º 27
0
 def __reconnect_hbase(self):
     '''
     Close the transport and try to open it again.

     ``self.state`` is set to 1 when the transport reopens and to 0 when
     opening raises; the failure is logged, not raised.
     '''
     self.transport.close()
     try:
         self.transport.open()
         self.state = 1
         print_plus(content='HBase Init Succssed')
     except Exception as e:  # ``as`` form is valid on Python 2.6+ and 3.x
         print_plus(content='HBase Init Failed', level=1)
         print_plus(content=e, level=1)
         self.state = 0
Exemplo n.º 28
0
 def task():
     '''
     Send a ``SockHead`` package with type 1 and opcode 20000.

     ``self`` is taken from the enclosing scope.
     '''
     head = SockHead()
     head.type, head.opcode = 1, 20000
     self.__send(head.make_package())
     print_plus('send get task')
Exemplo n.º 29
0
        self.client.scannerClose(scanner)
        return results

    def get_row(self, table=None, rowkey=None):
        '''
        Fetch one row from HBase.

        :param table: table name.
        :param rowkey: key of the row to read.
        :returns: the result of ``client.getRow``; ``None`` when
            ``self.state`` is falsy, when ``table`` or ``rowkey`` is
            missing (logged as ``'get row error'``), or when the lookup
            raises (the error is logged).
        '''
        if not self.state:
            return None
        if not (table and rowkey):
            print_plus('get row error', level=2)
            return None
        try:
            return self.client.getRow(table, rowkey, None)
        except Exception as e:  # valid on Python 2.6+ and 3.x
            print_plus('GetRowExcept: table:%s  %s' % (table, e), level=2)
        return None

    def __create_table(self, table, columns_name):
        '''
        Create ``table`` if HBase does not have it yet.

        :param table: table name.
        :param columns_name: iterable of names; each becomes a
            ``ColumnDescriptor`` named ``'<name>:'`` with
            ``maxVersions=1``.

        An exception raised by ``createTable`` is logged and swallowed;
        one raised by ``getTableNames`` propagates to the caller.
        '''
        tables = self.client.getTableNames()
        if table in tables:
            return
        cols = [ColumnDescriptor(name='%s:' % column_name, maxVersions=1)
                for column_name in columns_name]
        try:
            self.client.createTable(table, cols)
        except Exception as e:  # valid on Python 2.6+ and 3.x
            # Logged through print_plus, like the other error paths here,
            # instead of a bare print statement.
            print_plus(e, level=2)
Exemplo n.º 30
0
 def __error(self, package):
     '''
     Log the error code carried by an error package.

     :param package: raw package handed to ``SockSError``.
     '''
     code = SockSError(package).error_code
     print_plus('sock error:%d' % code)