示例#1
0
class TableExtractor:
    def __init__(self, node_to_extract):
        self.node_to_extract = node_to_extract
        self.my_id = self._my_id = RandomId()
        self.msg_f = message.MsgFactory(PYMDHT_VERSION, self.my_id, None)
        self.querier = Querier()
        self.next_level = 159
        self.last_extraction_ts = 0

    def on_stop(self):
        pass  #self._experimental_m.on_stop()

    def main_loop(self):
        msgs_to_send = []
        current_time = time.time()
        if current_time > self.last_extraction_ts + 1:
            fn_msg = self.msg_f.outgoing_find_node_query(
                self.node_to_extract,
                self.node_to_extract.id.generate_close_id(self.next_level),
                None, None)
            if PING:
                fn_msg = self.msg_f.outgoing_ping_query(
                    self.node_to_extract, None)
            msgs_to_send.append(fn_msg)
            self.last_extraction_ts = current_time
            self.next_level -= 1
        # Take care of timeouts
        (self._next_timeout_ts,
         timeout_queries) = self.querier.get_timeout_queries()
        for query in timeout_queries:
            print 'TIMEOUT'
        timeout_call_ts, datagrams_to_send = self.querier.register_queries(
            msgs_to_send)
        return self.last_extraction_ts + 1, datagrams_to_send

    def on_datagram_received(self, datagram):
        data = datagram.data
        addr = datagram.addr
        datagrams_to_send = []
        try:
            msg = self.msg_f.incoming_msg(datagram)
        except (message.MsgError):
            # ignore message
            return self._next_main_loop_call_ts, datagrams_to_send

        if msg.type == message.RESPONSE:
            related_query = self.querier.get_related_query(msg)
            if related_query and related_query.query == message.FIND_NODE:
                print 'level', version_repr(msg.version)
                for node_ in msg.nodes:
                    print node_
            else:
                print 'not related'
        return self.last_extraction_ts + 1, datagrams_to_send
示例#2
0
class TableExtractor:
    def __init__(self, node_to_extract):
        self.node_to_extract = node_to_extract
        self.my_id = self._my_id = RandomId()
        self.msg_f = message.MsgFactory(PYMDHT_VERSION, self.my_id, None)
        self.querier = Querier()
        self.next_level = 159
        self.last_extraction_ts = 0

    def on_stop(self):
        pass  # self._experimental_m.on_stop()

    def main_loop(self):
        msgs_to_send = []
        current_time = time.time()
        if current_time > self.last_extraction_ts + 1:
            fn_msg = self.msg_f.outgoing_find_node_query(
                self.node_to_extract, self.node_to_extract.id.generate_close_id(self.next_level), None, None
            )
            if PING:
                fn_msg = self.msg_f.outgoing_ping_query(self.node_to_extract, None)
            msgs_to_send.append(fn_msg)
            self.last_extraction_ts = current_time
            self.next_level -= 1
        # Take care of timeouts
        (self._next_timeout_ts, timeout_queries) = self.querier.get_timeout_queries()
        for query in timeout_queries:
            print "TIMEOUT"
        timeout_call_ts, datagrams_to_send = self.querier.register_queries(msgs_to_send)
        return self.last_extraction_ts + 1, datagrams_to_send

    def on_datagram_received(self, datagram):
        data = datagram.data
        addr = datagram.addr
        datagrams_to_send = []
        try:
            msg = self.msg_f.incoming_msg(datagram)
        except (message.MsgError):
            # ignore message
            return self._next_main_loop_call_ts, datagrams_to_send

        if msg.type == message.RESPONSE:
            related_query = self.querier.get_related_query(msg)
            if related_query and related_query.query == message.FIND_NODE:
                print "level", version_repr(msg.version)
                for node_ in msg.nodes:
                    print node_
            else:
                print "not related"
        return self.last_extraction_ts + 1, datagrams_to_send
示例#3
0
class Crawler(object):
    def __init__(self, bootstrap_nodes):
        self.rcrawler = RCrawler(set(), set(), START_PREFIX_LEN,
                                 bootstrap_nodes[0].id)
        self.rcrawler.got_nodes_handler(None, bootstrap_nodes)
        self.my_id = self._my_id = RandomId()
        self.msg_f = message.MsgFactory(PYMDHT_VERSION, self.my_id, None)
        self.querier = Querier()
        self.next_main_loop_ts = 0
        self.num_msgs = 0
        self.ok_nodes = set()
        self.dead_nodes = set()

    def on_stop(self):
        pass

    def main_loop(self):
        self.next_main_loop_ts = time.time() + .1
        if self.rcrawler.done:
            print 'ind | ok dead | ok dead'
            self.rcrawler.print_result()
            print self.rcrawler.get_num_ok(), self.rcrawler.get_num_dead()
            print self.num_msgs, 'messages sent'
            for n in sorted(self.ok_nodes, key=attrgetter('ip')):
                print n
            return
        msgs_to_send = []
        node_, target, rcrawler_obj = self.rcrawler.next()
        if target:
            msg = self.msg_f.outgoing_find_node_query(node_, target, None,
                                                      rcrawler_obj)
            #print 'target', `target`, 'to node', `node_.id`
            #print 'sending query to', extracting_node.node,
            #print extracting_node.node.id.log_distance(TARGET)
            msgs_to_send.append(msg)
            # Take care of timeouts
            (self._next_timeout_ts,
             timeout_queries) = self.querier.get_timeout_queries()
            for related_query in timeout_queries:
                #print 'timeout'
                related_query.experimental_obj.timeout_handler(
                    related_query.dst_node)
                self.dead_nodes.add(related_query.dst_node)
        if msgs_to_send:
            timeout_call_ts, datagrams_to_send = self.querier.register_queries(
                msgs_to_send)
        else:
            datagrams_to_send = []
        self.num_msgs += len(datagrams_to_send)
        if datagrams_to_send and self.num_msgs % PRINT_DOT_EACH == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        return self.next_main_loop_ts, datagrams_to_send

    def on_datagram_received(self, datagram):
        data = datagram.data
        addr = datagram.addr
        try:
            msg = self.msg_f.incoming_msg(datagram)
        except (message.MsgError):
            # ignore message
            return self.next_main_loop_ts, []

        if msg.type == message.RESPONSE:
            related_query = self.querier.get_related_query(msg)
            #print 'got reply',
            if related_query and related_query.experimental_obj:
                #print 'related >>>>>>>>>>>>>>>>>>>>>>', len(msg.nodes)
                nodes = msg.all_nodes
                node_ = msg.src_node
                related_query.experimental_obj.got_nodes_handler(node_, nodes)
                self.ok_nodes.add(node_)
        return self.next_main_loop_ts, []  #datagrams_to_send
示例#4
0
class Crawler(object):

    def __init__(self, bootstrap_nodes):
        self.target = bootstrap_nodes[0].id
        target_prefix = self.target.get_prefix(START_PREFIX_LEN)
        print target_prefix
        self.rcrawler = RCrawler(target_prefix)
        for n in bootstrap_nodes:
            self.rcrawler.found_node_handler(n)
        self.pending_nodes = bootstrap_nodes
        self.my_id = self._my_id = RandomId()
        self.msg_f = message.MsgFactory(PYMDHT_VERSION, self.my_id,
                                        None)
        self.querier = Querier()
        self.next_main_loop_ts = 0
        self.num_msgs = 0
        self.known_nodes = set(bootstrap_nodes)
        self.ok_nodes = set()
        self.dead_nodes = set()
        self.last_msg_ts = time.time()
                        
    def on_stop(self):
        pass

    def main_loop(self):
        self.next_main_loop_ts = time.time() + EXTRACTION_DELAY
        if time.time() > self.last_msg_ts + 4:# self.rcrawler.done:
            print 'ind | ok dead | ok dead'
            self.rcrawler.print_result()
            print 'total OK/DEAD', len(self.rcrawler.ok_nodes),
            print len(self.rcrawler.dead_nodes)
            print self.num_msgs, 'messages sent'
            for n in sorted(self.ok_nodes, key=attrgetter('ip')):
                print n
            return
        target = None
        msgs_to_send = []
        if ((self.num_msgs < 20 and self.num_msgs % 5 == 0)
            or (self.num_msgs < 100 and self.num_msgs % 10 == 0)
            or (self.num_msgs > 100 and self.num_msgs % 20 == 0)
            or (self.num_msgs > 100 and not self.pending_nodes)):
            dst_node, target = self.rcrawler.next_bootstrap_msg()
            if target:
                print 'O',
            else:
                print 'F',
        if not target and self.pending_nodes:
            dst_node = self.pending_nodes.pop(0)
            if dst_node.id.bin_str.startswith(self.rcrawler.target_prefix):
                self.rcrawler.pinged_node_handler(dst_node)
                target = dst_node.id
            else:
                target = self.target
            
        if target:
            msg = self.msg_f.outgoing_find_node_query(
                dst_node, target, None, self)
            #print 'target', `target`, 'to node', `node_.id`
            #print 'sending query to', extracting_node.node,
            #print extracting_node.node.id.log_distance(TARGET)
            msgs_to_send.append(msg)
            # Take care of timeouts
            (self._next_timeout_ts,
             timeout_queries) = self.querier.get_timeout_queries()
            for related_query in timeout_queries:
                #print 'timeout'
                timeout_node = related_query.dst_node
                self.dead_nodes.add(timeout_node)
                if timeout_node.id.bin_str.startswith(self.rcrawler.target_prefix):
                    self.rcrawler.timeout_handler(timeout_node)
        if msgs_to_send:
            timeout_call_ts, datagrams_to_send = self.querier.register_queries(
                msgs_to_send)
            self.last_msg_ts = time.time()
        else:
            datagrams_to_send = []
        self.num_msgs += len(datagrams_to_send)
        if datagrams_to_send and self.num_msgs % PRINT_DOT_EACH == 0:
            #print target.hex
            sys.stdout.write('.')
            sys.stdout.flush()
        return self.next_main_loop_ts, datagrams_to_send

    def on_datagram_received(self, datagram):
        data = datagram.data
        addr = datagram.addr
        try:
            msg = self.msg_f.incoming_msg(datagram)
        except(message.MsgError):
            # ignore message
            return self.next_main_loop_ts, []

        if msg.type == message.RESPONSE:
            related_query = self.querier.get_related_query(msg)
            #print 'got reply',
            if related_query and related_query.experimental_obj:
                nodes = msg.all_nodes
                src_node = msg.src_node
                print '%s >>>>>>>>>>>>>>>>>>>>>> %d' % (
                    src_node.id.hex, len(msg.nodes))
                if src_node.id.bin_str.startswith(self.rcrawler.target_prefix):
                    self.rcrawler.ok_node_handler(src_node)
                for n in nodes:
                    if n not in self.known_nodes:
                        add_node = len(self.ok_nodes) < 3
                        if n.id.bin_str.startswith(self.rcrawler.target_prefix):
                            add_node = True
                            self.rcrawler.found_node_handler(n)
                        if add_node:
                            self.known_nodes.add(n)
                            self.pending_nodes.append(n)
                self.ok_nodes.add(src_node)
        return self.next_main_loop_ts, []#datagrams_to_send
示例#5
0
class Crawler(object):
    def __init__(self, bootstrap_nodes):
        self.target = bootstrap_nodes[0].id
        target_prefix = self.target.get_prefix(START_PREFIX_LEN)
        print target_prefix
        self.rcrawler = RCrawler(target_prefix)
        for n in bootstrap_nodes:
            self.rcrawler.found_node_handler(n)
        self.pending_nodes = bootstrap_nodes
        self.my_id = self._my_id = RandomId()
        self.msg_f = message.MsgFactory(PYMDHT_VERSION, self.my_id, None)
        self.querier = Querier()
        self.next_main_loop_ts = 0
        self.num_msgs = 0
        self.known_nodes = set(bootstrap_nodes)
        self.ok_nodes = set()
        self.dead_nodes = set()
        self.last_msg_ts = time.time()

    def on_stop(self):
        pass

    def main_loop(self):
        self.next_main_loop_ts = time.time() + EXTRACTION_DELAY
        if time.time() > self.last_msg_ts + 4:  # self.rcrawler.done:
            print 'ind | ok dead | ok dead'
            self.rcrawler.print_result()
            print 'total OK/DEAD', len(self.rcrawler.ok_nodes),
            print len(self.rcrawler.dead_nodes)
            print self.num_msgs, 'messages sent'
            for n in sorted(self.ok_nodes, key=attrgetter('ip')):
                print n
            return
        target = None
        msgs_to_send = []
        if ((self.num_msgs < 20 and self.num_msgs % 5 == 0)
                or (self.num_msgs < 100 and self.num_msgs % 10 == 0)
                or (self.num_msgs > 100 and self.num_msgs % 20 == 0)
                or (self.num_msgs > 100 and not self.pending_nodes)):
            dst_node, target = self.rcrawler.next_bootstrap_msg()
            if target:
                print 'O',
            else:
                print 'F',
        if not target and self.pending_nodes:
            dst_node = self.pending_nodes.pop(0)
            if dst_node.id.bin_str.startswith(self.rcrawler.target_prefix):
                self.rcrawler.pinged_node_handler(dst_node)
                target = dst_node.id
            else:
                target = self.target

        if target:
            msg = self.msg_f.outgoing_find_node_query(dst_node, target, None,
                                                      self)
            #print 'target', `target`, 'to node', `node_.id`
            #print 'sending query to', extracting_node.node,
            #print extracting_node.node.id.log_distance(TARGET)
            msgs_to_send.append(msg)
            # Take care of timeouts
            (self._next_timeout_ts,
             timeout_queries) = self.querier.get_timeout_queries()
            for related_query in timeout_queries:
                #print 'timeout'
                timeout_node = related_query.dst_node
                self.dead_nodes.add(timeout_node)
                if timeout_node.id.bin_str.startswith(
                        self.rcrawler.target_prefix):
                    self.rcrawler.timeout_handler(timeout_node)
        if msgs_to_send:
            timeout_call_ts, datagrams_to_send = self.querier.register_queries(
                msgs_to_send)
            self.last_msg_ts = time.time()
        else:
            datagrams_to_send = []
        self.num_msgs += len(datagrams_to_send)
        if datagrams_to_send and self.num_msgs % PRINT_DOT_EACH == 0:
            #print target.hex
            sys.stdout.write('.')
            sys.stdout.flush()
        return self.next_main_loop_ts, datagrams_to_send

    def on_datagram_received(self, datagram):
        data = datagram.data
        addr = datagram.addr
        try:
            msg = self.msg_f.incoming_msg(datagram)
        except (message.MsgError):
            # ignore message
            return self.next_main_loop_ts, []

        if msg.type == message.RESPONSE:
            related_query = self.querier.get_related_query(msg)
            #print 'got reply',
            if related_query and related_query.experimental_obj:
                nodes = msg.all_nodes
                src_node = msg.src_node
                print '%s >>>>>>>>>>>>>>>>>>>>>> %d' % (src_node.id.hex,
                                                        len(msg.nodes))
                if src_node.id.bin_str.startswith(self.rcrawler.target_prefix):
                    self.rcrawler.ok_node_handler(src_node)
                for n in nodes:
                    if n not in self.known_nodes:
                        add_node = len(self.ok_nodes) < 3
                        if n.id.bin_str.startswith(
                                self.rcrawler.target_prefix):
                            add_node = True
                            self.rcrawler.found_node_handler(n)
                        if add_node:
                            self.known_nodes.add(n)
                            self.pending_nodes.append(n)
                self.ok_nodes.add(src_node)
        return self.next_main_loop_ts, []  #datagrams_to_send
示例#6
0
class Crawler(object):

    def __init__(self, bootstrap_nodes):
        self.rcrawler = RCrawler(set(), set(), 15, bootstrap_nodes[0].id)
        self.rcrawler.got_nodes_handler(None, bootstrap_nodes)
        self.my_id = self._my_id = RandomId()
        self.msg_f = message.MsgFactory(PYMDHT_VERSION, self.my_id,
                                        None)
        self.querier = Querier()
        self.next_main_loop_ts = 0
        self.num_msgs = 0
                        
    def on_stop(self):
        pass

    def main_loop(self):
        self.next_main_loop_ts = time.time() + .1
        if self.rcrawler.done:
            print 'ind | ok dead | ok dead'
            self.rcrawler.print_result()
            print self.rcrawler.get_num_ok(), self.rcrawler.get_num_dead()
            print self.num_msgs, 'messages sent'
            return
        msgs_to_send = []
        node_, target, rcrawler_obj = self.rcrawler.next()
        if target:
            msg = self.msg_f.outgoing_find_node_query(
                node_,
                target,
                None,
                rcrawler_obj)
            #print 'target', `target`, 'to node', `node_.id`
            #print 'sending query to', extracting_node.node,
            #print extracting_node.node.id.log_distance(TARGET)
            msgs_to_send.append(msg)
            # Take care of timeouts
            (self._next_timeout_ts,
             timeout_queries) = self.querier.get_timeout_queries()
            for related_query in timeout_queries:
                #print 'timeout'
                related_query.experimental_obj.timeout_handler(related_query.dst_node)
        if msgs_to_send:
            timeout_call_ts, datagrams_to_send = self.querier.register_queries(
                msgs_to_send)
        else:
            datagrams_to_send = []
        self.num_msgs += len(datagrams_to_send)
        if datagrams_to_send and self.num_msgs % 1 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        return self.next_main_loop_ts, datagrams_to_send

    def on_datagram_received(self, datagram):
        data = datagram.data
        addr = datagram.addr
        try:
            msg = self.msg_f.incoming_msg(datagram)
        except(message.MsgError):
            # ignore message
            return self.next_main_loop_ts, datagrams_to_send

        if msg.type == message.RESPONSE:
            related_query = self.querier.get_related_query(msg)
            #print 'got reply',
            if related_query and related_query.experimental_obj:
                #print 'related >>>>>>>>>>>>>>>>>>>>>>', len(msg.nodes)
                try:
                    nodes = msg.all_nodes
                except AttributeError:
                    print '\nno nodes>>>>>>>', msg._msg_dict
                    nodes = []
                node_ = related_query.dst_node
                related_query.experimental_obj.got_nodes_handler(node_, nodes)
        return self.next_main_loop_ts, []#datagrams_to_send
示例#7
0
class Crawler(object):

    def __init__(self, bootstrap_nodes):
        self.target = RandomId()
        self.extracting_queue = ExtractingQueue(self.target)
        for node_ in bootstrap_nodes:
            is_new_node = self.extracting_queue.add_node(node_)
        self.my_id = self._my_id = RandomId()
        self.msg_f = message.MsgFactory(PYMDHT_VERSION, self.my_id,
                                        None)
        self.querier = Querier()
        self.last_extraction_ts = time.time()
        self.num_msgs = 0
        self.nodes_inrange_w_response = set()
                        
    def on_stop(self):
        pass#self._experimental_m.on_stop()

    def main_loop(self):
        current_time = time.time()
        if current_time > self.last_extraction_ts + 4:
            return #crawler DONE
        msgs_to_send = []
        only_inrange = len(self.nodes_inrange_w_response) > 4
        extracting_node, step_target = \
            self.extracting_queue.next_node_step_target(only_inrange)
        if step_target:
            msg = self.msg_f.outgoing_find_node_query(
                extracting_node.lookup_node,
                step_target,
                None,
                extracting_node)
            #print 'sending query to', extracting_node.node,
            #print extracting_node.node.id.log_distance(TARGET)
            msgs_to_send.append(msg)
            self.last_extraction_ts = current_time
            # Take care of timeouts
            (self._next_timeout_ts,
             timeout_queries) = self.querier.get_timeout_queries()
            for query in timeout_queries:
                #print 'timeout'
                query.experimental_obj.timeout_handler()
        if msgs_to_send:
            timeout_call_ts, datagrams_to_send = self.querier.register_queries(
                msgs_to_send)
        else:
            datagrams_to_send = []
        self.num_msgs += len(datagrams_to_send)
        if datagrams_to_send and self.num_msgs % 100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        return current_time + .01, datagrams_to_send

    def on_datagram_received(self, datagram):
        data = datagram.data
        addr = datagram.addr
        datagrams_to_send = []
        try:
            msg = self.msg_f.incoming_msg(datagram)
        except(message.MsgError):
            # ignore message
            return self.last_extraction_ts + EXTRACTION_DELAY, datagrams_to_send

        if msg.type == message.RESPONSE:
            related_query = self.querier.get_related_query(msg)
            #print 'got reply',
            if related_query and related_query.experimental_obj:
                #print 'related >>>>>>>>>>>>>>>>>>>>>>', len(msg.nodes)
                try:
                    nodes = msg.nodes
                except AttributeError:
                    print '\nno nodes>>>>>>>', msg._msg_dict
                    nodes = []
                lookup_node = related_query.dst_node
                if in_range(lookup_node):
                    self.nodes_inrange_w_response.add(lookup_node)
                related_query.experimental_obj.add_found_nodes(nodes)
                new_nodes = False
                for node_ in nodes:
                    self.extracting_queue.add_node(node_)
        self.num_msgs += len(datagrams_to_send)
        return self.last_extraction_ts + EXTRACTION_DELAY, datagrams_to_send

    def get_bootstrap_nodes(self):
        return [en.lookup_node.node for en in self.extracting_queue.pinged_nodes[-100:]]
    
    def print_summary(self):
        self.extracting_queue.print_summary()
        print "Messages sent:", self.num_msgs
    
    def print_results(self):
        self.extracting_queue.print_results()