示例#1
0
def run(out, strategy, coverages, max_effort):
    """Write one formatted result line to ``out`` per coverage target.

    Each entry of ``coverages`` is paired with the value produced by
    ``strat(coverages, max_effort)``.  The written line holds ``l``, the
    coverage, the strategy name (with 'crawler' shortened to 'crawlr'), the
    cost, the cost divided by ``graph.number_of_nodes()`` and the
    estimator's progress line.

    NOTE(review): ``strat``, ``strat_name``, ``l`` and ``graph`` are not
    parameters; they have to be provided by the enclosing scope.  The
    ``strategy`` parameter is not referenced in this body.
    """
    estimator = TimeEstimator(len(coverages))
    paired = izip(iter(coverages), strat(coverages, max_effort))
    for coverage, cost in paired:
        estimator.tick()
        progress = estimator.log_line()
        label = strat_name.replace('crawler', 'crawlr')
        cost_per_node = float(cost) / graph.number_of_nodes()
        row = (l, coverage, label, cost, cost_per_node, progress)
        out.write('%d  %.2f  %s  %d  %f  %s\n' % row)
示例#2
0
def run(q, strategy, coverages, coverage_funcs, max_effort):
    """Push one formatted result line onto ``q`` per strategy result.

    A mapping ``coverage_map(cov_func) -> coverages`` is built for every
    entry of ``coverage_funcs`` and handed to ``strat`` together with
    ``max_effort``.  Every yielded ``(cost, coverage, coverage_type)`` with
    ``cost >= 0`` becomes a result line; any other cost puts the string
    'FINISHED' on the queue instead (iteration continues afterwards).

    NOTE(review): ``strat``, ``strat_name``, ``l``, ``graph`` and
    ``coverage_map`` are not parameters; they have to be provided by the
    enclosing scope.  The ``strategy`` parameter is not referenced here.
    """
    estimator = TimeEstimator(len(coverages) * len(coverage_funcs))

    targets = {}
    for cov_func in coverage_funcs:
        targets[coverage_map(cov_func)] = coverages

    for cost, coverage, coverage_type in strat(targets, max_effort):
        # Written as "not >=" so that a NaN cost takes the same branch as
        # it would in an if/else on ``cost >= 0``.
        if not (cost >= 0):
            q.put('FINISHED')
            continue

        estimator.tick()
        progress = estimator.log_line()
        label = strat_name.replace('crawler', 'crawlr')
        cost_per_node = float(cost) / graph.number_of_nodes()
        q.put('%d  %.7f  %s %s  %d  %f  %s\n' %
              (l, coverage, coverage_type, label, cost, cost_per_node,
               progress))
示例#3
0
    def save_snowball_edgelist(self, filename):
        """Dump edges to ``filename`` in breadth-first ("snowball") order.

        The traversal starts at ``self.random_nodes()[0]`` and processes
        nodes from a FIFO queue.  For every processed node, one line
        ``"<node> <neighbour>"`` is written for each neighbour that has not
        been processed yet.  Only nodes reachable from the start node are
        covered.

        filename : string
            Path of the file to create (opened with mode 'w'); it is closed
            when the method returns or raises.

        When ``self.debug`` is true, progress is printed every 10000
        written edges.
        """
        from collections import deque

        with open(filename, 'w') as out:
            modulo = 10000
            total = self.number_of_edges()
            estimator = TimeEstimator(total / modulo)
            count = 0

            # deque gives O(1) pops from the front; slicing a list copies
            # the whole queue on every step.
            queue = deque([self.random_nodes()[0]])
            visited = set()
            while queue:
                node = queue.popleft()

                # A node can be queued several times before its turn comes;
                # expanding it again would write duplicate edges.
                if node in visited:
                    continue
                visited.add(node)

                for neigh in self.neighbors_iter(node):
                    if neigh in visited:
                        continue

                    queue.append(neigh)
                    out.write('%s %s\n' % (str(node), str(neigh)))
                    count += 1

                    if self.debug and count % modulo == 0:
                        print('INFO: %d edges dumped in save_snowball_edgelist(), total %d' % (
                            count, total))
                        estimator.tick()
                        print(estimator.log_line())

                out.flush()
示例#4
0
    def testTimeEstimator(self):
        """Tick a 5-step TimeEstimator once per second and check its report.

        After five one-second iterations the estimator is expected to
        report about 5 s elapsed, about 1 s per iteration, about 0 s left,
        and the exact log line below.
        """
        iterations = 5
        expected_log = 'INFO: 5 iterations | 5 total , 5.0 secs (0.1 mins) elapsed | 1.0 secs (0.0 mins) per it. | 0.0 secs (0.0 mins) left'

        estimator = TimeEstimator(iterations)
        for _ in range(iterations):
            time.sleep(1.0)
            estimator.tick()

        # Timing checks tolerate rounding to one decimal place.
        self.assertAlmostEqual(estimator.time_elapsed(), 5.0, places=1)
        self.assertAlmostEqual(estimator.time_per_iteration(), 1.0, places=1)
        self.assertAlmostEqual(estimator.time_left(), 0.0, places=1)

        self.assertEqual(estimator.log_line(), expected_log)
示例#5
0
    def save_snowball_edgelist_iter(self, filename):
        """Dump edges to ``filename`` using the 'visited' parameter cache.

        Instead of an in-memory queue, traversal state is kept in the
        parameter cache named 'visited', using these values:

          * 100000000 -- node not discovered yet
          * 100000001 -- node already expanded
          * anything lower -- discovery counter of a node waiting to be
            expanded (the start node gets 0)

        Each step takes the first entry returned by
        ``get_parameter_cache_iter('visited', random=False, ascending=True)``
        (presumably the lowest value -- confirm against the cache
        implementation), marks it as expanded and writes one line
        ``"<node> <neighbour>"`` for every neighbour whose cached value is
        still <= 100000000.  The loop ends when that first entry is already
        100000000 or 100000001.

        filename : string
            Path of the file to create (opened with mode 'w'); it is closed
            when the method returns or raises.

        The 'visited' cache is dropped and rebuilt at the start and removed
        again at the end.  When ``self.debug`` is true, progress is printed
        every 10000 written edges.
        """
        with open(filename, 'w') as out:
            self.remove_parameter_cache('visited')
            self.add_parameter_cache('visited')
            self.initialize_parameter('visited',
                                      100000000)  # 100000000 = not visited
            self.index_parameter_cache('visited')

            # TODO: do not rely on time here; it is misleading and hard to
            # debug -- use a counter instead.
            modulo = 10000
            total = self.number_of_edges()
            estimator = TimeEstimator(total / modulo)
            count = 0

            # Take whatever node the cache yields first as the start node.
            for node, _ in self.get_parameter_cache_iter('visited'):
                break

            visited = 0
            self.update_parameter_cache('visited', node,
                                        visited)  # start node: discovery order 0
            while visited <= 100000000.0:

                # Fetch the next candidate: first entry in ascending order.
                for node, visited in self.get_parameter_cache_iter(
                        'visited', random=False, ascending=True):
                    break
                if visited == 100000001.0 or visited == 100000000.0:
                    # Nothing pending: either everything was expanded
                    # (100000001) or only undiscovered nodes remain
                    # (100000000, i.e. a disconnected graph).
                    break
                self.update_parameter_cache('visited', node,
                                            100000001)  # 100000001 = visited

                for neigh in self.neighbors_iter(node):

                    if self.get_parameter_cache('visited',
                                                neigh) <= 100000000:  # not expanded yet

                        count += 1
                        self.update_parameter_cache('visited', neigh, count)

                        out.write('%s %s\n' % (str(node), str(neigh)))

                        if self.debug and count % modulo == 0:
                            # NOTE(review): the message names
                            # save_snowball_edgelist_big(); text kept as is.
                            print('INFO: %d edges dumped in save_snowball_edgelist_big(), total %d' % (
                                count, total))
                            estimator.tick()
                            print(estimator.log_line())

        self.remove_parameter_cache('visited')
示例#6
0
    def save_edgelist(self, path, comments='#', delimiter=' ', data=False):
        '''
        Save the graph as a list of directed links, one per line:

            <nodeA><delimiter><nodeB>
            <nodeF><delimiter><nodeU>
            ...

        path : file or string

            Filename to create (opened with mode 'w'), or an object that
            already has a ``write`` method.  A file opened here is closed
            before returning; a caller-supplied object is left open.

        comments : string, optional

            Accepted for signature compatibility; not used.

        delimiter : string, optional

            The string written between the two endpoints.  The default is
            a single space.

        data : bool, optional

            Accepted for signature compatibility; edge data is never
            written.

        When ``self.debug`` is true, progress is printed every 100000
        written edges.  An error while opening ``path`` is raised to the
        caller instead of being swallowed.
        '''
        # Decide explicitly whether we were given a path or a writable
        # object, rather than hiding open() failures behind a bare except.
        owns_file = not hasattr(path, 'write')
        out = open(path, 'w') if owns_file else path
        try:
            modulo = 100000
            total = self.number_of_edges()
            estimator = TimeEstimator(total / modulo)
            for count, (src, dst) in enumerate(self.edges_iter(), 1):
                out.write('%s%s%s\n' % (str(src), delimiter, str(dst)))
                if self.debug and count % modulo == 0:
                    print('INFO: %d edges dumped in save_edgelist(), total %d' % (
                        count, total))
                    estimator.tick()
                    print(estimator.log_line())
        finally:
            # Only close what this method opened.
            if owns_file:
                out.close()
示例#7
0
    def load_edgelist(self, fileobj, num=False, use_big_alphabet=False):
        """Add one edge per data line of ``fileobj`` to this graph.

        Lines that are blank or whose first non-blank character is '#' are
        skipped.  Every other line is split on whitespace and its first two
        fields are used as source and destination.

        fileobj : iterable of strings
            The lines to read.

        num : bool, optional
            If true, both fields are converted to numbers: with ``int`` by
            default, or with ``Base().base2num`` when ``use_big_alphabet``
            is true.  If false, the fields are used as strings.

        use_big_alphabet : bool, optional
            Selects the ``Base`` conversion (only relevant when ``num`` is
            true).

        Reading stops once ``self.max_links_input`` edges have been added.
        When ``self.debug`` is true, progress is reported every
        ``self.input_debug_links`` edges and a final summary is written.
        """
        c = 0

        modulo = self.input_debug_links
        total = self.max_links_input
        estimator = TimeEstimator(total / modulo)

        if use_big_alphabet:
            base = Base()

        # Pick the field conversion once instead of branching per line.
        if num:
            parse = base.base2num if use_big_alphabet else int
        else:
            parse = lambda token: token

        for line in fileobj:
            content = line.strip()
            if not content or content[0] == '#':
                continue

            fields = line.split()
            src = parse(fields[0])
            dst = parse(fields[1])

            self.add_edge(src, dst)
            c += 1

            if self.debug:
                if c % self.input_debug_links == 0:
                    sys.stdout.write(
                        'INFO: INPUT load_edgelist(), link count = %d  %s\n' %
                        (c, time.ctime()))
                if c % modulo == 0:
                    print('INFO: %d edges loaded in load_edgelist(), estimated total %d' % (
                        c, total))
                    estimator.tick()
                    print(estimator.log_line())

            if c >= self.max_links_input:
                break

        if self.debug:
            sys.stdout.write(
                'INFO: FINISH INPUT load_edgelist(), link count = %d\n' % c)
示例#8
0
    def index_parameter_generic(self, param_name, param_iter_func):
        """Fill and index the parameter cache ``param_name``.

        The cache is created, then each node from ``self.nodes_iter()`` is
        paired with the next value from ``param_iter_func()`` and stored
        with ``insert_parameter_cache``.  Finally the cache is indexed.

        param_name : string
            Name of the parameter cache to populate.

        param_iter_func : callable
            Called with no arguments; must return an iterable of values,
            consumed in step with the node iterator.

        When ``self.debug`` is true, progress is printed every 1000 nodes.
        """
        self.add_parameter_cache(param_name)

        report_every = 1000
        estimator = TimeEstimator(self.number_of_nodes() / report_every)

        node_values = izip(self.nodes_iter(), param_iter_func())
        for position, (node, value) in enumerate(node_values, 1):
            # Progress is reported before the node at this position is stored.
            if self.debug and position % report_every == 0:
                print('INFO: %d nodes processed in index_parameter_generic, param_name %s' % (
                    position, param_name))
                estimator.tick()
                print(estimator.log_line())

            self.insert_parameter_cache(param_name, node, value)

        self.index_parameter_cache(param_name)