Exemplo n.º 1
0
class MemoryBasedSeedsService(object):
    """In-memory seed queue that hands out pending seeds in small batches.

    Seeds are kept in a plain list (``pending_seeds``). Every added seed is
    also passed to a ``GlobalInfo`` instance, whose ``hosts`` container is
    what ``get_latency_time`` consults for per-host crawl pacing.
    """

    # Upper bound on the number of seeds returned by one get_seeds() call.
    SEED_PKG_SIZE = 5

    def __init__(self):
        self.pending_seeds = []  # seeds received but not yet handed out
        self.global_info = GlobalInfo()

    @property
    def seeds_num(self):
        """Number of seeds currently waiting in the queue."""
        return len(self.pending_seeds)

    @property
    def hosts(self):
        """The ``hosts`` container owned by ``global_info``."""
        return self.global_info.hosts

    def get_seeds(self, spiderid, jobreport):
        """Remove and return up to ``SEED_PKG_SIZE`` seeds from the queue front.

        Args:
            spiderid: Not used by this implementation.
            jobreport: Forwarded to ``global_info.update_spider_report``
                together with a flag telling whether any seed was returned.

        Returns:
            A list with the seeds taken from the queue (possibly empty).
        """
        num = min(len(self.pending_seeds), self.SEED_PKG_SIZE)

        results = self.pending_seeds[:num]
        self.pending_seeds = self.pending_seeds[num:]
        self.global_info.update_spider_report(jobreport, num > 0)

        log.msg("return %s seeds" % num)
        return results

    def add_seeds(self, clientid, pkg):
        """Append every seed in ``pkg.seeds`` to the queue.

        Each seed is also registered through ``global_info.add_seed`` and
        logged individually; a summary line naming ``clientid`` follows.
        """
        for seed in pkg.seeds:
            self.pending_seeds.append(seed)
            self.global_info.add_seed(seed)
            log.msg("add %s" % seed)

        log.msg("add %s seeds from %s" % (len(pkg.seeds), clientid))

    def get_latency_time(self, url):
        """Return how long to wait before ``url``'s host may be crawled again.

        The host record is looked up by subscript in ``hosts`` using
        ``get_domain(url)``, so a missing domain raises whatever that
        container raises (``KeyError`` for a plain dict).

        Returns:
            0 when the host has no recorded crawl time or when its
            ``crawl_interval`` has fully elapsed; in both cases
            ``last_crawl_time`` is set to the current time. Otherwise the
            remaining wait, in the unit of ``time.time()`` (seconds,
            presuming ``crawl_interval`` is expressed in seconds).
        """
        domain = get_domain(url)
        hostinfo = self.hosts[domain]
        # Diagnostics go through the logger like the rest of the class; the
        # former ``print`` statements were Python 2-only syntax.
        log.msg("%s" % (hostinfo,))

        # Sample the clock once so the decision and the stored stamp agree.
        now = time.time()
        if hostinfo.last_crawl_time is None:
            hostinfo.last_crawl_time = now
            return 0

        left_time = now - hostinfo.last_crawl_time - hostinfo.crawl_interval
        log.msg("left: %s" % left_time)

        # ">=": when the interval has elapsed exactly, the caller is told to
        # wait 0, so the crawl time must be stamped in that case as well.
        if left_time >= 0:
            hostinfo.last_crawl_time = now
            return 0
        return -left_time

    def status(self):
        """Return a placeholder; status reporting is not implemented."""
        return "unsupported yet"
Exemplo n.º 2
0
 def __init__(self, pkg_size):
     """Set up an empty seed queue with a caller-chosen package size.

     pkg_size is stored on the instance as SEED_PKG_SIZE; the chosen value
     is reported through log.msg once the instance state is in place.
     """
     self.global_info = GlobalInfo()
     self.pending_seeds = list()
     self.SEED_PKG_SIZE = pkg_size

     banner = "init MemoryBasedSeedsService, SEED_PKG_SIZE:%s" % pkg_size
     log.msg(banner)
Exemplo n.º 3
0
class MemoryBasedSeedsService(object):
    """In-memory seed queue that hands out pending seeds in small batches.

    Seeds are kept in a plain list (``pending_seeds``). Every added seed is
    also passed to a ``GlobalInfo`` instance, whose ``hosts`` container is
    what ``get_latency_time`` consults for per-host crawl pacing.
    """

    # Class-level fallback; __init__ overrides it per instance.
    SEED_PKG_SIZE = 1

    def __init__(self, pkg_size):
        """Create an empty queue.

        Args:
            pkg_size: Upper bound on the number of seeds returned by one
                ``get_seeds`` call; stored as ``self.SEED_PKG_SIZE``.
        """
        self.SEED_PKG_SIZE = pkg_size
        self.pending_seeds = []  # seeds received but not yet handed out
        self.global_info = GlobalInfo()
        log.msg("init MemoryBasedSeedsService, SEED_PKG_SIZE:%s" % pkg_size)

    @property
    def seeds_num(self):
        """Number of seeds currently waiting in the queue."""
        return len(self.pending_seeds)

    @property
    def hosts(self):
        """The ``hosts`` container owned by ``global_info``."""
        return self.global_info.hosts

    def get_seeds(self, spiderid, jobreport):
        """Remove and return up to ``SEED_PKG_SIZE`` seeds from the queue front.

        Args:
            spiderid: Not used by this implementation.
            jobreport: Forwarded to ``global_info.update_spider_report``
                together with a flag telling whether any seed was returned.

        Returns:
            A list with the seeds taken from the queue (possibly empty).
        """
        num = min(len(self.pending_seeds), self.SEED_PKG_SIZE)

        results = self.pending_seeds[:num]
        self.pending_seeds = self.pending_seeds[num:]
        self.global_info.update_spider_report(jobreport, num > 0)

        log.msg("return %s seeds" % num)
        return results

    def add_seeds(self, clientid, pkg):
        """Append every seed in ``pkg.seeds`` to the queue.

        Each seed is also registered through ``global_info.add_seed`` and
        logged individually; a summary line naming ``clientid`` follows.
        """
        for seed in pkg.seeds:
            self.pending_seeds.append(seed)
            self.global_info.add_seed(seed)
            log.msg("add %s" % seed)

        log.msg("add %s seeds from %s" % (len(pkg.seeds), clientid))

    def get_latency_time(self, url):
        """Return how long to wait before ``url``'s host may be crawled again.

        The host record is looked up by subscript in ``hosts`` using
        ``get_domain(url)``, so a missing domain raises whatever that
        container raises (``KeyError`` for a plain dict).

        Returns:
            0 when the host has no recorded crawl time or when its
            ``crawl_interval`` has fully elapsed; in both cases
            ``last_crawl_time`` is set to the current time. Otherwise the
            remaining wait, in the unit of ``time.time()`` (seconds,
            presuming ``crawl_interval`` is expressed in seconds).
        """
        domain = get_domain(url)
        hostinfo = self.hosts[domain]
        # Diagnostics go through the logger like the rest of the class; the
        # former ``print`` statements were Python 2-only syntax.
        log.msg("%s" % (hostinfo,))

        # Sample the clock once so the decision and the stored stamp agree.
        now = time.time()
        if hostinfo.last_crawl_time is None:
            hostinfo.last_crawl_time = now
            return 0

        left_time = now - hostinfo.last_crawl_time - hostinfo.crawl_interval
        log.msg("left: %s" % left_time)

        # ">=": when the interval has elapsed exactly, the caller is told to
        # wait 0, so the crawl time must be stamped in that case as well.
        if left_time >= 0:
            hostinfo.last_crawl_time = now
            return 0
        return -left_time

    def status(self):
        """Return a placeholder; status reporting is not implemented."""
        return "unsupported yet"
Exemplo n.º 4
0
 def __init__(self, pkg_size):
     """Initialise the service state and log the configured package size.

     The given pkg_size becomes this instance's SEED_PKG_SIZE; the seed
     queue starts out empty and a new GlobalInfo is created.
     """
     self.pending_seeds = list()
     self.global_info = GlobalInfo()
     self.SEED_PKG_SIZE = pkg_size

     init_message = "init MemoryBasedSeedsService, SEED_PKG_SIZE:%s" % pkg_size
     log.msg(init_message)
Exemplo n.º 5
0
 def __init__(self):
     """Start with no pending seeds and a newly created GlobalInfo."""
     self.global_info = GlobalInfo()
     self.pending_seeds = list()