Пример #1
0
def get_jobs_for_all_instances():
    """Collect scrapyd job info across every crawler instance.

    Returns:
        dict: the merged result of get_jobs_for_instance() for each
        instance returned by ec2.getCrawlerInstances(). Instances whose
        job query fails are silently skipped (best-effort collection).
    """
    job_dict = {}
    for instance in ec2.getCrawlerInstances():
        try:
            job_dict.update(get_jobs_for_instance(instance.id))
        except Exception:
            # Best-effort: one unreachable/broken instance must not abort
            # collection for the rest. `except Exception` (not bare
            # `except:`) so KeyboardInterrupt/SystemExit still propagate.
            pass
    return job_dict
Пример #2
0
    def __init__(self, *args, **kwargs):
        """Build the form and populate its Select widgets.

        The 'instance' choices are built live from EC2: each running
        crawler instance is shown as "<id> <pending+running>/<limit>".
        If the instance/job lookup fails, the form still renders with
        only the blank instance choice. The remaining widgets are
        populated from the config file ('owners', 'grouping') and from
        get_country_list().
        """
        super(SiteForm, self).__init__(*args, **kwargs)

        running_limit = config_file.get_config().get('bblio', 'crawler_instance_site_limit')

        instance_list = []
        try:
            for inst in getCrawlerInstances():
                # Renamed from `dict` -- the original shadowed the builtin.
                status_counts = get_job_status_count_for_instance(inst.id)
                active = int(status_counts['pending']) + int(status_counts['running'])
                instance_list.append({
                    'name': inst.id,
                    'choice_name': inst.id + ' ' + str(active) + '/' + str(running_limit),
                })
        except Exception:
            # Best-effort: an EC2/scrapyd outage must not break form rendering.
            pass

        # Blank entry so the instance field can be left unassigned.
        instance_list.append({'name': '', 'choice_name': ''})
        instance_choices = ((i['name'], i['choice_name']) for i in instance_list)
        self.fields['instance'].widget = Select(attrs={'class': 'form-control input-sm'}, choices=instance_choices)
        self.fields['jurisdiction'].widget = Select(attrs={'class': 'form-control input-sm'}, choices=get_country_list())
        self.fields['owner'].widget = Select(attrs={'class': 'form-control input-sm'}, choices=[(o, o) for o in config_file.get_config().get('bblio','owners').split(';')])
        self.fields['grouping'].widget = Select(attrs={'class': 'form-control input-sm'}, choices=[(g,g) for g in config_file.get_config().get('bblio','grouping').split(';')])
Пример #3
0
def deploy():
    """Push the scraper code base to every crawler instance and redeploy.

    For each EC2 crawler instance with a public IP: copies the scrapyd
    config and the required project files over SSH (recreating the
    directory tree first), writes a per-instance scrapy.cfg pointing at
    that instance's scrapyd endpoint, then runs `scrapyd-deploy` locally
    against it, streaming its output to stdout.

    Returns:
        None in the current code path: the failure-accumulation logic at
        the bottom is commented out, so `ret` is never reassigned.
    """
    ret = None
    for i in ec2.getCrawlerInstances():
        # Instances without a public IP cannot be reached over SSH.
        if not i.ip_address:
            continue
        print "[%s] %s" % (i.id, i.ip_address)
                
        ssh_client = sshclient_from_instance(ec2.getInstanceFromInstanceName(i.id), host_key_file = '/home/ec2-user/.ssh/known_hosts', ssh_key_file=keys.aws_pem,user_name='ec2-user')
        #ssh_client = sshclient_from_instance(ec2.getInstanceFromInstanceName(i.id), host_key_file = '/home/ec2-user/.ssh/known_hosts', ssh_key_file="",user_name='ec2-user')
        ssh_client.put_file('/home/ec2-user/bblio/scraper/scrapyd.conf','/home/ec2-user/scrapyd.conf')

        home_dir = '/home/ec2-user/bblio/'

        # Files to mirror onto the instance. '.crawler'-suffixed files are
        # crawler-specific variants; the suffix is stripped on the remote
        # side (see the put_file loop below).
        copyList = []
        copyList.append(home_dir + 'build/search/models.py')
        copyList.append(home_dir + 'build/search/__init__.py')
        copyList.append(home_dir + 'build/Build/__init__.py')
        copyList.append(home_dir + 'build/Build/settings.py.crawler')
        copyList.append(home_dir + 'build/Build/myScript.py.crawler')
        copyList.append(home_dir + 'build/manage.py')
        copyList.append(home_dir + 'build/__init__.py')
        copyList.append(home_dir + 'aws/ec2.py')
        copyList.append(home_dir + 'aws/keys.py')
        copyList.append(home_dir + 'aws/key.pem')
        copyList.append(home_dir + 'aws/__init__.py')
        copyList.append(home_dir + 'config_file.py')
        copyList.append(home_dir + '__init__.py')

        # Collect every ancestor directory of the files above, walking up
        # from each file until reaching home_dir, so the tree can be
        # recreated remotely before copying.
        dirList = []

        for c in copyList:
            c_dir = os.path.dirname(c)
            prev_dir = ''
            # NOTE(review): `c_dir not in home_dir` is a substring test,
            # not a path-prefix check -- it stops once c_dir appears
            # inside the home_dir string. Looks intended as "stop at
            # home_dir"; confirm it behaves for paths outside home_dir.
            # `c_dir != prev_dir` guards against dirname() reaching a
            # fixed point ('/' or '') and looping forever.
            while c_dir != prev_dir and c_dir not in home_dir:
                if c_dir not in dirList:
                    dirList.append(c_dir)
                prev_dir = c_dir
                c_dir = os.path.dirname(c_dir)
        dirList.append(home_dir)
        # Shortest paths first so parents are mkdir'd before children
        # (Python 2 cmp-based sort).
        dirList.sort(lambda x,y: cmp(len(x), len(y)))

        for d in dirList:
            print('[dir][%s] %s' % (ssh_client.server.instance_id, d))
            # No -p flag: mkdir fails (non-fatally) for dirs that already
            # exist; ordering above ensures parents come first.
            ssh_client.run('mkdir %s' % d)

        for c in copyList:
            print('[file][%s] %s' % (ssh_client.server.instance_id, c))
            # Remote path drops the '.crawler' suffix so the variant file
            # lands under its real name (e.g. settings.py).
            ssh_client.put_file(c,c.replace('.crawler',''))

        # Write a scrapy.cfg (locally) whose [deploy] url targets this
        # instance's scrapyd daemon on port 6800.
        with open("/home/ec2-user/bblio/scraper/deployable/scrapy.cfg", "w") as f:
            f.write(
"""
[settings]
default = deployable.settings    
[deploy]
project = deployable\n
"""
            )
            f.write("url = http://")
            f.write(i.ip_address)
            f.write(":6800")
            print i.ip_address
        # Run scrapyd-deploy from the project dir; it reads the scrapy.cfg
        # just written and pushes the egg to this instance.
        p = Popen(['scrapyd-deploy'],stdout=PIPE,shell=True,cwd='/home/ec2-user/bblio/scraper/deployable')
        j = None

        # Stream subprocess output until EOF + exit; capture the JSON
        # status line scrapyd-deploy prints (a '{'-containing line).
        while True:
            out = p.stdout.read()
            if out == '' and p.poll() != None:
                break
            if out != '':
                if '{' in out:
                    j = out
                    j = json.loads(out)
                sys.stdout.write(out)
                sys.stdout.flush()
        #if j['status'] != 'ok':
            #ret = ret + str(i.ip_address) + ' failed\n'
    return ret