Example #1
File: __init__.py  Project: hxu/boomerang
def send_job(source_script=None, in_directory=None, out_directory=None,
             base_directory='task/',
             load_from_s3=0, s3_bucket_name=None, s3_fetch_path=None,
             put_to_s3=0,
             existing_instance=None,
             itype=None, ami=boom_config.DEFAULT_AMI, security_group=boom_config.DEFAULT_SECURITY_GROUP,
             ssh_key=boom_config.DEFAULT_SSH_KEY,
             ssh_key_path=boom_config.DEFAULT_SSH_KEY_PATH):
    """
    Spins up an instance, deploys the job, then exits
    """
    load_from_s3 = int(load_from_s3)
    put_to_s3 = int(put_to_s3)
    if not out_directory.endswith('/'):
        out_directory += '/'
    out_log_file = base_directory + out_directory + 'shell_log.txt'
    _make_workspace()

    # Prepare the local job files
    f = open(boom_config.TEMPORARY_FOLDER + 'boom_task.py', 'w')
    f.write(generate_script(fetch=load_from_s3,
                            bucket_name=s3_bucket_name,
                            fetch_path=s3_fetch_path,
                            put=put_to_s3,
                            out_path=out_directory,
                            run=True,
                            script_name=source_script))
    f.close()

    user = '******'
    ssh_key_path = _expand_path(ssh_key_path)
    path_to_base_directory = '~/{}'.format(base_directory)

    instance = None

    # When provisioning a spot instance (see the sketch after this example)
    # res = conn.request_spot_instances(price='0.011', instance_type='t1.micro', image_id='ami-0b9ad862')
    # res[0] gives the spot reservation
    # but this does not have an update method, so need to do
    # conn.get_all_spot_instance_requests(res[0].id)
    # res[0].state = 'active'
    # or res[0].status.code = 'fulfilled'
    # then res[0].instance_id

    try:
        if not existing_instance:
            instance = provision_instance(itype=itype, ami=ami, security_group=security_group, ssh_key=ssh_key)
            print "Waiting for instance to boot"
        else:
            instance = _get_existing_instance(existing_instance)
            print 'Using existing instance {}'.format(existing_instance)
        while instance.state != 'running':
            sys.stdout.write(".")
            time.sleep(5)
            instance.update()
        sys.stdout.write('\n')
    except KeyboardInterrupt:
        print 'Operation cancelled by user.  Attempting to terminate instance'
        if instance:
            # This does not always terminate, if we are really early in the launch process
            instance.terminate()
        _cleanup_workspace()
        sys.exit(1)

    time.sleep(15)
    print "Instance is running at ip {}".format(instance.ip_address)
    print "Connecting as user {}".format(user)

    # Set up the fabric environment to connect to the new machine
    env.host_string = instance.ip_address
    env.user = user
    env.key_filename = ssh_key_path

    attempt = 1
    success = False
    while not success and attempt <= 3:
        try:
            run('uname -a')
            run('pwd')
            success = True
        except NetworkError as e:
            print "Could not connect: {}".format(e)
            print "Retrying"
            attempt += 1
            continue

    if not success:
        print "Could not connect after 3 tries.  Aborting"
        _cleanup_workspace()
        sys.exit(1)

    # Send files to the server
    if exists(base_directory):
        run('rm -R {}'.format(base_directory))
    run('mkdir {}'.format(base_directory))
    run('mkdir {}'.format(base_directory + out_directory))

    print 'Transferring scripts to instance'
    fabput(local_path=_expand_path('./' + boom_config.TEMPORARY_FOLDER + 'boom_task.py'), remote_path='~/' + base_directory)
    fabput(local_path=_expand_path('./' + source_script), remote_path='~/' + base_directory)

    with cd(path_to_base_directory):
        # Kick off the script with tmux
        print 'Kicking off the task'
        run("tmux new-session -s boom_job -d")
        # TODO: Does not always seem to be working, but path looks correct
        run("tmux pipe-pane -o -t boom_job 'exec cat >> {}'".format(out_log_file))
        run("tmux send -t boom_job 'python boom_task.py' Enter")

    _cleanup_workspace()
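
For reference, a hypothetical invocation of send_job is shown below; whether the function is exposed as a Fabric task or called directly from Python, the keyword arguments are the same. The script name, bucket, instance type, and fetch path are placeholders, and the actual S3 fetch/put is performed by the generated boom_task.py rather than by send_job itself.

# Hypothetical call: run analysis.py on a freshly provisioned instance,
# pulling input data down from S3 first and pushing the out/ directory
# back up when the task script finishes. All names are placeholders.
send_job(source_script='analysis.py',
         out_directory='out',
         load_from_s3=1,
         s3_bucket_name='my-data-bucket',
         s3_fetch_path='inputs/',
         put_to_s3=1,
         itype='m1.small')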
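
The commented-out notes in send_job about spot instances describe a provisioning path the function never implements. The helper below is a minimal sketch of that flow, assuming an existing boto EC2 connection; the function name is hypothetical, and the price, instance type, and AMI are the placeholder values from the comment, not recommended defaults.

import sys
import time

def provision_spot_instance(conn, price='0.011', itype='t1.micro', ami='ami-0b9ad862'):
    """
    Request a spot instance and poll the request until it is fulfilled,
    then return the instance that backs it. `conn` is a boto EC2 connection.
    """
    # Place the spot request; boto returns a list of SpotInstanceRequest objects
    requests = conn.request_spot_instances(price=price, image_id=ami, instance_type=itype)
    request_id = requests[0].id

    # Spot requests have no update() method, so re-fetch the request on each poll
    while True:
        req = conn.get_all_spot_instance_requests(request_ids=[request_id])[0]
        if req.state == 'active' and req.instance_id:
            break
        sys.stdout.write('.')
        sys.stdout.flush()
        time.sleep(5)

    # Resolve the instance that fulfilled the request
    reservations = conn.get_all_instances(instance_ids=[req.instance_id])
    return reservations[0].instances[0]
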
Example #2
File: put.py  Project: hxu/boomerang
def put_path(path=None,
             bucket_name=None,
             overwrite=0,
             aws_access_key_id=None,
             aws_secret_access_key=None):
    """
    Puts a path to S3
    If the path is a file, puts just the file into the bucket
    If the path is a folder, recursively puts the folder into the bucket
    """
    if bucket_name is None:
        print 'You must provide a bucket name'
        sys.exit(1)

    cb = _progress_cb
    num_cb = 100
    debug = 0
    reduced = True
    grant = None
    headers = {}

    overwrite = int(overwrite)
    conn = connect_s3(aws_access_key_id, aws_secret_access_key)
    b = conn.get_bucket(bucket_name)
    path = _expand_path(path)
    files_to_check_for_upload = []
    existing_keys_to_check_against = []
    prefix = os.getcwd() + '/'
    key_prefix = ''

    # Take inventory of the files to upload
    # For directories, walk recursively
    files_in_bucket = [k.name for k in b.list()]
    if os.path.isdir(path):
        print 'Getting list of existing keys to check against'
        for root, dirs, files in os.walk(path):
            for p in files:
                if p.startswith("."):
                    continue
                full_path = os.path.join(root, p)
                key_name = _get_key_name(full_path, prefix, key_prefix)
                files_to_check_for_upload.append(full_path)
                if key_name in files_in_bucket:
                    existing_keys_to_check_against.append(full_path)
    # for single files, just add the file
    elif os.path.isfile(path):
        full_path = os.path.abspath(path)
        key_name = _get_key_name(full_path, prefix, key_prefix)
        files_to_check_for_upload.append(full_path)
        if key_name in files_in_bucket:
            existing_keys_to_check_against.append(full_path)
    # we are trying to upload something unknown
    else:
        print "I don't know what %s is, so I can't upload it" % path

    print "{} files to upload:".format(len(files_to_check_for_upload))
    pprint(files_to_check_for_upload)
    print "{} Existing files already in bucket:".format(
        len(existing_keys_to_check_against))
    pprint(existing_keys_to_check_against)

    for full_path in files_to_check_for_upload:
        key_name = _get_key_name(full_path, prefix, key_prefix)

        if full_path in existing_keys_to_check_against:
            if not overwrite and b.get_key(key_name):
                print 'Skipping %s as it exists in s3' % full_path
                continue

        print 'Copying %s to %s/%s' % (full_path, bucket_name, key_name)

        # 0-byte files don't work and also don't need multipart upload
        if os.stat(full_path).st_size != 0 and multipart_capable:
            _multipart_upload(bucket_name, aws_access_key_id,
                              aws_secret_access_key, full_path, key_name,
                              reduced, debug, cb, num_cb, grant or 'private',
                              headers)
        else:
            _singlepart_upload(b,
                               key_name,
                               full_path,
                               cb=cb,
                               num_cb=num_cb,
                               policy=grant,
                               reduced_redundancy=reduced,
                               headers=headers)
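
For completeness, a direct call to put_path might look like the snippet below. The import path assumes the package is importable as boomerang, and the path and bucket name are placeholders; with the credential arguments left at their None defaults, connect_s3 presumably falls back to the environment or boto configuration.

from boomerang.put import put_path  # assumed import path

# Upload a results folder; keys that already exist in the bucket are
# skipped unless overwrite=1 is passed. Names below are placeholders.
put_path(path='./task/out/',
         bucket_name='my-results-bucket',
         overwrite=0)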