Example #1
0
def run_ssh_multi(cmd, instances, user, identity_file):
    '''
    Runs ``cmd`` over SSH on every instance in parallel.

    Inputs
    ------
    cmd: The shell command to execute, as a string.
    instances: An iterable of instance objects, each exposing a
        ``public_ip_address`` attribute.
    user: The SSH username, as a string.
    identity_file: Path to the SSH identity (private key) file.

    Blocks until every job has completed (progress is reported by
    monitor_mp_jobs). No return value.
    '''
    # Guard the empty case: mp.Pool(0) raises ValueError.
    if not instances:
        return
    p = mp.Pool(len(instances))
    async_results = [p.apply_async(run_ssh,
                                   args=(cmd,
                                         instance.public_ip_address,
                                         user,
                                         identity_file))
                     for instance in instances]
    monitor_mp_jobs(async_results)
    # Reclaim the worker processes once all jobs have finished.
    p.close()
    p.join()
Example #2
0
def mutation_probabilities(collections,
                           standard,
                           outfile=None,
                           query=None,
                           seq_field='vdj_aa',
                           chunksize=100):
    '''
    Calculates the probability of each mutation from the standard sequence,
    given one or more collections.

    Inputs
    ------
    collections: One or more collection names, as an iterable of strings.
        (Each element is printed and measured with len() before being
        passed to get_sequences(), so strings are required here, not
        pymongo collection objects.)
    standard: The target amino acid sequence, as a string.
    outfile: Optional output path. When given, tab-separated
        mutation/probability pairs are written to this file.
    query: Query parameters, as a dict. Will be passed directly to
        collection.find()
    seq_field: The MongoDB field to be used for comparison to the target.
        Default is 'vdj_aa'.
    chunksize: Number of sequences to be submitted for each alignment job.
        Default is 100.

    Returns
    -------
    A dictionary of normalized mutation probabilities, of the format:
        {'12A': 0.01, '15F': 0.12, ...}
    Mutation names are a concatenation of the mutation position (1-based
    indexing of the standard sequence) and the mutated residue.
    Returns an empty dict if no mutations are found.
    '''
    async_results = []
    p = Pool()
    try:
        for collection in collections:
            print('\n' + collection)
            print('-' * len(collection))
            print('querying for sequences...')
            sequences = get_sequences(collection, query=query, seq_field=seq_field)
            print('performing alignments:')
            for chunk in chunker(sequences, chunksize):
                async_results.append(
                    p.apply_async(get_mutations, args=(chunk, standard)))
            # Wait for (and display progress on) all jobs submitted so far
            # before moving to the next collection.
            monitor_mp_jobs(async_results)
        mutations = []
        for ar in async_results:
            mutations.extend(ar.get())
    finally:
        # The original code never released the pool; make sure the worker
        # processes are reclaimed even if a job raises.
        p.close()
        p.join()
    print('\ncalculating mutation probabilities...')
    mcounts = Counter(mutations)
    total = sum(mcounts.values())
    # Avoid ZeroDivisionError when no mutations were observed.
    if total == 0:
        return {}
    norm_counts = {k: float(v) / total for k, v in mcounts.items()}
    if outfile is not None:
        # Build the output string only when it is actually needed, and use
        # a context manager so the file handle is always closed (the
        # original left an open handle to the garbage collector).
        prob_string = '\n'.join(
            '{}\t{}'.format(k, v) for k, v in norm_counts.items())
        with open(outfile, 'w') as f:
            f.write(prob_string)
    return norm_counts
Example #3
0
    def configure(self):
        '''
        Performs post-launch configuration of every node in the cluster.

        Steps, in order:
          1. Build the base image on all instances (in parallel via a
             multiprocessing pool when there is more than one node).
          2. Generate an SSH keypair on the master and copy ~/.ssh to the
             workers (via tar over SSH) for passwordless node-to-node SSH.
          3. Append name/IP mappings for all nodes to /etc/hosts.
          4. Assemble the master's EBS storage (RAID when multiple devices
             are requested, single formatted device otherwise) and share
             it over NFS when workers exist.
          5. Optionally — driven by self.opts — start Redis/Celery/Flower,
             upload BaseSpace credentials, start a Jupyter Notebook
             server, and start or stop MongoDB.
          6. Write the cluster's config information to the master.

        No return value; progress is reported via print statements.
        '''
        instances = [self.master_instance] + self.worker_instances
        # NOTE(review): assumes self.master and self.workers are dicts
        # mapping node name -> instance; merged into one lookup table.
        instance_lookup = dict(self.master, **self.workers)
        # instance_names is computed but not used below — possibly leftover.
        instance_names = sorted(instance_lookup.keys())

        # # update Ab[x] tools
        # self.update_abx(instances)

        # build base image
        print('')
        if len(instances) == 1:
            # Single node: configure directly, no pool needed.
            print('Building base image...')
            configure_base_image(instances[0].public_ip_address,
                                 self.opts.user,
                                 self.opts.identity_file)
        else:
            # Multiple nodes: one pool worker per instance, configured
            # concurrently; monitor_mp_jobs blocks until all complete.
            print('Building base image on all nodes...')
            p = mp.Pool(len(instances))
            async_results = []
            for instance in instances:
                async_results.append(p.apply_async(configure_base_image,
                                                   args=(instance.public_ip_address,
                                                         self.opts.user,
                                                         self.opts.identity_file)))
            monitor_mp_jobs(async_results)
            p.close()
            p.join()

        # deploy SSH key to nodes for passwordless SSH
        print('')
        print("Generating cluster's SSH key on master...")
        # Only generates a keypair if one does not already exist, then
        # authorizes it for login on the master itself.
        key_setup = """
            [ -f ~/.ssh/id_rsa ] ||
            (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa &&
            cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys)"""
        self.run(self.master_instance, key_setup)
        # Tar the master's ~/.ssh to stdout, then untar it on each worker
        # via stdin — copies the whole key material to every worker.
        get_ssh_tar = 'tar c - .ssh'
        dot_ssh_tar, _ = self.run(self.master_instance, get_ssh_tar)
        if self.worker_instances:
            print("Transferring SSH key to workers:")
            put_ssh_tar = 'tar x'
            for i, worker in enumerate(self.worker_instances):
                progbar.progress_bar(i, len(self.worker_instances))
                self.run(worker, put_ssh_tar, stdin=dot_ssh_tar)
            progbar.progress_bar(len(self.worker_instances), len(self.worker_instances))
            print('')

        # modify /etc/hosts on all nodes
        print('Updating /etc/hosts on all nodes...')
        hosts = ['{} {}'.format(self.get_ip(i), n) for n, i in instance_lookup.items()]
        host_string = '\n'.join(hosts)
        # Appends (does not replace) the mappings; re-running configure()
        # would therefore add duplicate entries.
        host_cmd = """sudo -- sh -c 'echo "{}" >> /etc/hosts'""".format(host_string)
        for instance in instances:
            self.run(instance, host_cmd)

        # build and share an EBS volume on the master node
        # Device names: /dev/xvdaa, /dev/xvdab, ... one per requested volume.
        devices = ['/dev/xvda' + string.ascii_lowercase[i] for i in range(self.opts.master_ebs_vol_num)]
        if len(devices) > 1:
            volume = self.build_ebs_raid_volume(devices)
        elif len(devices) == 1:
            volume = self.format_single_ebs_device(devices[0])
        # NOTE(review): if master_ebs_vol_num is 0 and workers exist,
        # `volume` is unbound here and share_nfs_volume() would raise.
        if len(self.worker_instances) > 0:
            self.share_nfs_volume(volume)

        # start Celery workers on all nodes
        if self.opts.celery and len(self.worker_instances) > 0:
            self.start_redis_server(self.master_instance)
            self.start_celery_workers(self.worker_instances)
            self.start_flower()

        # upload BaseSpace credentials file
        if self.opts.basespace_credentials:
            print('')
            print('Uploading BaseSpace credentials file...')
            cred_file = os.path.expanduser('~/.abstar/basespace_credentials')
            remote_path = '/home/{}/.abstar/basespace_credentials'.format(self.opts.user)
            if os.path.exists(cred_file):
                self.put(self.master_name, cred_file, remote_path)
            else:
                # Best-effort: missing local credentials is reported, not fatal.
                print('ERROR: Local credentials file was not found. No credentials were uploaded.')

        # configure and start a Jupyter Notebook server
        if self.opts.jupyter:
            self.setup_jupyter_notebook()

        # configure and start a MongoDB server
        if self.opts.mongodb:
            self.setup_mongodb()
        else:
            # Explicitly stop mongod when MongoDB was not requested.
            self.stop_mongod()

        # write config information to master
        self.write_config_info()
        print('')