Example #1
def train(argv):
    # retrieve command line options
    opts, _ = getopt.getopt(argv, "i:")

    found = False

    for opt, arg in opts:
        if opt == '-i':
            params = utils.read_params(arg)
            found = True
    
    if not found:
        params = utils.read_params()

    train_algo(params)
Example #2
def main(data_path="data/split/", feature_path="data/features/", out_path="data/pca/"):
    X_train, X_test, y_train, y_test = read_data(data_path)

    params = read_params("params.yaml", "pca")
    pca = PCA(**params).fit(X_train)

    train_feature = pd.DataFrame(pca.transform(X_train))
    test_feature = pd.DataFrame(pca.transform(X_test))
    train_feature["class"] = y_train
    test_feature["class"] = y_test

    if not os.path.isdir(feature_path):
        os.mkdir(feature_path)
    train_feature.to_csv(f"{feature_path}train.csv", index=False)
    test_feature.to_csv(f"{feature_path}test.csv", index=False)
    save_results(out_path, pca, None)

    print(f"Finished Feature Engineering:\nStats:")
    print(f"\tExplained Variance: {pca.explained_variance_}")
    print(f"\tExplained Variance Ratio: {pca.explained_variance_ratio_}")

    log_experiment(
        out_path,
        metrics=dict(
            explained_variance_=pca.explained_variance_,
            explained_variance_ratio_=pca.explained_variance_ratio_,
        ),
    )
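Several of the scripts in this listing (the PCA, SVC, logistic-regression, random-forest and ensemble examples) call read_params('params.yaml', '<section>'). A minimal sketch of what such a helper could look like, assuming the file is YAML and the second argument names a top-level section (an illustration, not the repositories' actual implementation):

import yaml  # PyYAML


def read_params(path="params.yaml", section=None):
    # Load the whole parameter file; an empty file yields an empty dict.
    with open(path) as f:
        params = yaml.safe_load(f) or {}
    # Return everything, or only the requested top-level section.
    return params if section is None else params.get(section, {})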
Example #3
def train_model(epochs, labels):
    '''
    :param epochs: EEG epochs of shape trials x channels x samples
    :param labels: class labels, one per trial
    :return: trained model and mean validation AUCs from cross-validation
    '''
    params = read_params('config.json')
    model = create_model(params=params,
                         num_channels=epochs.shape[1],
                         num_samples=epochs.shape[2])

    path_to_models_dir = params['path_to_models_dir']
    path_to_model = os.path.join(os.path.dirname(__file__), path_to_models_dir,
                                 str(int(time.time())))
    max_epochs = params['max_epochs']
    labels = to_categorical(labels, 2)
    epochs = epochs[:, np.newaxis, :, :]
    model, mean_val_aucs = cv_per_subj_test(x_tr_val=epochs,
                                            y_tr_val=labels,
                                            model=model,
                                            model_path=path_to_model,
                                            max_epochs=max_epochs,
                                            block_mode=False,
                                            plot_fold_history=True)
    return model, mean_val_aucs
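This example and Example #17 call read_params('config.json'). A minimal sketch, assuming the helper simply deserializes a JSON file into a dict (hypothetical, not the project's verbatim code):

import json


def read_params(path):
    # Deserialize the JSON configuration file into a plain dict.
    with open(path) as f:
        return json.load(f)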
Example #4
    def Setup(self):
        utils.say("Oooo 'ello, I'm Mrs. Premise!")
        self.Params = utils.read_params()

        try:
            self.Semaphore = sysv_ipc.Semaphore(self.Params["KEY"],
                                                sysv_ipc.IPC_CREX)
        except sysv_ipc.ExistentialError as err:
            self.Logger.debug(err)
            # One of my peers created the semaphore already
            self.Semaphore = sysv_ipc.Semaphore(self.Params["KEY"])
            # Waiting for that peer to do the first acquire or release
            while not self.Semaphore.o_time:
                time.sleep(.1)
        else:
            # Initializing sem.o_time to nonzero value
            self.Semaphore.release()
            # Now the semaphore is safe to use.

        try:
            self.Memory = sysv_ipc.SharedMemory(self.Params["KEY"],
                                                sysv_ipc.IPC_CREX)
        except sysv_ipc.ExistentialError as err:
            self.Logger.debug(err)
            self.Memory = sysv_ipc.SharedMemory(self.Params["KEY"])
        else:
            # Freshly created shared memory needs no extra initialization
            # (sysv_ipc.SharedMemory has no release()).
            pass

        self.Logger.debug("Setup done")
        return True
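The IPC demos (this example and Examples #7, #12, #13, #21 and #25) call utils.read_params() with no arguments. A hedged sketch of that style of helper, assuming a small params.txt of KEY=VALUE lines; the file name and format are assumptions rather than the demos' verbatim code:

def read_params(path="params.txt"):
    # Parse KEY=VALUE lines into a dict, skipping blanks and comments.
    # Purely numeric values (KEY, ITERATIONS, SHM_SIZE, LIVE_DANGEROUSLY)
    # become ints, everything else stays a string.
    params = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            name, _, value = line.partition("=")
            value = value.strip()
            params[name.strip()] = int(value) if value.isdigit() else value
    return params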
Example #5
def main(data_path='data/split/',
         feature_path='data/features/',
         out_path='data/pca/'):
    X_train, X_test, y_train, y_test = read_data(data_path)

    params = read_params('params.yaml', 'pca')
    pca = PCA(**params).fit(X_train)

    train_feature = pd.DataFrame(pca.transform(X_train))
    test_feature = pd.DataFrame(pca.transform(X_test))
    train_feature['class'] = y_train
    test_feature['class'] = y_test

    if not os.path.isdir(feature_path):
        os.mkdir(feature_path)
    train_feature.to_csv(f'{feature_path}train.csv', index=False)
    test_feature.to_csv(f'{feature_path}test.csv', index=False)
    save_results(out_path, pca, None)

    print(f'Finished Feature Engineering:\nStats:')
    print(f'\tExplained Variance: {pca.explained_variance_}')
    print(f'\tExplained Variance Ratio: {pca.explained_variance_ratio_}')

    log_experiment(
        out_path,
        params=params,
        metrics=dict(explained_variance_=pca.explained_variance_,
                     explained_variance_ratio_=pca.explained_variance_ratio_))
Example #6
def main(name, argv):
    if len(argv) != 1:
        print_usage(name)
        return

    params = utils.read_params(argv[0])
    cluster = cl.getCluster(params['ClusterName'])
    cluster.runSingle("python " + utils.SCRIPTS_FOL + 'auto.py ' + argv[0])
Example #7
    def __init__(self):
        Process.__init__(self)
        self.Logger = logging.getLogger(__class__.__name__)
        self.State = "IDLE"
        self.Cntr = 0
        self.Running = True
        self.Params = utils.read_params()
        self.CreateFiles()
Example #8
    def load(cls,
             model_id: str,
             envs_to_set=None,
             transfer=False,
             total_training_updates=None,
             total_timesteps=None):
        # TODO: This function does not update trainer/optimizer variables (e.g. momentum),
        # so training after using it may lead to less-than-optimal results.
        if transfer:
            if total_training_updates is None or total_timesteps is None:
                raise ValueError(
                    "If transfer learning is active, total_training_updates and "
                    "total_timesteps must be provided!")
            if not (total_timesteps == int(total_timesteps)
                    and total_training_updates == int(total_training_updates)):
                raise TypeError(
                    "total_training_updates and total_timesteps must be integers")

        load_path = os.path.join(config.model_path, model_id)
        weights, params = utils._load_model_from_file(load_path, "multitask")

        model = cls(policy=params['policy_name'],
                    env_dict=None,
                    _init_setup_model=False)
        model.__dict__.update(params)

        model.num_timesteps = total_timesteps
        model.total_train_steps = total_training_updates
        tasks = params["tasks"]

        params = utils.read_params(model_id, "multitask")
        env_kwargs = params['env_kwargs']

        if transfer:
            tasks_to_set = list(envs_to_set.keys())
            if tasks == tasks_to_set:
                model.set_envs(envs_to_set, tasks)
                model.setup_train_model(transfer=True)

            else:
                print(
                    "The envs passed as arguments do not correspond to the envs the "
                    "model was trained on.\n Trained on: {} \n Passed: {}".format(
                        tasks, tasks_to_set))
                sys.exit()
        else:
            model.setup_step_model()
            env_kwargs['episode_life'] = False
            env_kwargs['clip_rewards'] = False
            model.set_envs_by_name(tasks, env_kwargs=env_kwargs)

        restores = []
        for param, loaded_weight in zip(model.trainable_variables, weights):
            restores.append(param.assign(loaded_weight))
        model.sess.run(restores)

        model.sess.graph.finalize()

        return model, tasks
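A hedged usage sketch for the load classmethod above; the owning class name (MultiTaskA2C here), the model id and the env dict are placeholders, not names confirmed by the source:

# Hypothetical call sites; class name, model id and envs are placeholders.
# Plain stepping/evaluation setup:
model, tasks = MultiTaskA2C.load("model_1589210000")

# Resuming training on the same task set; transfer=True requires both counters:
model, tasks = MultiTaskA2C.load("model_1589210000",
                                 envs_to_set=envs,  # dict: task name -> env
                                 transfer=True,
                                 total_training_updates=10000,
                                 total_timesteps=1000000)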
Example #9
    def test_all(self):
        var_list = [
            v for v in tf.global_variables()
            if '_t' in v.name or 'dense' in v.name
        ]
        saver = tf.train.Saver(var_list=var_list)

        if (self.large):
            S_train = utils.read_large_data(self.train_mat)
        else:
            S_train = utils.read_data(self.train_mat)
        idxs = np.random.permutation(S_train.shape[0])
        S_train = S_train[idxs]
        S_max, S_min = utils.max_min(S_train, self.n_train)
        del S_train

        print('Loading testing snapshot matrix...')
        if (self.large):
            self.S_test = utils.read_large_data(self.test_mat)
        else:
            self.S_test = utils.read_data(self.test_mat)

        utils.scaling(self.S_test, S_max, S_min)

        if (self.zero_padding):
            self.S_test = utils.zero_pad(self.S_test, self.p)

        print('Loading testing parameters...')
        self.params_test = utils.read_params(self.test_params)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname(self.checkpoints_folder + '/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                print(ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
                self.test_once(sess, self.init)

                utils.inverse_scaling(self.U_h, S_max, S_min)
                utils.inverse_scaling(self.S_test, S_max, S_min)
                n_test = self.S_test.shape[0] // self.N_t
                err = np.zeros((n_test, 1))
                for i in range(n_test):
                    num = np.sqrt(
                        np.mean(
                            np.linalg.norm(
                                self.S_test[i * self.N_t:(i + 1) * self.N_t] -
                                self.U_h[i * self.N_t:(i + 1) * self.N_t],
                                2,
                                axis=1)**2))
                    den = np.sqrt(
                        np.mean(
                            np.linalg.norm(self.S_test[i * self.N_t:(i + 1) *
                                                       self.N_t],
                                           2,
                                           axis=1)**2))
                    err[i] = num / den
                print('Error indicator epsilon_rel: {0}'.format(np.mean(err)))
Example #10
def main():
    params = read_params(cfg_file_path)
    trainfiles, testfiles = filesplit(DATADIR)
    savefilenames(SAVEPATH + 'filenames/', trainfiles, testfiles)
    trainfiles = read_names('../../scratch/bd_lstm/filenames/trainfiles.txt')
    trainset = DataSet(root_dir=DATADIR,
                       files=trainfiles,
                       normalize=False,
                       seq_len=params['slice_size'],
                       stride=params['stride'])
    train(SAVEPATH + 'trainstats/', trainset, params)
Example #11
def plot():
    params_file = 'params_txtfiles/params_peer_critical_lpub.txt'
    all_params = read_params(params_file, int_params=['n'])
    pi_A, pi_B = all_params['pi_A'][0], all_params['pi_B'][0]
    params_order = ['pi_A', 'pi_B']
    params_to_use = [all_params[lab] for lab in params_order]
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for params_set in product(*params_to_use):
        params = {lab: params_set[i] for i, lab in enumerate(params_order)}
        all_lpub_values = []
        colors = ['#ffb347', '#b347ff', '#47ffb3']
        linestyles = ['--', '-', '-.']
        for i, pb in enumerate([0.3, 0.5, 0.7]):

            pa_values = np.arange(0.01, 1.01, 0.01)
            # find critical lpub for each pa_value
            lpub_values = [
                find_critical(pa, pb, pi_A, pi_B) for pa in pa_values
            ]

            ax.plot(pa_values,
                    lpub_values,
                    color=colors[i],
                    label=r'$\pi_b=%.1f$' % pb,
                    linestyle=linestyles[i])

            ax.grid(True, linestyle=':')
            all_lpub_values += lpub_values

        #ax.legend(bbox_to_anchor=(0.5,0.17), loc='center')
        ax.legend()
        title = gen_str_params(params,
                               params_order=params_order,
                               symbols=params2latex).replace('\n', '')

        ax.set_xlabel(r'$\pi_a$')
        ax.set_ylabel(r'critical $\lambda_{pub}$')

        ax.set_xlim([-0.05, 1.05])
        ax.set_ylim([-0.05, np.max(all_lpub_values) + 0.05])
        ax.set_xticks([0., 0.2, 0.4, 0.6, 0.8, 1.])

        plt.tight_layout()

        str_params = gen_str_params(params, params_order=params_order)
        uniq_param = '_'.join(
            str_params.replace('\n', '').replace('$', '').split('; '))

        plt.savefig('figs/peer/critical_lpub_%s.pdf' % uniq_param)
        ax.set_title(title)
        plt.savefig('figs/peer/critical_lpub_%s.png' % uniq_param)
Example #12
def main():
    utils.say("Oooo 'ello, I'm Mrs. Conclusion!")

    params = utils.read_params()

    semaphore = sysv_ipc.Semaphore(params["KEY"])
    memory = sysv_ipc.SharedMemory(params["KEY"])

    utils.say("memory attached at %d" % memory.address)

    what_i_wrote = ""
    s = ""

    for i in range(0, params["ITERATIONS"]):
        utils.say("i = %d" % i)
        if not params["LIVE_DANGEROUSLY"]:
            # Wait for Mrs. Premise to free up the semaphore.
            utils.say("acquiring the semaphore...")
            semaphore.acquire()

        s = utils.read_from_memory(memory)

        while s == what_i_wrote:
            if not params["LIVE_DANGEROUSLY"]:
                # Release the semaphore...
                utils.say("releasing the semaphore")
                semaphore.release()
                # ...and wait for it to become available again.
                utils.say("acquiring for the semaphore...")
                semaphore.acquire()

            s = utils.read_from_memory(memory)

        if what_i_wrote:
            if PY_MAJOR_VERSION > 2:
                what_i_wrote = what_i_wrote.encode()
            try:
                assert(s == hashlib.md5(what_i_wrote).hexdigest())
            except AssertionError:
                raise AssertionError("Shared memory corruption after %d iterations." % i)

        if PY_MAJOR_VERSION > 2:
            s = s.encode()
        what_i_wrote = hashlib.md5(s).hexdigest()

        utils.write_to_memory(memory, what_i_wrote)

        if not params["LIVE_DANGEROUSLY"]:
            utils.say("releasing the semaphore")
            semaphore.release()
        # TODO: remove
        time.sleep(1)
Example #13
def destroy_shm_semaphore(semaphore, mapfile):
    params = utils.read_params()

    utils.say("Destroying semaphore and shared memory.")
    mapfile.close()
    # I could call memory.unlink() here but in order to demonstrate
    # unlinking at the module level I'll do it that way.
    posix_ipc.unlink_shared_memory(params["SHARED_MEMORY_NAME"])

    semaphore.release()

    # I could also unlink the semaphore by calling
    # posix_ipc.unlink_semaphore() but I'll do it this way instead.
    semaphore.unlink()
Example #14
def main(data_path="data/features/", out_path="data/models/svc/"):
    X_train, X_test, y_train, y_test = read_data(data_path)

    name = "LinearSVC"
    params = read_params("params.yaml", "svc")
    model = LinearSVC(**params)
    model.fit(X_train, y_train)

    accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test)
    print_results(accuracy, c_matrix, name)

    save_results(out_path, model, fig)
    log_experiment(out_path,
                   metrics=dict(accuracy=accuracy, confusion_matrics=c_matrix))
Example #15
def main():
    df = read_data()
    params = read_params('s1')
    df = create_bins(df, params['q1'], params['q2'])
    print(df.bin.value_counts())
    df.reset_index(inplace=True)

    # save the new bins
    df[['parcelid', 'bin']].to_csv('data/s1_intermediate/train_bins.csv',
                                   index=False)

    # calculate median output per bin and save it
    logerr_per_bin = calculate_logerror_per_bin(df)
    logerr_per_bin.to_csv('data/s1_intermediate/train_logerror_per_bin.csv',
                          index=False)
Example #16
def main(data_path='data/features/', out_path='data/models/logistic/'):
    X_train, X_test, y_train, y_test = read_data(data_path)

    name = 'LogisticRegression'
    params = read_params('params.yaml', 'logistic')
    model = LogisticRegression(**params)
    model.fit(X_train, y_train)

    accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test)
    print_results(accuracy, c_matrix, name)

    save_results(out_path, model, fig)
    log_experiment(out_path,
                   params=params,
                   metrics=dict(accuracy=accuracy, confusion_matrics=c_matrix))
Example #17
def loadModelThread():
    global Trained_model
    params = read_params('config.json')
    path_to_models_dir = params['path_to_models_dir']
    path_to_model = os.path.join(os.path.dirname(__file__), path_to_models_dir,
                                 '**', '*.hdf5')
    # recursive=True lets the '**' pattern match nested model folders
    filename_list = sorted(glob.glob(path_to_model, recursive=True))
    if len(filename_list) > 0:
        filename_ = filename_list[-1]
        Trained_model = load_model(filename_)

    global ClassifierPrepared
    if Trained_model is None:
        ClassifierPrepared = False
    else:
        ClassifierPrepared = True

    return
Example #18
def control_daemon(log_path, heat_pin, timeout):
    param_path = '%s.params' % log_path

    while True:
        df = utils.read_log(log_path, timeout=timeout)

        params = utils.read_params(param_path, timeout=timeout)
        heat_on = control_params(320, df['time'].values,
                                 df['heat_on'].values,
                                 df['temperature'].values,
                                 params)

        if heat_on:
            GPIO.output(heat_pin, GPIO.HIGH)
        else:
            GPIO.output(heat_pin, GPIO.LOW)
Example #19
def main(data_path='data/features/',
         out_path='data/models/r_forrest/',
         n_estimators=10,
         max_samples=30):
    X_train, X_test, y_train, y_test = read_data(data_path)

    name = 'RandomForrest'
    params = read_params('params.yaml', 'forrest')
    model = RandomForestClassifier(**params)
    model.fit(X_train, y_train)

    accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test)
    print_results(accuracy, c_matrix, name)

    save_results(out_path, model, fig)
    log_experiment(out_path,
                   params=params,
                   metrics=dict(accuracy=accuracy, confusion_matrics=c_matrix))
Example #20
def main():
    '''Perform species classification.'''

    # Get dictionary of parameters (query_dict in GUI)
    cwd = os.getcwd()
    param_dict = read_params(cwd)

    # Get paths
    output_dir, aml_path = os.path.split(param_dict['filepath'])
    aml_clean_path = aml_path.split('.')[0] + '-clean.csv'
    
    file_dir = os.path.dirname(__file__)
    class_dir = os.path.join(file_dir, os.path.pardir, 'data')
    class_path = os.path.join(class_dir, param_dict['classifier'])

    # Write cleaned scan file
    write_aml_clean(aml_path, aml_clean_path)

    # Run classifier and generate output
    classify_calls(aml_clean_path, class_path, param_dict)
Example #21
def init_shm_semaphore():
    params = utils.read_params()

    # Create the shared memory and the semaphore.
    memory = posix_ipc.SharedMemory(params["SHARED_MEMORY_NAME"], posix_ipc.O_CREAT,
                                    size=params["SHM_SIZE"])
    semaphore = posix_ipc.Semaphore(params["SEMAPHORE_NAME"], posix_ipc.O_CREAT)

    # MMap the shared memory
    mapfile = mmap.mmap(memory.fd, memory.size)

    # Once I've mmapped the file descriptor, I can close it without
    # interfering with the mmap.
    memory.close_fd()

    # I seed the shared memory with a random string (the current time).
    what_i_wrote = time.asctime()
    utils.write_to_memory(mapfile, what_i_wrote)

    return semaphore, mapfile, what_i_wrote
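A hedged usage sketch pairing init_shm_semaphore() above with destroy_shm_semaphore() from Example #13; the message exchange itself is elided:

# Hypothetical driver; the actual read/write loop over the shared memory
# (utils.read_from_memory() / utils.write_to_memory()) is elided.
semaphore, mapfile, what_i_wrote = init_shm_semaphore()
try:
    pass  # ... exchange messages through mapfile here ...
finally:
    destroy_shm_semaphore(semaphore, mapfile)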
Example #22
def main(data_path='data/features/',
         model_path='data/models/',
         out_path='data/models/ensemble/'):
    X_train, X_test, y_train, y_test = read_data(data_path)

    name = 'Ensemble'
    params = read_params('params.yaml', 'ensemble')
    cl1 = load_model(f'{model_path}/logistic/')
    cl2 = load_model(f'{model_path}/svc/')
    cl3 = load_model(f'{model_path}/r_forrest/')
    estimators = [('l_regression', cl1), ('l_svc', cl2), ('r_forrest', cl3)]

    model = VotingClassifier(estimators, **params)
    model.fit(X_train, y_train)

    accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test)
    print_results(accuracy, c_matrix, name)

    save_results(out_path, model, fig)
    log_experiment(out_path,
                   metrics=dict(accuracy=accuracy, confusion_matrics=c_matrix))
Example #23
    def __init__(self, model_id: str, n_steps: int, input_len: int,
                 output_len: int):
        self.model_id = model_id
        self.transfer = True if os.path.exists(
            os.path.join(config.model_path, model_id)) and model_id else False
        if self.transfer:
            params = read_params(model_id, "meta")
            self.input_len = params['input_length']
            self.output_len = params['output_length']
            self.n_steps = params['n_steps']
            self.window_size = self.n_steps // 2 + 1 if self.n_steps < 15 else self.n_steps // 3
            self.gamma = params['gamma']
            self.meta_learner = MetaA2CModel.load(self.model_id,
                                                  self.input_len,
                                                  self.output_len)
        else:
            self.input_len = input_len
            self.output_len = output_len
            self.n_steps = n_steps
            self.window_size = self.n_steps // 2 + 1 if self.n_steps < 15 else self.n_steps // 3
            self.gamma = 0.8
            self.meta_learner = MetaA2CModel(self.input_len,
                                             self.output_len,
                                             self.n_steps,
                                             window_size=self.window_size,
                                             gamma=self.gamma)

        self.input = deque([np.zeros(self.input_len)] * self.window_size,
                           maxlen=self.window_size)
        self.input_batch = deque(
            [np.zeros([self.window_size, self.input_len])] * self.n_steps,
            maxlen=self.n_steps)
        self.reward_batch = deque([0.0] * self.n_steps, maxlen=self.n_steps)
        self.action_batch = deque([0] * self.n_steps, maxlen=self.n_steps)
        self.value_batch = deque([0.0] * self.n_steps, maxlen=self.n_steps)
        self.train_step = 0
Example #24
def main():
    if not os.path.isdir(options.base_dir):
        os.mkdir(options.base_dir)  #create working directory
    print options.action

    if options.action == "write_orthos":
        cds_dic = utils.read_params(options.param_file)
        ortho_dic = utils.read_orthofile(options.orthofile_format,
                                         options.ortho_file)
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        seq_dic = utils.get_cds_files(cds_dic)
        if options.no_paralogs:
            utils.write_orthos(
                options.ortho_file, seq_dic,
                "%s/%s_orthos" % (options.base_dir, options.prefix),
                index_file)
        else:
            utils.write_orthoparagroups(
                ortho_dic, seq_dic,
                "%s/%s_orthos" % (options.base_dir, options.prefix),
                index_file, options.min_taxa)
        print "Orthogroups written to %s/%s_orthos" % (options.base_dir,
                                                       options.prefix)
        print "Exiting"
        sys.exit()

    if options.action == "write_cnees":
        ncar_dic = utils.read_params(options.param_file)
        ortho_dic = utils.ncar_ortho_dic(
            options.ncar_ortho_file, options.min_taxa
        )  #this needs to be "filtered_loci.index" from the NCAR pipeline
        seq_dic = utils.get_cds_files(ncar_dic)
        index_file = "%s/%s_ncar_ortho.index" % (options.base_dir,
                                                 options.prefix)
        utils.write_ncar_cnees(
            ortho_dic, seq_dic,
            "%s/%s_ncars" % (options.base_dir, options.prefix),
            options.min_taxa, index_file)
        sys.exit()

###Align coding sequences and concatenate all protein sequences into
###an aligned matrix that can be input into RAxML to make a phylogeny.
    if options.action == "align_coding":
        cds_dic = utils.read_params(options.param_file)
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        paras_allowed = True
        og_list = utils.read_ortho_index(
            index_file, options.min_taxa, paras_allowed
        )  #Gets list of OGs that meet minimum taxa requirement. If paras_allowed is False then will not return any OGs with any paralogs in them.
        iscoding = True
        utils.fsa_coding_align(
            og_list, "%s/%s_orthos/" % (options.base_dir, options.prefix),
            "%s/%s_fsa_coding" % (options.base_dir, options.prefix),
            options.num_threads, iscoding)
        print "Orthogroups aligned using FSA and output written to %s/%s_fsa_coding" % (
            options.base_dir, options.prefix)
        paras_allowed = False
        og_list = utils.read_ortho_index(
            index_file, len(cds_dic.keys()), paras_allowed
        )  #Gets only those OGs that have a single sequence for every species in the study. This is for making a sequence matrix that can be used for phylogenetics.
        utils.concatenate_for_raxml(
            "%s/%s_fsa_coding" % (options.base_dir, options.prefix),
            "%s/%s.afa" % (options.base_dir, options.prefix), og_list,
            cds_dic.keys())
        print "If you would like to run a phylogenetic analysis, a concatenated amino acid sequence matrix of all orthogroups including all %s of the species in your study has been written to %s/%s.afa" % (
            len(cds_dic.keys()), options.base_dir, options.prefix)
        print "Exiting"
        sys.exit()

    if options.action == "fourfold_matrix":
        cds_dic = utils.read_params(options.param_file)
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        paras_allowed = False
        og_list = utils.read_ortho_index(
            index_file, len(cds_dic.keys()), paras_allowed
        )  #Gets only those OGs that have a single sequence for every species in the study. This is for making a sequence matrix that can be used for phylogenetics.
        utils.concatenate_fourf_for_raxml(
            "%s/%s_gene_ancestral" % (options.base_dir, options.prefix),
            "%s/%s_fourfold.afa" % (options.base_dir, options.prefix), og_list,
            cds_dic.keys())
        sys.exit()

    if options.action == "align_ncars":
        iscoding = False
        ortho_dic = utils.ncar_ortho_dic(
            options.ncar_ortho_file, options.min_taxa
        )  #this needs to be "filtered_loci.index" from the NCAR pipeline
        ncar_list = ortho_dic.keys()
        utils.fsa_ncar_align(
            ncar_list, "%s/%s_ncars" % (options.base_dir, options.prefix),
            "%s/%s_fsa_ncar" % (options.base_dir, options.prefix),
            options.num_threads, iscoding)
        sys.exit()

    if options.action == "pairs_coding_div":
        coding_ortho_dic = utils.read_orthofile("orthofinder",
                                                options.ortho_file)
        exclude_paras = True
        og_list = utils.min_taxa_membership(
            {(options.inspecies, options.outspecies): 2}, {}, [],
            "%s/%s_filtered.index" % (options.base_dir, options.prefix),
            options.min_taxa, exclude_paras)
        good_coding_ortho_dic = {}
        #        og_list = [10644, 11419, 12394, 11141, 11231, 11334, 11341]
        for og in og_list:
            good_coding_ortho_dic[og] = coding_ortho_dic[og]

        pickle_dir = options.pickle_dir
        utils.pairs_coding_div(
            options.inspecies, options.outspecies, good_coding_ortho_dic,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas" %
            (options.base_dir, options.prefix), options.base_dir,
            options.num_threads, options.min_taxa)
        sys.exit()

    if options.action == "alignment_filter":
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        paras_allowed = True
        og_list = utils.read_ortho_index(index_file, options.min_taxa,
                                         paras_allowed)
        utils.alignment_column_filtering(
            "%s/%s_fsa_coding" % (options.base_dir, options.prefix),
            "%s/%s_fsa_coding_columnfilt" % (options.base_dir, options.prefix),
            og_list, options.nogap_min_count, options.nogap_min_prop,
            options.nogap_min_species, {}, options.num_threads)
        print "First iteration of column filtering done. Results written to %s/%s_fsa_coding_columnfilt" % (
            options.base_dir, options.prefix)
        print "Starting Jarvis filter."
        utils.jarvis_filtering(
            og_list,
            "%s/%s_fsa_coding_columnfilt" % (options.base_dir, options.prefix),
            "%s/%s_fsa_coding_jarvis" % (options.base_dir, options.prefix),
            options.min_cds_len, options.num_threads)
        print "Jarvis filtering done. Results written to %s/%s_fsa_coding_jarvis" % (
            options.base_dir, options.prefix)
        utils.alignment_column_filtering(
            "%s/%s_fsa_coding_jarvis" % (options.base_dir, options.prefix),
            "%s/%s_fsa_coding_jarvis_columnfilt" %
            (options.base_dir, options.prefix), og_list,
            options.nogap_min_count, options.nogap_min_prop,
            options.nogap_min_species, {}, options.num_threads)
        print "Second iteration of column filtering done. Results written to %s/%s_fsa_coding_jarvis_columnfilt" % (
            options.base_dir, options.prefix)
        utils.sequence_gap_filtering(
            "%s/%s_fsa_coding_jarvis_columnfilt" %
            (options.base_dir, options.prefix),
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt" %
            (options.base_dir, options.prefix),
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas" %
            (options.base_dir, options.prefix),
            "%s/%s_orthos" % (options.base_dir, options.prefix), og_list,
            options.min_seq_prop_kept, options.max_seq_prop_gap,
            options.min_cds_len,
            "%s/%s_filtered.index" % (options.base_dir, options.prefix))
        print "Filtering of whole sequences based on gap content done. Results written to %s/%s_fsa_coding_jarvis_columnfilt_seqfilt" % (
            options.base_dir, options.prefix)
        print "Exiting"
        sys.exit()

    if options.action == "rer_converge":
        test_type = "aaml_blengths"
        foreground = "aaml_blengths"
        exclude_paras = True
        manda_taxa, multi_taxa, remove_list = utils.make_taxa_dic(
            options.taxa_inclusion)
        og_list = utils.min_taxa_membership(
            manda_taxa, multi_taxa, remove_list,
            "%s/%s_filtered.index" % (options.base_dir, options.prefix),
            options.min_taxa, exclude_paras)
        print len(og_list)
        print og_list
        utils.paml_test(
            og_list, foreground, test_type,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas" %
            (options.base_dir, options.prefix),
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, foreground,
                             options.outputfile.split(".")[0]),
            options.tree_file, options.num_threads, options.use_gblocks,
            options.min_taxa, remove_list)
        utils.read_aaml_phylos(
            og_list,
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, foreground,
                             options.outputfile.split(".")[0]),
            "%s/aaml_compiled" % (options.base_dir), options.outputfile,
            options.min_taxa)
        sys.exit()

    if options.action == "nopara_gene_trees":
        constrained = False
        paras_allowed = True
        include_paras = False
        og_list = utils.read_ortho_index(
            "%s/%s_filtered.index" % (options.base_dir, options.prefix),
            options.min_taxa, paras_allowed)
        cur_og_list = og_list
        utils.gene_trees(
            cur_og_list, "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas" %
            (options.base_dir, options.prefix),
            "%s/%s_nopara_nucl_gene_trees" %
            (options.base_dir, options.prefix), constrained, options.tree_file,
            options.num_threads, "nucs")
        sys.exit()

    if options.action == "check_discordance":
        paras_allowed = True
        og_list = utils.read_ortho_index(
            "%s/%s_filtered.index" % (options.base_dir, options.prefix),
            options.min_taxa, paras_allowed)
        utils.discordance(
            og_list, "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas" %
            (options.base_dir, options.prefix),
            "%s/%s_nopara_nucl_gene_trees" %
            (options.base_dir, options.prefix),
            "%s/%s_discordance" % (options.base_dir, options.prefix),
            options.tree_file, options.num_threads)
        utils.read_discordance(
            "%s/%s_discordance" % (options.base_dir, options.prefix), og_list,
            options.base_dir)
        sys.exit()

    if options.action == "rer_goatools":
        if not os.path.exists("%s/RER_goatools" % options.base_dir):
            os.mkdir("%s/RER_goatools" % options.base_dir)
        rerconverge_output = options.rerconverge_output
        short_outputname = rerconverge_output.split("/")[-1][0:-4]
        utils.rer_goatools(
            rerconverge_output, rerconverge_output, options.go_database,
            "%s/RER_goatools/rer_0.05_slower_go_%s" %
            (options.base_dir, short_outputname), 3, 0.05, "slow")
        utils.rer_goatools(
            rerconverge_output, rerconverge_output, options.go_database,
            "%s/RER_goatools/rer_0.05_faster_go_%s" %
            (options.base_dir, short_outputname), 3, 0.05, "fast")
        sys.exit()

    if options.action == "goatools":
        outbase = options.goa_forefile.split("/")[-1].rsplit(".", 1)[0]
        if not os.path.exists("%s/%s_goatools/" %
                              (options.base_dir, options.prefix)):
            os.mkdir("%s/%s_goatools/" % (options.base_dir, options.prefix))
        outdir = "%s/%s_goatools/%s" % (options.base_dir, options.prefix,
                                        outbase)
        if not os.path.exists(outdir):
            os.mkdir(outdir)
        utils.og_list_goatools(options.goa_forefile, options.goa_backfile,
                               options.go_database, outdir)
        sys.exit()

    if options.action == "hyphy_relax":
        test_type = "RELAX"
        exclude_paras = True
        og_list = []
        manda_taxa, multi_taxa, remove_list = utils.make_taxa_dic(
            options.taxa_inclusion)

        if options.og_list_file:

            reader = open(options.og_list_file, 'rU')
            for line in reader:
                cur_og = int(line.strip())
                og_list.append(cur_og)
        else:

            og_list = utils.min_taxa_membership(
                manda_taxa, multi_taxa, remove_list,
                "%s/%s_filtered.index" % (options.base_dir, options.prefix),
                options.min_taxa, exclude_paras)

        print len(og_list)
        if options.foreground == "INTREE":
            fore_list = "INTREE"
#            cur_og_list = og_list
        elif options.foreground.startswith("DAUGHTERS"):
            fore_list = options.foreground.split(",")
        else:
            fore_list = options.foreground.split(",")


#        og_list = [10724, 11488, 12704, 13036, 13879, 15282]
        print options.foreground
        utils.paml_test(
            og_list, fore_list, test_type,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas" %
            (options.base_dir, options.prefix), "%s/%s_%s_%s" %
            (options.base_dir, options.prefix, options.foreground, test_type),
            options.tree_file, options.num_threads, options.use_gblocks,
            options.min_taxa, remove_list)
        utils.read_hyphy_relax(
            og_list, "%s/%s_%s_%s" %
            (options.base_dir, options.prefix, options.foreground, test_type),
            options.base_dir, options.foreground)
        sys.exit()

    if options.action == "hyphy_relax_permutation":
        if not os.path.exists("%s/relax_permutations" % options.base_dir):
            os.mkdir("%s/relax_permutations" % options.base_dir)
        test_type = "RELAX"
        exclude_paras = True
        og_list = []
        manda_taxa, multi_taxa, remove_list = utils.make_taxa_dic(
            options.taxa_inclusion)

        if options.og_list_file:

            reader = open(options.og_list_file, 'rU')
            for line in reader:
                cur_og = int(line.strip())
                og_list.append(cur_og)
        else:

            og_list = utils.min_taxa_membership(
                manda_taxa, multi_taxa, remove_list,
                "%s/%s_filtered.index" % (options.base_dir, options.prefix),
                options.min_taxa, exclude_paras)

        print len(og_list)
        fore_list = options.foreground.split(",")
        random.shuffle(fore_list)
        fore_list = fore_list[0:6]
        #        og_list = [10724, 11488, 12704, 13036, 13879, 15282]
        print options.foreground
        print(fore_list)
        perm_fores = open(
            "%s/relax_permutations/foreground_perm_%s.txt" %
            (options.base_dir, options.hyphy_perm_num), 'w')
        perm_fores.write(",".join(fore_list))
        perm_fores.close()
        utils.paml_test(
            og_list, fore_list, test_type,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas" %
            (options.base_dir, options.prefix),
            "%s/relax_permutations/%s_%s_%s_%s" %
            (options.base_dir, options.prefix, ",".join(fore_list), test_type,
             options.hyphy_perm_num), options.tree_file, options.num_threads,
            options.use_gblocks, options.min_taxa, remove_list)
        utils.read_hyphy_relax(
            og_list, "%s/relax_permutations/%s_%s_%s_%s" %
            (options.base_dir, options.prefix, ",".join(fore_list), test_type,
             options.hyphy_perm_num),
            "%s/relax_permutations/" % options.base_dir,
            options.hyphy_perm_num)
        sys.exit()

    if options.action == "hyphy_absrel":
        test_type = "aBSREL"
        exclude_paras = True
        manda_taxa, multi_taxa, remove_list = utils.make_taxa_dic(
            options.taxa_inclusion)
        og_list = []
        if options.og_list_file:

            reader = open(options.og_list_file, 'rU')
            for line in reader:
                cur_og = int(line.strip())
                og_list.append(cur_og)
        else:

            og_list = utils.min_taxa_membership(
                manda_taxa, multi_taxa, remove_list,
                "%s/%s_filtered.index" % (options.base_dir, options.prefix),
                options.min_taxa, exclude_paras)

        print len(og_list)
        #        print og_list[0:10]
        og_list = utils.limit_list(og_list, options.min_og_group,
                                   options.max_og_group)
        #        og_list = og_list[0:10]
        utils.paml_test(
            og_list, [], test_type,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas" %
            (options.base_dir, options.prefix), "%s/%s_%s_%s" %
            (options.base_dir, options.prefix, "all", test_type),
            options.tree_file, options.num_threads, options.use_gblocks,
            options.min_taxa, remove_list)
        utils.read_hyphy_absrel(
            og_list, "%s/%s_%s_%s" %
            (options.base_dir, options.prefix, "all", test_type),
            options.base_dir)
        sys.exit()

    if options.action == "godatabase":
        gaf_file = "/Genomics/kocherlab/berubin/annotation/trinotate/AMEL/AMEL.gaf"
        gaf_file = "/Genomics/kocherlab/berubin/annotation/trinotate/PGRA/PGRA.gaf"
        gaf_file = "/Genomics/kocherlab/berubin/annotation/trinotate/ACEP/ACEP.gaf"
        gaf_file = "/Genomics/kocherlab/berubin/annotation/hic/trinotate/LALB/LALB.gaf"
        utils.make_go_database(ortho_dic, ipr_taxa_list,
                               "%s/%s" % (options.base_dir, options.prefix),
                               gaf_file)
        sys.exit()

    if options.action == "yn_dnds":
        #        paras_allowed = True
        exclude_paras = True
        manda_taxa, multi_taxa, remove_list = utils.make_taxa_dic(
            options.taxa_inclusion)
        og_list = utils.min_taxa_membership(
            manda_taxa, multi_taxa, remove_list,
            "%s/%s_filtered.index" % (options.base_dir, options.prefix),
            options.min_taxa, exclude_paras)
        print len(og_list)
        #        og_list = utils.read_ortho_index(index_file, options.min_taxa, paras_allowed)
        utils.yn_estimates(
            og_list, "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas" %
            (options.base_dir, options.prefix),
            "%s/%s_yn" % (options.base_dir, options.prefix), options.tree_file,
            options.min_taxa, options.use_gblocks, remove_list)
        sys.exit()

    if options.action == "gc_content":
        if not os.path.isdir("%s/%s_gc_content" %
                             (options.base_dir, options.prefix)):
            os.mkdir("%s/%s_gc_content" % (options.base_dir, options.prefix))
        paras_allowed = True
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        og_list = utils.read_ortho_index(index_file, options.min_taxa,
                                         paras_allowed)
        utils.gc_content(
            og_list, "%s/%s_fsa_coding" % (options.base_dir, options.prefix),
            "%s/%s_gc_content" % (options.base_dir, options.prefix))
        sys.exit()

    if options.action == "free_ratios":
        test_type = "free"
        foreground = "free"
        get_dn_ds = True
        exclude_paras = True
        manda_taxa, multi_taxa, remove_list = utils.make_taxa_dic(
            options.taxa_inclusion)
        og_list = []
        if options.og_list_file:
            reader = open(options.og_list_file, 'rU')
            for line in reader:
                cur_og = int(line.strip())
                og_list.append(cur_og)
        else:
            og_list = utils.min_taxa_membership(
                manda_taxa, multi_taxa, remove_list,
                "%s/%s_filtered.index" % (options.base_dir, options.prefix),
                options.min_taxa, exclude_paras)
        print len(og_list)
        #        og_list = utils.read_ortho_index(index_file, options.min_taxa, paras_allowed)

        utils.paml_test(
            og_list, foreground, test_type,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas" %
            (options.base_dir, options.prefix), "%s/%s_%s_%s" %
            (options.base_dir, options.prefix, foreground, test_type),
            options.tree_file, options.num_threads, options.use_gblocks,
            options.min_taxa, remove_list)
        cur_og_list = og_list
        utils.read_frees(
            "%s/%s_%s_%s" %
            (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s_%s_%s_results" %
            (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s.gaf" % (options.base_dir, options.prefix),
            "%s/%s_%s_%s_go" %
            (options.base_dir, options.prefix, foreground, test_type),
            get_dn_ds, options.tree_file, cur_og_list)
        sys.exit()

    if options.action == "time_aamls":
        paras_allowed = True
        foreground = "aaml_blengths"
        test_type = "aaml_blengths"
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        fore_list = options.foreground.split(",")
        og_list = utils.read_ortho_index(index_file, options.min_taxa,
                                         paras_allowed)
        #        og_list = [3576]
        utils.aaml_time_phylos(
            og_list, "%s/%s_%s_%s" %
            (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s_aaml_time_calibrated" % (options.base_dir, options.prefix),
            options.timetree, fore_list)
        sys.exit()

    if options.action == "ds_correlations":
        paras_allowed = True
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        og_list = utils.read_ortho_index(index_file, options.min_taxa,
                                         paras_allowed)
        #        og_list = og_list[0:100]
        bootstrap_taxa = True
        categorical = False
        utils.bootstrapping_ds_time_correlations(
            og_list, "%s/%s_free_free" % (options.base_dir, options.prefix),
            "%s/%s_ds_corrs" % (options.base_dir, options.prefix),
            options.timetree, options.traittree, bootstrap_taxa, categorical)
        sys.exit()

    if options.action == "branch_test":
        test_type = "branch"
        foreground = options.foreground
        og_list = utils.read_ortho_index(index_file, options.min_taxa,
                                         paras_allowed)
        cur_og_list = og_list
        #        cur_og_list = utils.target_taxa_in_og(ortho_dic, target_taxa, og_list)
        utils.paml_test(
            cur_og_list, foreground, test_type,
            "%s/%s_fsa_coding" % (options.base_dir, options.prefix),
            "%s/%s_%s_%s" %
            (options.base_dir, options.prefix, foreground, test_type),
            options.tree_file, options.num_threads)
        utils.test_lrt_branch(
            "%s/%s_%s_%s" %
            (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s_%s_%s.lrt" %
            (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s.gaf" % (options.base_dir, options.prefix), ortho_dic,
            "%s/%s_%s_%s_go" %
            (options.base_dir, options.prefix, foreground, test_type))
        sys.exit()

    if options.action == "bs_test":
        test_type = "bs"
        foreground = "solitary"
        og_list = utils.read_ortho_index(index_file, options.min_taxa,
                                         paras_allowed)
        cur_og_list = og_list
        foreground = options.foreground
        utils.paml_test(
            cur_og_list, foreground, test_type,
            "%s/%s_fsa_coding" % (options.base_dir, options.prefix),
            "%s/%s_%s_%s" %
            (options.base_dir, options.prefix, foreground, test_type),
            options.tree_file, options.num_threads, options.use_gblocks,
            options.min_taxa)
        utils.test_lrt(
            "%s/%s_%s_%s" %
            (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s_%s_%s.lrt" %
            (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s.gaf" % (options.base_dir, options.prefix), ortho_dic,
            "%s/%s_%s_%s_go" %
            (options.base_dir, options.prefix, foreground, test_type))

        sys.exit()

    if options.action == "hypergeom":
        utils.hypergeom_test(options.hyper_pop, options.hyper_pop_cond,
                             options.hyper_targets, options.hyper_targets_back)
        sys.exit()

    if options.action == "rer_hypergeom":
        utils.rer_hypergeom_test(options.hyper_pop, options.hyper_pop_cond,
                                 options.rerconverge_output, 0, 0.05, "fast")
        utils.rer_hypergeom_test(options.hyper_pop, options.hyper_pop_cond,
                                 options.rerconverge_output, 0, 0.05, "slow")
        utils.rer_hypergeom_test(options.hyper_pop, options.hyper_pop_cond,
                                 options.rerconverge_output, 0, 0.01, "fast")
        utils.rer_hypergeom_test(options.hyper_pop, options.hyper_pop_cond,
                                 options.rerconverge_output, 0, 0.01, "slow")
        sys.exit()
Example #25
import sysv_ipc
import utils

params = utils.read_params()


try:
    mq = sysv_ipc.MessageQueue(params["KEY"])
    mq.remove()
    s = "message queue %d removed" % params["KEY"]
    print (s)
except sysv_ipc.ExistentialError:
    print ("message queue doesn't exist")
    


print ("\nAll clean!")
Example #26
import shutil

import click
import pandas as pd
from deepsense import neptune
from sklearn.metrics import roc_auc_score

import pipeline_config as cfg
from pipelines import PIPELINES
from utils import init_logger, read_params, create_submission, set_seed, save_evaluation_predictions, \
    read_csv_time_chunks, cut_data_in_time_chunks, data_hash_channel_send, get_submission_hours_index

set_seed(1234)
logger = init_logger()
ctx = neptune.Context()
params = read_params(ctx)


@click.group()
def action():
    pass


@action.command()
def prepare_data():
    logger.info('chunking train')
    train = pd.read_csv(params.raw_train_filepath)
    cut_data_in_time_chunks(train,
                            timestamp_column='click_time',
                            chunks_dir=params.train_chunks_dir,
                            prefix='train',
Example #27
def evaluate_any_file():
    #os.system(scp )
    filepath = '../../original/processed_data/'
    weightpath = '../../scratch/bd_lstm/trainstats/weights_middle.pth'
    demoweights = '../../scratch/bd_lstm/trainstats/demoweights.pth'
    weightpath = demoweights
    parampath = '../../code/bdrnn/conf_model.cfg'
    filenamepath = '../../scratch/bd_lstm/filenames/testfiles.txt'
    minmaxdatapath = '../../original/minmaxdata/'

    #get best file
    filenames = read_names(filenamepath)
    print(len(filenames))
    filenamedict = make_dict(filenames)
    velocity = float(
        input(
            'Give rotational velocity between 4Hz and 18Hz and the closest one is used at evaluation.\n'
        ))
    filename, velocity = find_closest(filenamedict, velocity)
    files = [filename]

    #read parameters
    params = read_params(parampath)

    #init dataset with the file we selected and model
    dataset = DataSet(root_dir=filepath,
                      files=files,
                      normalize=False,
                      seq_len=params['slice_size'],
                      stride=1000)

    loader = DataLoader(dataset,
                        batch_size=int(params['batch_size']),
                        shuffle=True)

    model = LSTM_layers(input_size=int(params['input_size']),
                        hidden_size=int(params['hidden_size']),
                        num_layers=int(params['n_layers']),
                        dropout=float(params['dropout']),
                        output_size=int(params['output_size']),
                        batch_first=True,
                        bidirectional=True)
    # Note: weights trained on a CUDA device must be loaded with
    # map_location='cpu' on a CPU-only machine, otherwise torch.load
    # raises "RuntimeError: Attempting to deserialize object on a CUDA device".

    model.load_state_dict(torch.load(weightpath, map_location='cpu'))
    model.to(device)
    model.eval()
    losses = []

    for idx, sample in enumerate(loader):
        y = sample[:, :, :2].clone().detach().requires_grad_(True).to(device)
        x = sample[:, :, 2:].clone().detach().requires_grad_(True).to(device)
        h0 = model.init_hidden(int(params['batch_size']), None).to(device)
        c0 = model.init_cell(int(params['batch_size'])).to(device)

        #compute
        output = model.forward(x, (h0, c0))
        loss = F.mse_loss(output, y)
        losses.append(loss.item())

        output, y = scale_seqs(output, y, filename, minmaxdatapath)

        if (idx % 3) == 0:
            save_this_plot(0, 2763, output[0], y[0], loss.item(), velocity)
    print("Avg loss:", np.mean(losses))
Example #28
def main():
    integrate_ode = integrate_unanimity_ode
    params_file = 'params_txtfiles/params_unanimity_zA_vs_xi.txt'
    all_params = read_params(params_file, int_params=['n'])
    n = all_params['n'][0]
    #print all_params
    params_order = ['pi_A', 'pi_B', 'xi', 'lpub', 'lpriv', 'pa', 'pb']
    types_names = [
        ''.join(['z'] + ['a'] * (n - i) + ['b'] * i) for i in range(n + 1)
    ]
    tipo2latex = {
        elem: r'$z_{%s}$' % (elem[1:].upper())
        for elem in types_names
    }
    my_colors = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3']
    more_than_one = ['xi']
    init_conditions = gen_init_conditions(n)[::2]
    str_vec_in = [str(vec_in) for vec_in in init_conditions]
    df_zA = pd.DataFrame()
    linestyles = ['-', '--', '-.', ':']
    handles = []
    labels = []
    fig = plt.figure(figsize=(16, 14))
    ax = fig.add_subplot(111)
    out_count = 0
    for xi in all_params['xi']:
        params = {
            lab: all_params[lab][0]
            for i, lab in enumerate(params_order) if lab != 'xi'
        }
        params['xi'] = xi
        print(params)
        lpriv = 1 - params['lpub']
        params['lpriv'] = lpriv
        params_nums = tuple([params[lab] for lab in params_order] + [n])
        for i, vec_in in enumerate(init_conditions):
            df_temp = pd.DataFrame()
            curve_points, time_points = integrate_ode(vec_in, params_nums)
            df_temp = dynamics_to_dataframe(n,
                                            curve_points,
                                            time_points,
                                            initial_vec_str=str_vec_in[i])
            df_temp['xi'] = xi
            df_zA = pd.concat([df_zA, df_temp])

            df_aux = df_temp.sort_values(by='t')
            tipo = 'zA'
            freq_tipo = df_aux[tipo].tolist()
            time_points = df_aux['t'].tolist()
            linha, = ax.plot(time_points,
                             freq_tipo,
                             color=my_colors[i],
                             lw=2,
                             linestyle=linestyles[out_count])
            out_count += 1
            handles.append(linha)

            condicao = []
            for i, elem in enumerate(vec_in[:1]):
                pedaco_str = tipo2latex[types_names[i]] + \
                    (r'$^{init}=%.0f' % round(elem, 0))+'$'
                condicao.append(pedaco_str)

            pedaco_str = r'$z_{BBB}$' + (r'$^{init}=%.0f' %
                                         round(1 - sum(vec_in), 0)) + '$'
            condicao.append(pedaco_str)
            pedaco_str = (r'$ \xi=%.7f' % xi).rstrip('0').rstrip('.') + '$'
            condicao.append(pedaco_str)
            labels.append(';\t'.join(condicao))

    fig.legend(handles, labels, bbox_to_anchor=(0.5, 0.17), loc='center')
    default_order = ['lpub', 'lpriv', 'pi_A', 'pi_B', 'pa', 'pb', 'xi']
    default_order = [col for col in default_order if col not in more_than_one]
    titulo = gen_str_params(params,
                            params_order=default_order,
                            symbols=params2latex,
                            exclude_keys=more_than_one).replace('\n', '')

    max_za = np.max(df_zA['zA'].tolist())
    min_za = np.min(df_zA['zA'].tolist())
    dist = 0.05 * (max_za - min_za)

    ax.set_xlim([0., 160.1])
    #ax.set_ylim([0., 0.81])
    ax.set_ylim([min_za - dist, max_za + dist])
    ax.set_xlabel(r'$t$')
    ax.set_ylabel(r'$z_A$')
    ax.grid(True)
    plt.subplots_adjust(left=0., right=0.95, top=0.75, bottom=0.4)
    str_params = gen_str_params(params,
                                params_order=params_order,
                                exclude_keys=more_than_one)

    uniq_param = '_'.join(
        str_params.replace('\n', '').replace('$', '').split('; '))
    uniq_param += '_n=%d' % n

    plt.savefig('figs/same_plot_zA_dynamics_%s.pdf' % (uniq_param),
                bbox_inches='tight')
    ax.set_title(titulo)

    #plt.tight_layout()
    plt.savefig('figs/same_plot_zA_dynamics_%s.png' % (uniq_param),
                bbox_inches='tight')

    plt.close()
Example #29
    def train_all(self, n_epochs):
        if (not self.restart):
            utils.safe_mkdir(self.checkpoints_folder)
        saver = tf.train.Saver()
        train_writer = tf.summary.FileWriter('./' + self.graph_folder + '/train', tf.get_default_graph())
        test_writer = tf.summary.FileWriter('./' + self.graph_folder + '/test', tf.get_default_graph())

        print('Loading snapshot matrix...')
        if (self.large):
            S = utils.read_large_data(self.train_mat)
        else:
            S = utils.read_data(self.train_mat)

        idxs = np.random.permutation(S.shape[0])
        S = S[idxs]
        S_max, S_min = utils.max_min(S, self.n_train)
        utils.scaling(S, S_max, S_min)

        if (self.zero_padding):
            S = utils.zero_pad(S, self.p)

        self.S_train, self.S_val = S[:self.n_train, :], S[self.n_train:, :]
        del S

        print('Loading parameters...')
        params = utils.read_params(self.train_params)

        params = params[idxs]

        self.params_train, self.params_val = params[:self.n_train], params[self.n_train:]
        del params

        self.loss_best = 1
        count = 0
        with tf.Session(config = tf.ConfigProto(gpu_options = tf.GPUOptions(allow_growth = True))) as sess:
            sess.run(tf.global_variables_initializer())

            if (self.restart):
                ckpt = tf.train.get_checkpoint_state(os.path.dirname(self.checkpoints_folder + '/checkpoint'))
                if ckpt and ckpt.model_checkpoint_path:
                    print(ckpt.model_checkpoint_path)
                    saver.restore(sess, ckpt.model_checkpoint_path)

            step = self.g_step.eval()

            for epoch in range(n_epochs):
                step = self.train_one_epoch(sess, self.init, train_writer, epoch, step)
                total_loss_mean = self.eval_once(sess, saver, self.init, test_writer, epoch, step)
                if total_loss_mean < self.loss_best:
                    self.loss_best = total_loss_mean
                    count = 0
                else:
                    count += 1
                # early stopping: halt if the validation loss has not improved for 500 consecutive epochs
                if count == 500:
                    print('Stopped training early: validation loss did not improve for 500 epochs')
                    break
            print('Best loss on validation set: {0}'.format(self.loss_best))

        train_writer.close()
        test_writer.close()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(os.path.dirname(self.checkpoints_folder + '/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                print(ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)

            print('Loading testing snapshot matrix...')
            if (self.large):
                self.S_test = utils.read_large_data(self.test_mat)
            else:
                self.S_test = utils.read_data(self.test_mat)

            utils.scaling(self.S_test, S_max, S_min)

            if (self.zero_padding):
                self.S_test = utils.zero_pad(self.S_test, self.n)

            print('Loading testing parameters...')
            self.params_test = utils.read_params(self.test_params)

            self.test_once(sess, self.init)

            utils.inverse_scaling(self.U_h, S_max, S_min)
            utils.inverse_scaling(self.S_test, S_max, S_min)
            n_test = self.S_test.shape[0] // self.N_t
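            # relative L2 reconstruction error, computed per test-parameter instance over its N_t time steps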
            err = np.zeros((n_test, 1))
            for i in range(n_test):
                num = np.sqrt(np.mean(np.linalg.norm(self.S_test[i * self.N_t : (i + 1) * self.N_t] - self.U_h[i * self.N_t : (i + 1) * self.N_t], 2, axis = 1) ** 2))
                den = np.sqrt(np.mean(np.linalg.norm(self.S_test[i * self.N_t : (i + 1) * self.N_t], 2, axis = 1) ** 2))
                err[i] = num / den
            print('Error indicator epsilon_rel: {0}'.format(np.mean(err)))
Example #30
0
    def __init__(self, cfg):
        self.cfg = cfg

        self.db = dutils.init_db(self.cfg.db_path)
        self.init_post()

        self.device = torch.device(self.cfg.device)

        # dataset parameters
        if self.cfg.dataset.lower() == 'mnist':
            self.dataset = MNIST
            self.data_path = self.cfg.data_dir + 'mnist'
            self.img_size = [1, 28, 28]
            self.normalize = [(0.1307, ), (0.3081, )]
        elif self.cfg.dataset.lower() == 'cifar10':
            self.dataset = CIFAR10
            self.data_path = self.cfg.data_dir + 'cifar10'
            self.img_size = [3, 32, 32]
            self.normalize = [(0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)]
        else:
            raise NotImplementedError()

        # datasets and dataloaders
        # base transforms
        self.train_transforms = [transforms.ToTensor()]
        if self.cfg.normalize_input:
            self.train_transforms.append(
                transforms.Normalize(self.normalize[0], self.normalize[1]))
        self.val_transforms = copy.deepcopy(self.train_transforms)

        # # (if applicable) additional training set transforms defined here
        # train_transforms.extend([
        #                          ])

        self.dataset_train = self.dataset(root=self.data_path,
                                          train=True,
                                          download=True,
                                          transform=transforms.Compose(
                                              self.train_transforms),
                                          target_transform=None)
        self.dataloader_train = DataLoader(dataset=self.dataset_train,
                                           batch_size=self.cfg.batch_size,
                                           shuffle=self.cfg.shuffle,
                                           num_workers=self.cfg.num_workers,
                                           pin_memory=True,
                                           drop_last=False)

        # number of output classes (based only on training data)
        self.c_dim = len(torch.unique(self.dataset_train.targets))

        self.dataset_val = self.dataset(root=self.data_path,
                                        train=False,
                                        download=True,
                                        transform=transforms.Compose(
                                            self.val_transforms),
                                        target_transform=None)
        self.dataloader_val = DataLoader(dataset=self.dataset_val,
                                         batch_size=self.cfg.batch_size,
                                         shuffle=False,
                                         num_workers=self.cfg.num_workers,
                                         pin_memory=True,
                                         drop_last=False)

        # maximum entropy threshold for training with random inputs
        self.max_entropy = metrics.max_entropy(self.c_dim)
        self.thresh_entropy = self.cfg.train_random * self.max_entropy

        # define model
        # parameters for each hidden layer are passed in as an argument
        self.params = utils.read_params(
            self.cfg.model_params[self.cfg.model_type])
        self.activation = getattr(activations, self.cfg.activation.lower())
        if self.cfg.model_type.lower() == 'fc':
            if self.cfg.norm.lower() == 'batch':
                self.norm = nn.BatchNorm1d
            elif self.cfg.norm.lower() == 'layer':
                self.norm = layers.LayerNorm1d
            else:
                self.norm = None
            net = FCNet
        elif self.cfg.model_type.lower() == 'conv':
            if self.cfg.norm.lower() == 'batch':
                self.norm = nn.BatchNorm2d
            elif self.cfg.norm.lower() == 'layer':
                self.norm = layers.LayerNorm2d
            else:
                self.norm = None
            net = ConvNet
        else:
            raise NotImplementedError()
        self.net = net(self.img_size, self.c_dim, self.params, self.activation,
                       self.norm).to(self.device)
        self.post['params'] = self.params

        # TODO: add custom weight initialization scheme
        # # weight initialization - weights are initialized using Kaiming uniform (He) initialization by default

        # loss function <kl_y_to_p> generalizes the cross entropy loss to continuous label distributions
        # i.e. <kl_y_to_p> is equivalent to <cross_entropy_loss> for one-hot labels
        # but is also a sensible loss function for continuous label distributions
        self.criterion = loss_fns.kl_y_to_p
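        # A minimal sketch (an assumption, not the repository's actual loss_fns code)
        # of what a KL(y || p) criterion can look like; for one-hot y the y*log(y)
        # term vanishes and the expression reduces to the usual cross entropy:
        #
        #   def kl_y_to_p(logits, y):
        #       log_p = torch.log_softmax(logits, dim=1)
        #       log_y = torch.log(y.clamp_min(1e-12))
        #       return (y * (log_y - log_p)).sum(dim=1).mean()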

        if self.cfg.optim.lower() == 'sgd':
            self.optimizer = optim.SGD(
                params=self.net.parameters(),
                lr=self.cfg.lr,
                momentum=self.cfg.optim_params['sgd']['momentum'],
                nesterov=self.cfg.optim_params['sgd']['nesterov'])
            self.post['momentum'], self.post[
                'nesterov'] = self.cfg.optim_params['sgd'][
                    'momentum'], self.cfg.optim_params['sgd']['nesterov']
        else:
            self.optimizer = optim.Adam(
                params=self.net.parameters(),
                lr=self.cfg.lr,
                betas=(self.cfg.optim_params['adam']['beta1'],
                       self.cfg.optim_params['adam']['beta2']))
            self.post['beta1'], self.post['beta2'] = self.cfg.optim_params[
                'adam']['beta1'], self.cfg.optim_params['adam']['beta2']
Example #31
0
# older Pythons so I import md5 if hashlib is not available. Fortunately
# md5 can masquerade as hashlib for my purposes.
try:
    import hashlib
except ImportError:
    import md5 as hashlib

# 3rd party modules
import posix_ipc

# Utils for this demo
import utils

utils.say("Oooo 'ello, I'm Mrs. Conclusion!")

params = utils.read_params()

# Mrs. Premise has already created the message queue. I just need a handle
# to it.
mq = posix_ipc.MessageQueue(params["MESSAGE_QUEUE_NAME"])

what_i_sent = ""

for i in range(0, params["ITERATIONS"]):
    utils.say("iteration %d" % i)

    s, _ = mq.receive()
    s = s.decode()
    utils.say("Received %s" % s)

    while s == what_i_sent:
Example #32
0
import os

from attrdict import AttrDict
from deepsense import neptune

from utils import read_params

ctx = neptune.Context()
params = read_params(ctx)

CATEGORICAL_COLUMNS = [
    'CODE_GENDER', 'EMERGENCYSTATE_MODE', 'FLAG_MOBIL', 'FLAG_OWN_CAR',
    'FLAG_OWN_REALTY', 'FONDKAPREMONT_MODE', 'HOUSETYPE_MODE',
    'NAME_CONTRACT_TYPE', 'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE',
    'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE',
    'OCCUPATION_TYPE', 'ORGANIZATION_TYPE', 'WALLSMATERIAL_MODE',
    'WEEKDAY_APPR_PROCESS_START'
]
NUMERICAL_COLUMNS = [
    'AMT_ANNUITY', 'AMT_CREDIT', 'AMT_GOODS_PRICE', 'AMT_INCOME_TOTAL',
    'CNT_CHILDREN', 'DAYS_BIRTH', 'DAYS_EMPLOYED', 'DAYS_ID_PUBLISH',
    'DAYS_REGISTRATION', 'EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3',
    'OWN_CAR_AGE', 'REGION_POPULATION_RELATIVE', 'REGION_RATING_CLIENT',
    'REGION_RATING_CLIENT_W_CITY'
]
TIMESTAMP_COLUMNS = []
ID_COLUMNS = ['SK_ID_CURR']
TARGET_COLUMNS = ['TARGET']

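# size of the data sample presumably used for quick development runs (200,000 rows)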
DEV_SAMPLE_SIZE = int(20e4)
Example #33
0
def main(name, argv):
    if not len(argv) == 1:
        print_usage(name)
        return

    log = open('log.txt', 'w', buffering=1)
    log.write('INFO: PRosettaC run has started\n')
    log.write('INFO: Processing inputs\n')
    params = utils.read_params(argv[0])
    PDB = params['PDB'].split()
    LIG = params['LIG'].split()
    Linkers = params['PROTAC'].split()[0]
    Full = params['Full'].split()[0] == 'True'
    if '.smi' in Linkers:
        with open(Linkers, 'r') as f:
            protac = f.readline().split()[0]
    else:
        protac = Linkers
    Structs = ['StructA.pdb', 'StructB.pdb']
    Heads = ['HeadA.sdf', 'HeadB.sdf']
    Subs = ['SubA.sdf', 'SubB.sdf']
    Chains = ['A', 'B']
    Anchors = []
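    # per-side working files: cleaned structures, extracted binders (heads), their PROTAC-shared substructures (subs), chain IDs and anchor-atom indices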

    # Get a handle to the cluster specified in config file. Default to PBS cluster.
    cluster = cl.getCluster(params['ClusterName'])

    for i in [0, 1]:
        if '.pdb' not in PDB[i] and '.sdf' in LIG[i]:
            log.write(
                'ERROR: An .sdf file can only be chosen if a corresponding .pdb file is chosen\n'
            )
            sys.exit()
        if not pymol_utils.get_rec_plus_lig(PDB[i], LIG[i], Structs[i],
                                            Heads[i], Chains[i]):
            log.write(
                'ERROR: There is a problem with the PDB chains. If using an .sdf file, it should be close to exactly one protein chain in its appropriate .pdb file. If using a LIG name, make sure that the ligand has a chain assigned to it within the .pdb file. Also, make sure the ligand has at least 10 heavy atoms.\n'
            )
            sys.exit()
        Anchors.append(pl.get_mcs_sdf(Heads[i], Subs[i], protac))
        if Anchors[i] is None:
            log.write(
                'ERROR: There is some problem with the PDB ligand ' + LIG[i] +
                '. It could be either one of the following options: the ligand is not readable by RDKit, the MCS (maximal common substructure) between the PROTAC SMILES and the '
                + LIG[i] +
                ' ligand does not have an anchor atom which is uniquely defined with regard to the SMILES, or there is a different problem regarding the substructure match. Try to choose a different PDB template, or use the manual option, supplying your own .sdf files.\n'
            )
            log.close()
            sys.exit()
    Heads = Subs
    log.write(
        'INFO: Cleaning structures, adding hydrogens to binders and running relax\n'
    )
    PT_params = []
    for i in [0, 1]:
        #Adding hydrogens to the heads (binders)
        new_head = Heads[i].split('.')[0] + "_H.sdf"
        utils.addH_sdf(Heads[i], new_head)
        Anchors[i] = pl.translate_anchors(Heads[i], new_head, Anchors[i])
        if Anchors[i] == -1:
            log.write(
                'ERROR: There is a problem with the maximal common substructure between the PROTAC and PDB ligand '
                + LIG[i] + '.\n')
            log.close()
            sys.exit()
        Heads[i] = new_head
        #Cleaning the structures
        rs.clean(Structs[i], Chains[i])
        Structs[i] = Structs[i].split('.')[0] + '_' + Chains[i] + '.pdb'
        #Relaxing the initial structures
        PT_pdb, PT_param = rs.mol_to_params(Heads[i], 'PT' + str(i),
                                            'PT' + str(i))
        PT_params.append(PT_param)
        os.system('cat ' + PT_pdb + ' ' + Structs[i] + ' > Side' + str(i) +
                  '.pdb')
        Structs[i] = 'Side' + str(i) + '.pdb'
        rs.relax(Structs[i], PT_param)
        Structs[i] = 'Side' + str(i) + '_0001.pdb'
        #Fix the atom order by adding the original ligands to the relaxed structures
        rs.clean(Structs[i], Chains[i])
        Structs[i] = Structs[i].split('.')[0] + '_' + Chains[i] + '.pdb'
        os.system('cat ' + PT_pdb + ' ' + Structs[i] + ' > Init' + str(i) +
                  '.pdb')
        Structs[i] = 'Init' + str(i) + '.pdb'

    #Generate up to 200 PROTAC conformations for each anchor-distance bin
    log.write('INFO: Sampling the distance between the two anchor points\n')
    (min_value, max_value) = pl.SampleDist(Heads, Anchors, Linkers)
    if (min_value, max_value) == (None, None):
        log.write(
            'ERROR: There is a problem finding a substructure match between the .sdf files and the SMILES of the full PROTAC. Please check that your .sdf files have the right conformations.\n'
        )
        log.close()
        sys.exit()
    if (min_value, max_value) == (0, 0):
        log.write(
            'ERROR: There is a problem with generating protac conformations to sample the anchor distance. Please check that both .sdf files are in a bound conformation to their appropriate structures and that this conformation is valid.\n'
        )
        log.close()
        sys.exit()

    #PatchDock
    log.write('INFO: Running PatchDock with the constraints\n')
    if Full:
        Global = 1000
    else:
        Global = 500
    Num_Results = utils.patchdock(Structs, [a + 1 for a in Anchors], min_value,
                                  max_value, Global, 2.0)
    if Num_Results is None:
        log.write(
            'INFO: PatchDock did not find any global docking solution within the geometrical constraints\n'
        )
        log.write('INFO: PRosettaC run has finished\n')
        log.close()
        sys.exit()

    #Rosetta Local Docking
    log.write(
        'INFO: Running Rosetta local docking on the top PatchDock results\n')
    curr_dir = os.getcwd()
    os.chdir('Patchdock_Results/')
    if Full:
        Local = 50
    else:
        Local = 10
    commands = [
        rs.local_docking('pd.' + str(i + 1) + '.pdb', Chains[0] + 'X',
                         Chains[1] + 'Y', curr_dir + '/' + PT_params[0],
                         curr_dir + '/' + PT_params[1], Local)
        for i in range(Num_Results)
    ]
    jobs = cluster.runBatchCommands(commands, mem=params['RosettaDockMemory'])
    log.write('INFO: Local docking jobs: ' + str(jobs) + '\n')
    cluster.wait(jobs)

    #Generating 100 constrained conformations for the entire linker based on PatchDock results
    log.write(
        'INFO: Generating up to 100 constrained conformations for each local docking result\n'
    )
    docking_solutions = glob.glob('*_docking_????.pdb')
    suffix = []
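    # pair each local-docking output with a short tag '<patchdock index>_<model number>' parsed from its file name (leading zeros stripped)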
    for s in docking_solutions:
        suffix.append([s, s.split('.')[1].split('_')])
        suffix[-1][1] = suffix[-1][1][0] + '_' + str(int(suffix[-1][1][2]))
    commands = [
        'python ' + utils.SCRIPTS_FOL + '/constraint_generation.py ../' +
        Heads[0] + ' ../' + Heads[1] + ' ../' + Linkers + ' ' + s[1] + " " +
        s[0] + " " + ''.join(Chains) for s in suffix
    ]
    jobs = cluster.runBatchCommands(commands,
                                    batch_size=12,
                                    mem=params['ProtacModelMemory'])
    log.write('INFO: Constrained conformation generation jobs: ' + str(jobs) +
              '\n')
    cluster.wait(jobs)

    #Clustering the top 200 local docking models (according to interface RMSD), out of 1000 final scoring models
    log.write('INFO: Clustering the top results\n')
    os.system('cat ../Init0.pdb ../Init1.pdb > ../Init.pdb')
    os.chdir('../')
    os.system('python ' + utils.SCRIPTS_FOL + '/clustering.py 1000 200 4 ' +
              Chains[1])
    if os.path.isdir('Results/'):
        log.write('INFO: Clustering is done\n')
    else:
        log.write('INFO: No models have been created\n')
    log.write('INFO: PRosettaC run has finished\n')
    log.close()