Пример #1
0
def main(args):
    np.random.seed(args.seed)

    # CPU only instead of GPU
    if args.cpu_only:
        logging.info('Setting env for CPU-only mode...')
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
        os.environ["CUDA_VISIBLE_DEVICES"] = '-1'

    # Load and process data
    # Provide objective to load
    recall_0 = Utils.class_recall(0)
    recall_1 = Utils.class_recall(1)
    custom_obj = {'metr': recall_0}

    logging.info('Loading model...')
    ## pkl
    logging.info('  Loading mstd...')
    F = os.path.join(args.model_path, args.mstd_name)
    if not os.path.exists(F):
        msg = 'Model file not available at data-path: {}'
        raise IOError(msg.format(F))
    with open(F, 'rb') as mstd:
        mean_tr, std_tr = pickle.load(mstd)
    ## h5
    logging.info('  Loading h5...')
    F = os.path.join(args.model_path, args.model_name)
    if not os.path.exists(F):
        msg = 'Model file not available at data-path: {}'
        raise IOError(msg.format(F))
    model = load_model(F, custom_objects=custom_obj)

    # outdir
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    logging.info('Loading features...')
    x, y, i2n = Utils.load_features_nogt(args.feature_file_table,
                                         force_overwrite=args.force_overwrite,
                                         pickle_only=args.pickle_only,
                                         n_procs=args.n_procs)

    logging.info('Loaded {} contigs'.format(len(set(i2n.values()))))
    n2i = Utils.reverse_dict(i2n)
    x = [xi for xmeta in x for xi in xmeta]
    y = np.concatenate(y)

    logging.info('Running model generator...')
    dataGen = Models.Generator(x,
                               y,
                               batch_size=64,
                               shuffle=False,
                               norm_raw=0,
                               mean_tr=mean_tr,
                               std_tr=std_tr)

    logging.info('Computing predictions...')
    scores = Utils.compute_predictions(n2i, dataGen, model, args.save_path,
                                       args.save_name)
Пример #2
0
    def __init__(self, config):
        self.max_len = config.max_len
        self.filters = config.filters
        self.n_conv = config.n_conv
        self.n_features = config.n_features
        self.pool_window = config.pool_window
        self.dropout = config.dropout
        self.lr_init = config.lr_init
        self.n_fc = config.n_fc
        self.n_hid = config.n_hid

        self.net = Sequential()

        self.net.add(
            Conv2D(self.filters,
                   kernel_size=(2, self.n_features),
                   input_shape=(self.max_len, self.n_features, 1),
                   activation='relu',
                   padding='valid'))
        self.net.add(BatchNormalization(axis=-1))

        for i in range(1, self.n_conv):
            self.net.add(
                Conv2D(2**i * self.filters,
                       kernel_size=(2, 1),
                       strides=2,
                       input_shape=(self.max_len, 1,
                                    2**(i - 1) * self.filters),
                       activation='relu'))
            self.net.add(BatchNormalization(axis=-1))

        self.net.add(AveragePooling2D((self.pool_window, 1)))
        self.net.add(Flatten())

        optimizer = keras.optimizers.adam(lr=self.lr_init)

        # binary classification
        for _ in range(self.n_fc - 1):
            self.net.add(Dense(self.n_hid, activation='relu'))
            self.net.add(Dropout(rate=self.dropout))

        self.net.add(Dense(1, activation='sigmoid'))
        self.net.add(Dropout(rate=self.dropout))

        recall_0 = Utils.class_recall(0)
        recall_1 = Utils.class_recall(1)
        self.net.compile(loss='binary_crossentropy',
                         optimizer=optimizer,
                         metrics=[recall_0, recall_1])

        self.reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                           factor=0.5,
                                                           patience=5,
                                                           min_lr=0.01 *
                                                           self.lr_init)
Пример #3
0
def main(args):
    """Main interface
    """
    np.random.seed(12)

    save_plot = args.save_plot
    if save_plot is None:
        save_plot = args.save_path

    # Load and process data
    # Provide objective to load
    recall_0 = Utils.class_recall(0)
    recall_1 = Utils.class_recall(1)
    custom_obj = {'metr': recall_0}

    path_to_models = os.listdir(args.save_path)
    auc = []

    for model_path in path_to_models:
        if not os.path.exists(
            (os.path.join(args.save_path, model_path, 'final_model.h5'))):
            continue

        if not os.path.exists(
                os.path.join(args.save_path, model_path, 'predictions')):
            os.makedirs(os.path.join(args.save_path, model_path,
                                     'predictions'))

        if not os.path.exists(
                os.path.join(args.save_path, model_path, 'predictions',
                             args.data_path.split('/')[-1])):
            os.makedirs(
                os.path.join(args.save_path, model_path, 'predictions',
                             args.data_path.split('/')[-1]))

        F = os.path.join(args.save_path, model_path,
                         'mean_std_final_model.pkl')
        with open(F, 'rb') as mstd:
            mean_tr, std_tr = pickle.load(mstd)

        model = load_model(os.path.join(args.save_path, model_path,
                                        'final_model.h5'),
                           custom_objects=custom_obj)

        tech = args.technology

        logging.info('Loading data...')
        if args.is_synthetic == 1:
            x, y, i2n = Utils.load_features(args.data_path,
                                            max_len=args.max_len,
                                            mode=args.mode,
                                            technology=tech)
        else:
            x, y, i2n = Utils.load_features_nogt(args.data_path,
                                                 max_len=args.max_len,
                                                 mode=args.mode)

        logging.info('Loaded {} contigs...'.format(len(set(i2n.values()))))

        n2i = Utils.reverse_dict(i2n)
        x = [xi for xmeta in x for xi in xmeta]
        y = np.concatenate(y)

        dataGen = Models.Generator(x,
                                   y,
                                   args.max_len,
                                   batch_size=64,
                                   shuffle=False,
                                   norm_raw=bool(args.norm_raw),
                                   mean_tr=mean_tr,
                                   std_tr=std_tr)

        loggin.info('Computing predictions for {}...'.format(tech))

        scores = compute_predictions(y, n2i)
        outfile = os.path.join(args.save_path, model_path, 'predictions',
                               args.data_path.split('/')[-1], tech + '.pkl')
        with open(outfile, 'wb') as spred:
            pickle.dump(scores, spred)
        logging.info('File written: {}'.format(outfile))
Пример #4
0
def main(args):
    """Main interface
    """
    # init
    np.random.seed(args.seed)
    ## where to save the plot
    save_plot = args.save_plot
    if save_plot is None:
        save_plot = args.save_path

    # Load and process data
    # Provide objective to load
    logging.info('Loading data...')
    recall_0 = Utils.class_recall(0)
    recall_1 = Utils.class_recall(1)
    custom_obj = {'metr': recall_0}

    h5_file = os.path.join(args.model_path, args.model_name)
    if not os.path.exists(h5_file):
        msg = 'Cannot find {} file in {}'
        raise IOError(msg.format(args.model_name, args.model_path))
    logging.info('Loading model: {}'.format(h5_file))
    model = load_model(h5_file, custom_objects=custom_obj)

    # model pkl
    pkl_file = os.path.join(args.model_path, args.mstd_name)
    logging.info('Loading file: {}'.format(pkl_file))
    with open(pkl_file, 'rb') as mstd:
        mean_tr, std_tr = pickle.load(mstd)

    # loading features
    if args.is_synthetic == 1:
        logging.info('Loading synthetic features')
        x, y, i2n = Utils.load_features(args.feature_file_table,
                                        max_len=args.max_len,
                                        technology=args.technology,
                                        force_overwrite=args.force_overwrite,
                                        n_procs=args.n_procs)
    else:
        logging.info('Loading non-synthetic features')
        x, y, i2n = Utils.load_features_nogt(
            args.feature_file_table,
            max_len=args.max_len,
            force_overwrite=args.force_overwrite,
            n_procs=args.n_procs)

    logging.info('Loaded {} contigs'.format(len(set(i2n.values()))))
    n2i = Utils.reverse_dict(i2n)
    x = [xi for xmeta in x for xi in xmeta]
    y = np.concatenate(y)

    logging.info('Running model generator...')
    dataGen = Models.Generator(x,
                               y,
                               args.max_len,
                               batch_size=64,
                               shuffle=False,
                               norm_raw=bool(args.norm_raw),
                               mean_tr=mean_tr,
                               std_tr=std_tr)

    logging.info('Computing predictions for {}...'.format(args.technology))
    scores = Utils.compute_predictions_y_known(y, n2i, model, dataGen)
    outfile = os.path.join(
        args.save_path, '_'.join([args.save_name, args.technology + '.pkl']))
    with open(outfile, 'wb') as spred:
        pickle.dump(scores, spred)
    logging.info('File written: {}'.format(outfile))
Пример #5
0
    def __init__(self, config):
        self.max_len = config.max_len
        self.filters = config.filters
        self.n_conv = config.n_conv
        self.n_features = config.n_features
        self.pool_window = config.pool_window
        self.dropout = config.dropout
        self.lr_init = config.lr_init
        self.mode = config.mode
        self.n_fc = config.n_fc
        self.n_hid = config.n_hid

        self.net = Sequential()

        self.net.add(
            Conv2D(self.filters,
                   kernel_size=(2, self.n_features),
                   input_shape=(self.max_len, self.n_features, 1),
                   activation='relu',
                   padding='valid'))
        self.net.add(BatchNormalization(axis=-1))

        for i in range(1, self.n_conv):
            self.net.add(
                Conv2D(2**i * self.filters,
                       kernel_size=(2, 1),
                       strides=2,
                       input_shape=(self.max_len, 1,
                                    2**(i - 1) * self.filters),
                       activation='relu'))
            self.net.add(BatchNormalization(axis=-1))

        self.net.add(AveragePooling2D((self.pool_window, 1)))
        self.net.add(Flatten())

        optimizer = keras.optimizers.adam(lr=self.lr_init)

        if self.mode in ['chimera', 'extensive']:
            for _ in range(self.n_fc - 1):
                self.net.add(Dense(self.n_hid, activation='relu'))
                self.net.add(Dropout(rate=self.dropout))

            self.net.add(Dense(1, activation='sigmoid'))
            self.net.add(Dropout(rate=self.dropout))

            recall_0 = Utils.class_recall(0)
            recall_1 = Utils.class_recall(1)
            self.net.compile(loss='binary_crossentropy',
                             optimizer=optimizer,
                             metrics=[recall_0, recall_1])
        elif self.mode == 'edit':
            self.net.add(Dense(20, activation='relu'))
            self.net.add(Dropout(rate=dropout))
            self.net.add(Dense(20, activation='relu'))
            self.net.add(Dropout(rate=dropout))
            self.net.add(Dense(1, activation='linear'))
            self.net.compile(loss='mean_absolute_error',
                             optimizer=optimizer,
                             metrics=[Utils.explained_var])
        else:
            raise ('Training mode "{}" not supported.'.format(mode))

        self.reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                           factor=0.5,
                                                           patience=5,
                                                           min_lr=0.01 *
                                                           self.lr_init)