Exemplo n.º 1
0
def add_predict_args(parser: ArgumentParser):
    """
    Adds predict arguments to an ArgumentParser.

    The valid values for ``--gpu`` and ``--features_generator`` are computed when this
    function is called, from ``torch.cuda.device_count()`` and
    ``get_available_features_generators()`` respectively.

    :param parser: An ArgumentParser. It is modified in place; nothing is returned.
    """
    # Hardware selection
    parser.add_argument('--gpu', type=int,
                        choices=list(range(torch.cuda.device_count())),
                        help='Which GPU to use')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='Turn off cuda') if False else None

    # Input / output locations
    parser.add_argument('--test_path', type=str,
                        help='Path to CSV file containing testing data for which predictions will be made')
    parser.add_argument('--use_compound_names', action='store_true', default=False,
                        help='Use when test data file contains compound names in addition to SMILES strings')
    parser.add_argument('--preds_path', type=str,
                        help='Path to CSV file where predictions will be saved')

    # Checkpoints
    # NOTE: adjacent string literals are concatenated verbatim, so the first piece
    # must end with a space to keep the rendered help text readable.
    parser.add_argument('--checkpoint_dir', type=str,
                        help='Directory from which to load model checkpoints '
                             '(walks directory and ensembles all models that are found)')
    parser.add_argument('--checkpoint_path', type=str, default=None,
                        help='Path to model checkpoint (.pt file)')

    parser.add_argument('--batch_size', type=int, default=50,
                        help='Batch size')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='Turn off cuda')

    # Additional molecule-level features
    parser.add_argument('--features_generator', type=str, nargs='*',
                        choices=get_available_features_generators(),
                        help='Method of generating additional features')
    parser.add_argument('--features_path', type=str, nargs='*',
                        help='Path to features to use in FNN (instead of features_generator)')
    parser.add_argument('--no_features_scaling', action='store_true', default=False,
                        help='Turn off scaling of features')

    parser.add_argument('--max_data_size', type=int,
                        help='Maximum number of data points to load')
Exemplo n.º 2
0
 def configure(self) -> None:
     """Register ``--gpu`` and ``--features_generator`` with restricted choices.

     The allowed GPU indices come from ``torch.cuda.device_count()`` and the
     allowed generator names from ``get_available_features_generators()``;
     both are evaluated when this method runs.
     """
     gpu_indices = list(range(torch.cuda.device_count()))
     self.add_argument("--gpu", choices=gpu_indices)

     generator_names = get_available_features_generators()
     self.add_argument("--features_generator", choices=generator_names)
Exemplo n.º 3
0
    def test_save_features(self):
        """
        Runs ``generate_and_save_features`` on the toy Delaney CSV that sits next to
        this test file and fails the test if anything raises.

        The argument namespace is built with the same options the feature-saving
        script defines, then overridden with the values this test needs.
        """
        try:
            parser = ArgumentParser()
            parser.add_argument('--data_path', type=str,
                                help='Path to data CSV')
            parser.add_argument('--features_generator', type=str,
                                choices=get_available_features_generators(),
                                help='Type of features to generate')
            parser.add_argument('--save_path', type=str,
                                help='Path to .npz file where features will be saved as a numpy compressed archive')
            parser.add_argument('--save_frequency', type=int, default=10000,
                                help='Frequency with which to save the features')
            parser.add_argument('--restart', action='store_true', default=False,
                                help='Whether to not load partially complete featurization and instead start from scratch')
            parser.add_argument('--max_data_size', type=int,
                                help='Maximum number of data points to load')
            parser.add_argument('--sequential', action='store_true', default=False,
                                help='Whether to run sequentially rather than in parallel')

            # Parse an empty command line to obtain the defaults, then override.
            args = parser.parse_args([])
            args.data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'delaney_toy.csv')
            # Only the generated file name is kept; the temporary file object
            # itself is not retained.
            args.save_path = NamedTemporaryFile(suffix='.npz').name
            args.restart = True
            args.features_generator = 'morgan_count'

            generate_and_save_features(args)

            # Removing the output doubles as a check that it was written:
            # os.remove raises if the file does not exist.
            os.remove(args.save_path)
        except Exception as error:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still propagate, and report the
            # underlying error instead of discarding it.
            self.fail('save_features: {!r}'.format(error))
Exemplo n.º 4
0
 def add_arguments(self) -> None:
     """Declare ``--gpu`` and ``--features_generator`` with their allowed values.

     GPU choices are the indices ``0 .. torch.cuda.device_count() - 1``;
     generator choices are whatever ``get_available_features_generators()``
     returns at call time.
     """
     visible_gpus = list(range(torch.cuda.device_count()))
     self.add_argument('--gpu', choices=visible_gpus)

     available_generators = get_available_features_generators()
     self.add_argument('--features_generator', choices=available_generators)
Exemplo n.º 5
0
def add_train_args(parser: ArgumentParser):
    """
    Adds training arguments to an ArgumentParser.

    Arguments are registered in three groups (general, training, model) in a fixed
    order. The valid values for ``--gpu`` and ``--features_generator`` are computed
    when this function is called, from ``torch.cuda.device_count()`` and
    ``get_available_features_generators()`` respectively.

    Multi-line help texts are built from adjacent string literals, which Python
    concatenates verbatim; each piece therefore ends with an explicit space so
    the rendered help does not run words together.

    :param parser: An ArgumentParser. It is modified in place; nothing is returned.
    """
    # General arguments
    parser.add_argument('--gpu',
                        type=int,
                        choices=list(range(torch.cuda.device_count())),
                        help='Which GPU to use')
    parser.add_argument('--data_path',
                        type=str,
                        help='Path to data CSV file',
                        default='M_CYP1A2I_I.csv')
    parser.add_argument(
        '--use_compound_names',
        action='store_true',
        default=False,
        help='Use when test data file contains compound names in addition to SMILES strings')
    parser.add_argument('--max_data_size',
                        type=int,
                        help='Maximum number of data points to load')
    parser.add_argument(
        '--test',
        action='store_true',
        default=False,
        help='Whether to skip training and only test the model')
    parser.add_argument(
        '--features_only',
        action='store_true',
        default=False,
        help='Use only the additional features in an FFN, no graph network')
    parser.add_argument('--features_generator',
                        type=str,
                        nargs='*',
                        choices=get_available_features_generators(),
                        help='Method of generating additional features')
    parser.add_argument(
        '--features_path',
        type=str,
        nargs='*',
        help='Path to features to use in FNN (instead of features_generator)')
    parser.add_argument('--save_dir',
                        type=str,
                        default='./ckpt',
                        help='Directory where model checkpoints will be saved')
    parser.add_argument(
        '--save_smiles_splits',
        action='store_true',
        default=False,
        help='Save smiles for each train/val/test splits for prediction convenience later')
    parser.add_argument(
        '--checkpoint_dir',
        type=str,
        default=None,
        help='Directory from which to load model checkpoints '
        '(walks directory and ensembles all models that are found)')
    parser.add_argument('--checkpoint_path',
                        type=str,
                        default=None,
                        help='Path to model checkpoint (.pt file)')
    parser.add_argument(
        '--dataset_type',
        type=str,
        choices=['classification', 'regression', 'multiclass'],
        help='Type of dataset, e.g. classification or regression. '
        'This determines the loss function used during training.',
        default='regression')  # classification
    parser.add_argument(
        '--multiclass_num_classes',
        type=int,
        default=3,
        help='Number of classes when running multiclass classification')
    parser.add_argument('--separate_val_path',
                        type=str,
                        help='Path to separate val set, optional')
    parser.add_argument('--separate_val_features_path',
                        type=str,
                        nargs='*',
                        help='Path to file with features for separate val set')
    parser.add_argument('--separate_test_path',
                        type=str,
                        help='Path to separate test set, optional')
    parser.add_argument(
        '--separate_test_features_path',
        type=str,
        nargs='*',
        help='Path to file with features for separate test set')
    parser.add_argument(
        '--split_type',
        type=str,
        default='random',
        choices=[
            'random', 'scaffold_balanced', 'predetermined', 'crossval',
            'index_predetermined'
        ],
        help='Method of splitting the data into train/val/test')
    parser.add_argument(
        '--split_sizes',
        type=float,
        nargs=3,
        default=[0.8, 0.1, 0.1],
        help='Split proportions for train/validation/test sets')
    parser.add_argument(
        '--num_folds',
        type=int,
        default=1,
        help='Number of folds when performing cross validation')
    parser.add_argument('--folds_file',
                        type=str,
                        default=None,
                        help='Optional file of fold labels')
    parser.add_argument(
        '--val_fold_index',
        type=int,
        default=None,
        help='Which fold to use as val for leave-one-out cross val')
    parser.add_argument(
        '--test_fold_index',
        type=int,
        default=None,
        help='Which fold to use as test for leave-one-out cross val')
    parser.add_argument(
        '--crossval_index_dir',
        type=str,
        help='Directory in which to find cross validation index files')
    parser.add_argument('--crossval_index_file',
                        type=str,
                        help='Indices of files to use as train/val/test. '
                        'Overrides --num_folds and --seed.')
    parser.add_argument(
        '--seed',
        type=int,
        default=0,
        help='Random seed to use when splitting data into train/val/test sets. '
        'When `num_folds` > 1, the first fold uses this seed and all '
        'subsequent folds add 1 to the seed.')
    parser.add_argument(
        '--metric',
        type=str,
        default=None,
        choices=[
            'auc', 'prc-auc', 'rmse', 'mae', 'mse', 'r2', 'accuracy',
            'cross_entropy'
        ],
        help='Metric to use during evaluation. '
        'Note: Does NOT affect loss function used during training '
        '(loss is determined by the `dataset_type` argument). '
        'Note: Defaults to "auc" for classification and "rmse" for regression.'
    )
    parser.add_argument('--quiet',
                        action='store_true',
                        default=False,
                        help='Skip non-essential print statements')
    parser.add_argument(
        '--log_frequency',
        type=int,
        default=10,
        help='The number of batches between each logging of the training loss')
    parser.add_argument('--no_cuda',
                        action='store_true',
                        default=False,
                        help='Turn off cuda')
    parser.add_argument(
        '--show_individual_scores',
        action='store_true',
        default=False,
        help='Show all scores for individual targets, not just average, at the end')
    parser.add_argument('--no_cache',
                        action='store_true',
                        default=False,
                        help='Turn off caching mol2graph computation')
    parser.add_argument(
        '--config_path',
        type=str,
        help='Path to a .json file containing arguments. Any arguments present in the config '
        'file will override arguments specified via the command line or by the defaults.'
    )

    # Training arguments
    parser.add_argument('--epochs',
                        type=int,
                        default=30,
                        help='Number of epochs to run')
    parser.add_argument('--batch_size',
                        type=int,
                        default=50,
                        help='Batch size')
    parser.add_argument(
        '--warmup_epochs',
        type=float,
        default=2.0,
        help='Number of epochs during which learning rate increases linearly from '
        'init_lr to max_lr. Afterwards, learning rate decreases exponentially '
        'from max_lr to final_lr.')
    parser.add_argument('--init_lr',
                        type=float,
                        default=1e-4,
                        help='Initial learning rate')
    parser.add_argument('--max_lr',
                        type=float,
                        default=1e-3,
                        help='Maximum learning rate')
    parser.add_argument('--final_lr',
                        type=float,
                        default=1e-4,
                        help='Final learning rate')
    parser.add_argument('--no_features_scaling',
                        action='store_true',
                        default=False,
                        help='Turn off scaling of features')

    # Model arguments
    parser.add_argument('--ensemble_size',
                        type=int,
                        default=1,
                        help='Number of models in ensemble')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=300,
                        help='Dimensionality of hidden layers in MPN')
    parser.add_argument('--bias',
                        action='store_true',
                        default=False,
                        help='Whether to add bias to linear layers')
    parser.add_argument('--depth',
                        type=int,
                        default=3,
                        help='Number of message passing steps')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.0,
                        help='Dropout probability')
    parser.add_argument(
        '--activation',
        type=str,
        default='ReLU',
        choices=['ReLU', 'LeakyReLU', 'PReLU', 'tanh', 'SELU', 'ELU'],
        help='Activation function')
    parser.add_argument(
        '--undirected',
        action='store_true',
        default=False,
        help='Undirected edges (always sum the two relevant bond vectors)')
    parser.add_argument(
        '--ffn_hidden_size',
        type=int,
        default=None,
        help='Hidden dim for higher-capacity FFN (defaults to hidden_size)')
    parser.add_argument('--ffn_num_layers',
                        type=int,
                        default=2,
                        help='Number of layers in FFN after MPN encoding')
    parser.add_argument(
        '--atom_messages',
        action='store_true',
        default=False,
        help='Use messages on atoms instead of messages on bonds')
Exemplo n.º 6
0
    except OverflowError:
        print(
            'Features array is too large to save as a single file. Instead keeping features as a directory of files.'
        )


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--data_path',
                        type=str,
                        required=True,
                        help='Path to data CSV')
    parser.add_argument('--features_generator',
                        type=str,
                        required=True,
                        choices=get_available_features_generators(),
                        help='Type of features to generate')
    parser.add_argument(
        '--save_path',
        type=str,
        required=True,
        help=
        'Path to .npz file where features will be saved as a compressed numpy archive'
    )
    parser.add_argument('--save_frequency',
                        type=int,
                        default=10000,
                        help='Frequency with which to save the features')
    parser.add_argument(
        '--restart',
        action='store_true',
Exemplo n.º 7
0
 def add_arguments(self) -> None:
     """Declare ``--features_generator``, limited to the generator names
     returned by ``get_available_features_generators()`` at call time."""
     generator_names = get_available_features_generators()
     self.add_argument('--features_generator', choices=generator_names)
Exemplo n.º 8
0
def add_predict_args(parser: ArgumentParser):
    """
    Adds predict arguments to an ArgumentParser.

    The valid values for ``--gpu`` and ``--features_generator`` are computed when this
    function is called, from ``torch.cuda.device_count()`` and
    ``get_available_features_generators()`` respectively.

    Multi-line help texts are built from adjacent string literals, which Python
    concatenates verbatim; each piece therefore ends with an explicit space so
    the rendered help does not run words together.

    :param parser: An ArgumentParser. It is modified in place; nothing is returned.
    """
    parser.add_argument('--gpu',
                        type=int,
                        choices=list(range(torch.cuda.device_count())),
                        help='Which GPU to use')
    parser.add_argument(
        '--data_format',
        type=str,
        default=None,
        help='SSPFF12 example for 2 smiles, prop, followed by feats')
    parser.add_argument(
        '--test_path',
        type=str,
        help='Path to CSV file containing testing data for which predictions will be made')
    parser.add_argument(
        '--use_compound_names',
        action='store_true',
        default=False,
        help='Use when test data file contains compound names in addition to SMILES strings')
    parser.add_argument(
        '--preds_path',
        type=str,
        help='Path to CSV file where predictions will be saved')
    parser.add_argument(
        '--checkpoint_dir',
        type=str,
        help='Directory from which to load model checkpoints '
        '(walks directory and ensembles all models that are found)')
    parser.add_argument('--checkpoint_path',
                        type=str,
                        default=None,
                        help='Path to model checkpoint (.pt file)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=50,
                        help='Batch size')
    parser.add_argument('--no_cuda',
                        action='store_true',
                        default=False,
                        help='Turn off cuda')
    parser.add_argument('--features_generator',
                        type=str,
                        nargs='*',
                        choices=get_available_features_generators(),
                        help='Method of generating additional features')
    parser.add_argument(
        '--features_path',
        type=str,
        nargs='*',
        help='Path to features to use in FNN (instead of features_generator)')
    parser.add_argument('--no_features_scaling',
                        action='store_true',
                        default=False,
                        help='Turn off scaling of features')
    parser.add_argument('--max_data_size',
                        type=int,
                        help='Maximum number of data points to load')
    parser.add_argument(
        '--config_path',
        type=str,
        help='Path to a .json file containing arguments. Any arguments present in the config '
        'file will override arguments specified via the command line or by the defaults.'
    )
    parser.add_argument('--ops',
                        type=str,
                        default='concat',
                        choices=['plus', 'minus', 'concat'],
                        help='Operation for embeddings')