Example #1
def earnn(experiment='one_month_forecast',
          include_pred_month=True,
          surrounding_pixels=None,
          pretrained=True):
    # if the working directory is already ml_drought we don't need ../data
    if Path('.').absolute().as_posix().split('/')[-1] == 'ml_drought':
        data_path = Path('data')
    else:
        data_path = Path('../data')

    if not pretrained:
        predictor = EARecurrentNetwork(hidden_size=128,
                                       data_folder=data_path,
                                       experiment=experiment,
                                       include_pred_month=include_pred_month,
                                       surrounding_pixels=surrounding_pixels)
        predictor.train(num_epochs=50, early_stopping=5)
        predictor.evaluate(save_preds=True)
        predictor.save_model()
    else:
        predictor = load_model(data_path /
                               f'models/{experiment}/ealstm/model.pt')

    test_file = data_path / f'features/{experiment}/test/2018_3'
    assert test_file.exists()
    all_shap_for_file(test_file, predictor, batch_size=100)
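A minimal usage sketch for the function above (an assumption, not part of the original script): it presumes the data/ directory layout the function expects, including models/{experiment}/ealstm/model.pt for the pretrained branch.
# Hypothetical invocation of earnn() defined above.
if __name__ == '__main__':
    # Re-use an already-saved EALSTM model...
    earnn(experiment='one_month_forecast', pretrained=True)
    # ...or retrain from scratch with a one-pixel neighbourhood around each target pixel.
    earnn(pretrained=False, surrounding_pixels=1)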
Example #2
def main(folder, out):
    """起動はこのフォルダ内から行う
       フォルダに格納された画像に対して予測を行う
    """
    dirs = os.listdir('images/test/')
    weights = get_latestname("__checkpoints__/model_", 1)
    n_classes = len(dirs)
    model = load_model(n_classes=n_classes, weights=weights,
                       freeze='inference', basemodel=BASEMODEL)

    df = pd.DataFrame()
    images_path = glob.glob(os.path.join(folder, '*.jpg'))
    print("-----"*4)
    print(f"Detected {len(images_path)} images")
    if not len(images_path):
        print("No images detected")
        return

    for image_path in images_path:
        name = os.path.basename(image_path)
        image = prep_image(image_path)
        image = image.reshape(1, TARGET_SIZE[0], TARGET_SIZE[1], 3)
        pred = model.predict(image)
        print(name, pred)
        df[name] = pred[0]

    df.index = dirs
    df = df.T
    df.to_csv(out)
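A sketch of how this entry point could be exposed on the command line (assumed wiring, not in the original script; the flag names and defaults are placeholders).
# Hypothetical CLI wrapper around main(folder, out); flag names are assumptions.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Predict classes for a folder of .jpg images')
    parser.add_argument('--folder', default='images/test/', help='directory containing *.jpg files')
    parser.add_argument('--out', default='predictions.csv', help='CSV file for per-image predictions')
    cli_args = parser.parse_args()
    main(cli_args.folder, cli_args.out)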
Example #3
def earnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    pretrained=True,
    ignore_vars=None,
    include_static=True,
):
    # if the working directory is already ml_drought we don't need ../data
    if Path(".").absolute().as_posix().split("/")[-1] == "ml_drought":
        data_path = Path("data")
    else:
        data_path = Path("../data")

    if not pretrained:
        predictor = EARecurrentNetwork(
            hidden_size=128,
            data_folder=data_path,
            experiment=experiment,
            include_pred_month=include_pred_month,
            surrounding_pixels=surrounding_pixels,
            ignore_vars=ignore_vars,
            include_static=include_static,
        )
        predictor.train(num_epochs=50, early_stopping=10)
        predictor.evaluate(save_preds=True)
        predictor.save_model()
    else:
        predictor = load_model(data_path /
                               f"models/{experiment}/ealstm/model.pt")
Example #4
    def test_regression(self, tmp_path, monkeypatch):

        coef_array = np.array([1, 1, 1, 1, 1])
        intercept_array = np.array([2])

        def mocktrain(self):
            class MockModel:
                @property
                def coef_(self):
                    return coef_array

                @property
                def intercept_(self):
                    return intercept_array

            self.model = MockModel()

        monkeypatch.setattr(LinearRegression, 'train', mocktrain)

        model = LinearRegression(tmp_path, experiment='one_month_forecast')
        model.train()
        model.save_model()

        model_path = tmp_path / 'models/one_month_forecast/linear_regression/model.pkl'
        assert model_path.exists(), 'Model not saved!'

        new_model = load_model(model_path)
        assert type(new_model) == LinearRegression

        assert (model.model.coef_ == coef_array).all()
        assert (model.model.intercept_ == intercept_array).all()
        assert new_model.include_pred_month == model.include_pred_month
        assert new_model.experiment == model.experiment
        assert new_model.surrounding_pixels == model.surrounding_pixels
Example #5
def earnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    pretrained=True,
    ignore_vars=None,
):
    data_path = get_data_path()

    if not pretrained:
        predictor = EARecurrentNetwork(
            hidden_size=128,
            data_folder=data_path,
            experiment=experiment,
            include_pred_month=include_pred_month,
            surrounding_pixels=surrounding_pixels,
            ignore_vars=ignore_vars,
        )
        predictor.train(num_epochs=50, early_stopping=5)
        predictor.evaluate(save_preds=True)
        predictor.save_model()
    else:
        predictor = load_model(data_path /
                               f"models/{experiment}/ealstm/model.pt")

    test_file = data_path / f"features/{experiment}/test/2018_3"
    assert test_file.exists()
    all_explanations_for_file(test_file, predictor, batch_size=100)
Example #6
def train_model(model_name='densenet121',
                opt='Adagrad',
                dataset='iris',
                writer=None):
    train_loader, val_loader, test_loader = load_data(dataset)

    # Model selection
    model = load_model(model_name)

    # Optimizer
    if model_name == "ownnet":
        optimizer = opt_selection(model[0], opt)
    else:
        optimizer = opt_selection(model, opt)

    # Loss Criterion
    if dataset == 'mltoy' or dataset == "yeast14c" or dataset == "yeast14c_m":
        # criterion = nn.MultiLabelSoftMarginLoss()
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = nn.CrossEntropyLoss()
    if model_name == "ownnet":
        criterion = torch.nn.MSELoss()

    best_train, best_val = 0.0, 0.0
    g = 0
    for epoch in range(1, args.epochs + 1):
        # Train and Validate
        train_stats = train_step(model, criterion, optimizer, train_loader, g,
                                 epoch)
        valid_stats = valid_step(model, criterion, val_loader)
        g += 1

        # Logging
        logging(epoch, train_stats, valid_stats, writer)

        # Keep best model
        # print(train_stats['accuracy'], valid_stats['accuracy'], best_train, best_val)
        if valid_stats['accuracy'] > best_val or (
                valid_stats['accuracy'] == best_val
                and train_stats['accuracy'] >= best_train):
            best_train = train_stats['accuracy']
            best_val = valid_stats['accuracy']
            if model_name == "ownnet":
                best_model_weights = copy.deepcopy(model[0].state_dict())
            else:
                best_model_weights = copy.deepcopy(model.state_dict())

    # Load best model and evaluate on test set
    model.load_state_dict(best_model_weights)
    test_stats = valid_step(model, criterion, test_loader)
    # print(train_stats['accuracy'], valid_stats['accuracy'], best_train, best_val)
    print(
        '\nBest Model Accuracies: Train: {:4.2f} | Val: {:4.2f} | Test: {:4.2f}'
        .format(best_train, best_val, test_stats['accuracy']))

    return model
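A minimal sketch of calling the trainer above; the TensorBoard SummaryWriter and its log directory are assumptions, and a global args object with an epochs attribute must already exist, as the function body requires.
# Hypothetical call site for train_model(); log_dir and hyperparameters are placeholders.
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/densenet121_iris')  # assumed log directory
best_model = train_model(model_name='densenet121', opt='Adagrad', dataset='iris', writer=writer)
writer.close()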
Example #7
def earnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    pretrained=False,
    explain=False,
    static="features",
    ignore_vars=None,
    num_epochs=50,
    early_stopping=5,
    static_embedding_size=10,
    hidden_size=128,
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
    normalize_y=True,
    include_prev_y=True,
    include_yearly_aggs=True,  # new
    clear_nans=True,
    weight_observations=False,
    pred_month_static=False,
):
    data_path = get_data_path()

    if not pretrained:
        predictor = EARecurrentNetwork(
            hidden_size=hidden_size,
            data_folder=data_path,
            experiment=experiment,
            include_pred_month=include_pred_month,
            surrounding_pixels=surrounding_pixels,
            static=static,
            static_embedding_size=static_embedding_size,
            ignore_vars=ignore_vars,
            predict_delta=predict_delta,
            spatial_mask=spatial_mask,
            include_latlons=include_latlons,
            normalize_y=normalize_y,
            include_prev_y=include_prev_y,
            include_yearly_aggs=include_yearly_aggs,
            clear_nans=clear_nans,
            weight_observations=weight_observations,
            pred_month_static=pred_month_static,
        )
        predictor.train(num_epochs=num_epochs, early_stopping=early_stopping)
        predictor.evaluate(save_preds=True)
        predictor.save_model()
    else:
        predictor = load_model(data_path /
                               f"models/{experiment}/ealstm/model.pt")

    if explain:
        test_file = data_path / f"features/{experiment}/test/2018_3"
        assert test_file.exists()
        all_explanations_for_file(test_file, predictor, batch_size=100)
Example #8
def main(
    config_path: Path,
    input_path: Path,
    model_path: Path,
    predict_path: Path,
    val_loader: Optional[DataLoader] = None,
) -> SimpleNamespace:
    """
    Main function responsible for prediction with passed model.

    Arguments:
        Path config_path: Path to main config (of :class:`DefaultConfig` class)
        Path input_path: Path to file with input data
        Path model_path: Path to trained model
        Path predict_path: Path to output directory

    Returns:
        Namespace whose ``root`` attribute points to the experiment root dir.
    """

    config = load_variable("config", config_path)

    if val_loader is None:
        pipeline = False
        _, val_loader, _ = DataLoader.get_loaders(input_path, config=config)

    else:
        pipeline = True

    is_image_folder = image_folder(val_loader.dataset)

    model: Module = load_model(config, model_path)
    model.eval()

    all_preds = torch.tensor([])
    for inputs, labels in tqdm(val_loader, desc="Predictions"):

        if not is_image_folder and not pipeline:
            inputs, _ = input_transform(
                input_data=inputs,
                input_labels=labels,
                preprocessors=config.preprocessors,
            )

        predictions = model(inputs)
        processed_preds = pred_transform(predictions, config.postprocessors)

        all_preds = torch.cat([all_preds, processed_preds])

    save_prediction(predictions=all_preds, output_path=predict_path)

    return SimpleNamespace(root=predict_path.parent.parent)
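A sketch of invoking this prediction entry point directly; every path below is a placeholder, and the config and model files are assumed to have been produced by the matching training step.
# Hypothetical standalone invocation of main() with placeholder paths.
from pathlib import Path

result = main(
    config_path=Path('experiments/exp_001/config.py'),      # assumed config location
    input_path=Path('data/input.csv'),                       # assumed input data file
    model_path=Path('experiments/exp_001/model/model.pt'),   # assumed trained model
    predict_path=Path('experiments/exp_001/predictions'),    # assumed output directory
)
print(result.root)  # experiment root dir, per the return value above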
Example #9
    def test_rnn(self, tmp_path, monkeypatch):

        features_per_month = 5
        dense_features = [10]
        hidden_size = 128
        rnn_dropout = 0.25
        dense_dropout = 0.25
        include_pred_month = True

        def mocktrain(self):
            self.model = RNN(
                features_per_month,
                dense_features,
                hidden_size,
                rnn_dropout,
                dense_dropout,
                include_pred_month,
                experiment="one_month_forecast",
            )
            self.features_per_month = features_per_month

        monkeypatch.setattr(RecurrentNetwork, "train", mocktrain)

        model = RecurrentNetwork(
            hidden_size=hidden_size,
            dense_features=dense_features,
            rnn_dropout=rnn_dropout,
            data_folder=tmp_path,
        )
        model.train()
        model.save_model()

        model_path = tmp_path / "models/one_month_forecast/rnn/model.pkl"

        assert model_path.exists(), "Model not saved!"

        new_model = load_model(model_path)

        assert type(new_model) == RecurrentNetwork

        for key, val in new_model.model.state_dict().items():
            assert (model.model.state_dict()[key] == val).all()

        assert new_model.dense_features == model.dense_features
        assert new_model.features_per_month == model.features_per_month
        assert new_model.hidden_size == model.hidden_size
        assert new_model.rnn_dropout == model.rnn_dropout
        assert new_model.include_pred_month == model.include_pred_month
        assert new_model.experiment == model.experiment
        assert new_model.surrounding_pixels == model.surrounding_pixels
Example #10
def load(log_dir: str, model_type: str):
    global MODEL, PREPROCESSOR
    if model_type == "lstm":
        model_path = os.path.join(log_dir, "model.h5")
    else:
        model_path = os.path.join(log_dir, "saved_model/model")
    preprocessor_path = os.path.join(log_dir, "preprocessor.pkl")
    if not os.path.exists(model_path) or not os.path.exists(preprocessor_path):
        raise Exception(
            "Please run `train.py` before building the app. Or make sure the needed files exist"
        )
    MODEL = load_model(model_path)
    PREPROCESSOR = Preprocessor.load(preprocessor_path)
    assert MODEL is not None, "Model has not been properly loaded"
    assert PREPROCESSOR is not None, "Preprocessor has not been properly loaded"
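A usage sketch for the loader above; 'logs/lstm_run' is a placeholder directory that must contain the model and preprocessor files written by train.py.
# Hypothetical call; populates the module-level MODEL and PREPROCESSOR globals.
load(log_dir='logs/lstm_run', model_type='lstm')
print(type(MODEL).__name__, type(PREPROCESSOR).__name__)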
Example #11
def debug_model(category, category_dir, labels, mode):
    print("Start:", category)
    images_path = glob.glob(os.path.join('images', 'test', category, '*.jpg'))

    class_names = os.listdir('images/train')
    trained_weight = get_latestname("__checkpoints__/model_", 1)
    model = load_model(len(class_names),
                       trained_weight,
                       freeze='inference',
                       basemodel=BASEMODEL)

    for image_path in images_path:
        image = prep_image(image_path)
        name = pathlib.Path(image_path).name

        prediction = model.predict(image)
        predicted_label = labels[np.argmax(prediction)]
        true_label = category

        if predicted_label == true_label:
            continue

        image = image.reshape(TARGET_SIZE[0], TARGET_SIZE[1], 3)
        explainer = lime_image.LimeImageExplainer()
        explanation = explainer.explain_instance(image,
                                                 model.predict,
                                                 top_labels=len(class_names),
                                                 hide_color=0,
                                                 num_samples=NUM_SAMPLES)

        if mode == 'RedGreen':
            temp, mask = explanation.get_image_and_mask(
                explanation.top_labels[0],
                positive_only=False,
                num_features=10,
                hide_rest=False)

        elif mode == 'simple':
            temp, mask = explanation.get_image_and_mask(
                explanation.top_labels[0],
                positive_only=True,
                num_features=5,
                hide_rest=False)

        image = mark_boundaries(temp * 0.7 + 0.3, mask) * 255
        image = Image.fromarray(np.uint8(image))
        image.save(
            os.path.join(category_dir, "[" + predicted_label + "]_" + name))
Example #12
    def __init__(self, model, features_path, database_path=None):
        # Load the extraction model
        self.model = load_model(model)

        # Load the feature metadata
        features_basename = basename(features_path)
        meta_file_path = join(features_path,
                              '{}.meta'.format(features_basename))
        with open(meta_file_path, 'r') as f:
            self.feature_metadata = json.load(f)

        # Load image representations
        repr_file_path = join(features_path,
                              '{}.repr.npy'.format(features_basename))
        self.feature_store = np.load(repr_file_path)

        if representation_size(self.model) != self.feature_store.shape[-1]:
            raise ValueError('Model {} and feature store {} have nonmatching '
                             'representation sizes: {} vs {}'.format(
                                 model, features_path,
                                 representation_size(self.model),
                                 self.feature_store.shape[-1]))

        # Construct paths to feature files
        self.feature_file_paths = {}
        features_sub_folder = join(features_path, 'features/')
        for idx, metadata in self.feature_metadata.items():
            if not idx.isdigit():
                continue
            image_name = basename(self.feature_metadata[str(idx)]['image'])
            path = join(features_sub_folder, '{}.npy'.format(image_name))
            if isfile(path):
                self.feature_file_paths[str(idx)] = path
            else:
                print('Missing feature file for image {}'.format(image_name))

        # Load PCA
        pca_file_path = join(features_path, '{}.pca'.format(features_basename))
        if isfile(pca_file_path):
            self.pca = joblib.load(pca_file_path)
        else:
            self.pca = None

        # Load image database
        if database_path:
            self.database = Database.load(database_path)
        else:
            self.database = None
Example #13
def inference_testdata():
    dirs = os.listdir('images/test/')
    weights = get_latestname("__checkpoints__/model_", 1)
    model = load_model(n_classes=len(dirs),
                       weights=weights,
                       freeze='inference',
                       basemodel=BASEMODEL)

    testGene = DummyGenerator(
        batch_size=1,
        train_path='__dataset__',
        image_folder='test',
        aug_dict=None,
        save_to_dir=None,
        shuffle=False,
        target_size=TARGET_SIZE,
        image_color_mode="rgb",
    )

    filenames = testGene.filenames
    category_names = [pathlib.Path(fname).parts[-2] for fname in filenames]
    nb_samples = len(filenames)
    if not nb_samples:
        print("No images found")
        return

    predict = model.predict_generator(testGene, steps=nb_samples)

    columns = list(testGene.class_indices.keys())
    df_predict = pd.DataFrame(predict, columns=columns)
    df_predict['Pred'] = df_predict.idxmax(1)
    df_predict['True'] = category_names
    df_predict['Score'] = (df_predict['Pred'] == df_predict['True']).apply(int)
    df_predict.index = filenames
    df_predict.to_csv('__checkpoints__/test_result.csv')

    print("--" * 20)
    print("")
    print("Total Score:", df_predict['Score'].mean())
    for label in set(list(df_predict["True"])):
        score = df_predict.loc[df_predict['True'] == label, "Score"].mean()
        print(f"{label} Score:", score)

    print("")
    print("--" * 20)
Example #14
def main() -> None:
    print(f"Running DeepLIFT for {EXPERIMENT}")
    data_dir = get_data_path()

    # 1. open the model
    model = load_model(data_dir / "models" / EXPERIMENT / MODEL / "model.pt")
    model.models_dir = data_dir / "models" / EXPERIMENT
    model.experiment = TRUE_EXPERIMENT

    # 2. get all the TEST timesteps in the test directory
    test_folders = [d for d in (data_dir / f"features/{EXPERIMENT}/test").iterdir()]
    #  TODO: remove this test
    # test_folders = test_folders[:2]

    #  3. run the shap analysis for each test timestep
    for test_folder in test_folders:
        print(f"\n\n** Working on {test_folder.name} **\n\n")
        run_shap_for_folder(data_dir, test_folder, model)  # type: ignore
Example #15
    def test_linear_network(self, tmp_path, monkeypatch):
        layer_sizes = [10]
        input_size = 10
        dropout = 0.25
        include_pred_month = True
        surrounding_pixels = 1

        def mocktrain(self):
            self.model = LinearModel(
                input_size, layer_sizes, dropout, include_pred_month
            )
            self.input_size = input_size

        monkeypatch.setattr(LinearNetwork, "train", mocktrain)

        model = LinearNetwork(
            data_folder=tmp_path,
            layer_sizes=layer_sizes,
            dropout=dropout,
            experiment="one_month_forecast",
            include_pred_month=include_pred_month,
            surrounding_pixels=surrounding_pixels,
        )
        model.train()
        model.save_model()

        model_path = tmp_path / "models/one_month_forecast/linear_network/model.pkl"

        assert model_path.exists(), "Model not saved!"

        new_model = load_model(model_path)

        assert type(new_model) == LinearNetwork

        for key, val in new_model.model.state_dict().items():
            assert (model.model.state_dict()[key] == val).all()

        assert new_model.layer_sizes == model.layer_sizes
        assert new_model.input_size == model.input_size
        assert new_model.dropout == model.dropout
        assert new_model.include_pred_month == model.include_pred_month
        assert new_model.experiment == model.experiment
        assert new_model.surrounding_pixels == model.surrounding_pixels
Example #16
def train_model(model_name='densenet121',
                opt='Adagrad',
                dataset='iris',
                writer=None,
                label_col_name=''):
    # train_loader, val_loader, test_loader = load_data(dataset, label_col_name=label_col_name)
    train_loader, test_loader, nb_classes = load_data(
        dataset, label_col_name=label_col_name)

    # Model selection
    model = load_model(model_name, nb_classes=nb_classes)

    # Optimizer
    optimizer = opt_selection(model, opt)

    # Loss Criterion
    criterion = nn.CrossEntropyLoss()

    best_train, best_val = 0.0, 0.0
    for epoch in range(1, args.epochs + 1):
        # Train and Validate
        train_stats = train_step(model, criterion, optimizer, train_loader)
        # valid_stats = valid_step(model, criterion, val_loader)

        # Logging
        # logging(epoch, train_stats, valid_stats, writer)
        logging(epoch, train_stats, writer)

        # Keep best model
        if train_stats['accuracy'] >= best_train:
            best_train = train_stats['accuracy']
            # best_val    = valid_stats['accuracy']
            best_model_weights = copy.deepcopy(model.state_dict())

    # Load best model and evaluate on test set
    model.load_state_dict(best_model_weights)
    test_stats = valid_step(model, criterion, test_loader)

    # print('\nBests Model Accuracies: Train: {:4.2f} | Val: {:4.2f} | Test: {:4.2f}'.format(best_train, best_val, test_stats['accuracy']))
    print('\nBest Model Accuracies: Train: {:4.2f} | Test: {:4.2f}'.format(
        best_train, test_stats['accuracy']))

    return model
Example #17
def main(folderpath):
    dirs = os.listdir('images/test/')
    weights = get_latestname("__checkpoints__/model_", 1)
    n_classes = len(dirs)
    model = load_model(n_classes=n_classes,
                       weights=weights,
                       freeze='inference',
                       basemodel=BASEMODEL)

    df = pd.DataFrame()
    images_path = glob.glob(folderpath + '/*')
    for image_path in images_path:
        name = os.path.basename(image_path)
        image = prep_image(image_path)
        pred, uncert = prediction(model, n_classes, image)

        df[name + "_pred"] = pred
        df[name + "_var"] = uncert

    df.index = dirs
    df.to_csv('inference.csv')
Example #18
    def test_xgboost(self, tmp_path, monkeypatch):

        import xgboost as xgb

        def mocktrain(self):
            self.model = xgb.XGBRegressor()

        monkeypatch.setattr(GBDT, "train", mocktrain)

        model = GBDT(tmp_path, experiment="one_month_forecast")
        model.train()
        model.save_model()

        model_path = tmp_path / "models/one_month_forecast/gbdt/model.pkl"
        assert model_path.exists(), f"Model not saved!"

        new_model = load_model(model_path)
        assert type(new_model) == GBDT

        assert new_model.include_pred_month == model.include_pred_month
        assert new_model.experiment == model.experiment
        assert new_model.surrounding_pixels == model.surrounding_pixels
Example #19
def extract_conv_features(name, model_name, features_dir, image_dir, root_dir):
    """Extracts features of all images in image_dir and 
    saves them for later use.
    """
    image_dir = os.path.abspath(image_dir)

    out_dir = join(features_dir, 'features/')
    if not exists(out_dir):
        os.mkdir(out_dir)

    extensions = ['.png', '.jpg', '.jpeg']
    images = os.listdir(image_dir)
    images = [
        img for img in images if os.path.splitext(img)[1].lower() in extensions
    ]
    images = sorted(images)

    model = load_model(model_name)

    meta_data = {'model': model_name}

    for idx, image_name in enumerate(images):
        print('{}/{}: extracting features of image {}'.format(
            idx + 1, len(images), image_name))
        image_path = join(image_dir, image_name)
        image = load_image(image_path)

        features = compute_features(model, image)

        np.save(join(out_dir, os.path.basename(image_name)), features)
        meta_data[idx] = {
            'image': os.path.relpath(image_path, root_dir),
            'height': image.shape[0],
            'width': image.shape[1]
        }

    meta_file_name = '{}.meta'.format(name)
    with open(join(features_dir, meta_file_name), 'w') as f:
        json.dump(meta_data, f)
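A sketch of calling the extractor above; the names and directories are placeholders, and model_name must be something the local load_model helper understands.
# Hypothetical invocation; 'resnet50' and all paths are assumptions.
extract_conv_features(
    name='holiday_photos',
    model_name='resnet50',
    features_dir='features/holiday_photos',
    image_dir='images/holiday_photos',
    root_dir='.',
)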
Example #20
    def __init__(self, params: PredictionPipelineParams):
        self.pipeline = load_transformer(params.transformer_path)
        self.model = load_model(params.model_path)
Example #21
def main(args=None):
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # make sure keras is the minimum required version
    check_keras_version()

    # optionally choose specific GPU
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    keras.backend.tensorflow_backend.set_session(get_session())

    # make save path if it doesn't exist
    if args.save_path is not None and not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # optionally load config parameters
    if args.config:
        args.config = read_config_file(args.config)

    # create the generator
    generator = create_generator(args)

    # optionally load anchor parameters
    anchor_params = None
    if args.config and 'anchor_parameters' in args.config:
        anchor_params = parse_anchor_parameters(args.config)

    # load the model
    print('Loading model, this may take a second...')
    model = models.load_model(args.model, backbone_name=args.backbone)

    # optionally convert the model
    if args.convert_model:
        model = models.convert_model(model, anchor_params=anchor_params)

    # print model summary
    # print(model.summary())

    # start evaluation
    if args.dataset_type == 'coco':
        from ..utils.coco_eval import evaluate_coco
        evaluate_coco(generator, model, args.score_threshold)
    else:
        average_precisions = evaluate(
            generator,
            model,
            iou_threshold=args.iou_threshold,
            score_threshold=args.score_threshold,
            max_detections=args.max_detections,
            save_path=args.save_path
        )

        # print evaluation
        total_instances = []
        precisions = []
        for label, (average_precision, num_annotations) in average_precisions.items():
            print('{:.0f} instances of class'.format(num_annotations),
                  generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision))
            total_instances.append(num_annotations)
            precisions.append(average_precision)

        if sum(total_instances) == 0:
            print('No test instances found.')
            return

        print('mAP using the weighted average of precisions among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)))
        print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances)))
Example #22
    def __init__(self):
        model_path = 'data/inference.h5'
        self.model = models.load_model(model_path, backbone_name='resnet50')
        self.labels_to_names = {0: 'crack', 1: 'wrinkle'}
        self.threshold = 0.50
Example #23
ealstm.evaluate(
    spatial_unit_name='station_id',
    save_preds=True
)
results_dict = json.load(open('data/models/one_timestep_forecast/ealstm/results.json', 'rb'))
print("Overall RMSE: ", results_dict['total'])

# 1 epoch = 1.04 / 2.81
# 100 epochs = 0.72 / 2.04

# save the model
ealstm.save_model()

# load the model
from src.models import load_model
ealstm2 = load_model(Path('data/models/one_timestep_forecast/ealstm/model.pt'))

# ------------------------------------------------------------------------
## ANALYSIS
# ------------------------------------------------------------------------
import matplotlib.pyplot as plt

# checking the performance of the models
EXPERIMENT = 'one_timestep_forecast'
from src.analysis import read_pred_data
from src.analysis.evaluation import join_true_pred_da

# making predictions
ealstm_pred = read_pred_data('ealstm', data_dir, experiment=EXPERIMENT)
ealstm_pred['station_id'] = ealstm_pred['station_id'].astype(int)
Example #24
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import sys

%load_ext autoreload
%autoreload 2
%matplotlib

data_dir = data_path = Path('data')
data_dir = data_path = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data')
sys.path.append('/Users/tommylees/github/ml_drought')


# load model
from src.models import load_model

model_path = data_dir / 'models/one_month_forecast/ealstm/model.pt'
assert model_path.exists()

ealstm = load_model(model_path)

# load X / Y data
from src.analysis import read_train_data, read_test_data
X_train, y_train = read_train_data(data_dir)
X_test, y_test = read_test_data(data_dir)

Example #25
def run_training():
    acc_train = []
    acc_val = []

    trainGene = customGenerator(
        batch_size=BATCH_SIZE,
        train_path='__dataset__',
        image_folder='train',
        aug_dict=DATA_GEN_DEFAULT,
        save_to_dir=None,
        target_size=TARGET_SIZE,
        image_color_mode="rgb",
    )

    validGene = customGenerator(
        batch_size=BATCH_SIZE,
        train_path='__dataset__',
        image_folder='valid',
        aug_dict=None,
        save_to_dir=None,
        target_size=TARGET_SIZE,
        image_color_mode="rgb",
    )

    if os.path.exists("__checkpoints__"):
        shutil.rmtree("__checkpoints__")
    os.makedirs("__checkpoints__")

    hdfname = get_uniquename("__checkpoints__/model_", 1)
    model_checkpoint = ModelCheckpoint('{}.hdf5'.format(hdfname),
                                       monitor='loss',
                                       verbose=1,
                                       save_best_only=True)

    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=EA_EPOCHS,
                                   verbose=0,
                                   mode='auto')

    callbacks = [early_stopping, model_checkpoint]

    n_classes = len(os.listdir('__dataset__/train/'))
    n_train_images = len(glob.glob('__dataset__/train/*/*'))
    n_valid_images = len(glob.glob('__dataset__/valid/*/*'))

    print("ベースモデル凍結:訓練開始")

    trained_weight = get_latestname("__checkpoints__/model_", 1)
    model = load_model(n_classes,
                       trained_weight,
                       freeze='initial',
                       basemodel=BASEMODEL)

    history = model.fit_generator(trainGene,
                                  steps_per_epoch=n_train_images // BATCH_SIZE,
                                  epochs=INITIAL_EPOCHS,
                                  validation_data=validGene,
                                  validation_steps=n_valid_images //
                                  BATCH_SIZE,
                                  callbacks=callbacks)

    acc_train = acc_train + list(history.history['acc'])
    acc_val = acc_val + list(history.history['val_acc'])
    n_epoch_1 = len(list(history.history['acc']))

    print("初期訓練の終了:モデルのリロードを開始")
    trained_weight = get_latestname("__checkpoints__/model_", 1)
    print("検出したモデル:", trained_weight)
    model = load_model(n_classes,
                       weights=trained_weight,
                       freeze='second',
                       basemodel=BASEMODEL)
    print("2つのinceptionブロックを解凍:訓練再開")

    history = model.fit_generator(trainGene,
                                  steps_per_epoch=n_train_images // BATCH_SIZE,
                                  epochs=SECOND_EPOCHS,
                                  validation_data=validGene,
                                  validation_steps=n_valid_images //
                                  BATCH_SIZE,
                                  callbacks=callbacks)

    acc_train = acc_train + list(history.history['acc'])
    acc_val = acc_val + list(history.history['val_acc'])
    n_epoch_2 = len(list(history.history['acc']))

    print("第二次訓練の終了:モデルのリロードを開始")
    trained_weight = get_latestname("__checkpoints__/model_", 1)
    print("検出したモデル:", trained_weight)
    model = load_model(n_classes,
                       weights=trained_weight,
                       freeze='third',
                       basemodel=BASEMODEL)
    print("4つのinceptionブロックを解凍:訓練再開")

    trainGene = customGenerator(
        batch_size=BATCH_SIZE,
        train_path='__dataset__',
        image_folder='train',
        aug_dict=DATA_GEN_DEFAULT,
        save_to_dir=None,
        target_size=TARGET_SIZE,
        image_color_mode="rgb",
    )

    validGene = customGenerator(
        batch_size=BATCH_SIZE,
        train_path='__dataset__',
        image_folder='valid',
        aug_dict=None,
        save_to_dir=None,
        target_size=TARGET_SIZE,
        image_color_mode="rgb",
    )

    history = model.fit_generator(trainGene,
                                  steps_per_epoch=n_train_images // BATCH_SIZE,
                                  epochs=THIRD_EPOCHS,
                                  validation_data=validGene,
                                  validation_steps=n_valid_images //
                                  BATCH_SIZE,
                                  callbacks=callbacks)

    acc_train = acc_train + list(history.history['acc'])
    acc_val = acc_val + list(history.history['val_acc'])
    n_epoch_3 = len(list(history.history['acc']))

    print("第三次訓練の終了:モデルのリロードを開始")
    trained_weight = get_latestname("__checkpoints__/model_", 1)
    print("検出したモデル:", trained_weight)
    model = load_model(n_classes,
                       weights=trained_weight,
                       freeze='final',
                       basemodel=BASEMODEL)
    print("すべてのinceptionブロックを解凍:訓練再開")

    trainGene = customGenerator(
        batch_size=BATCH_SIZE,
        train_path='__dataset__',
        image_folder='train',
        aug_dict=DATA_GEN_DEFAULT,
        save_to_dir=None,
        target_size=TARGET_SIZE,
        image_color_mode="rgb",
    )

    validGene = customGenerator(
        batch_size=BATCH_SIZE,
        train_path='__dataset__',
        image_folder='valid',
        aug_dict=None,
        save_to_dir=None,
        target_size=TARGET_SIZE,
        image_color_mode="rgb",
    )

    history = model.fit_generator(trainGene,
                                  steps_per_epoch=n_train_images // BATCH_SIZE,
                                  epochs=FINAL_EPOCHS,
                                  validation_data=validGene,
                                  validation_steps=n_valid_images //
                                  BATCH_SIZE,
                                  callbacks=callbacks)

    acc_train = acc_train + list(history.history['acc'])
    acc_val = acc_val + list(history.history['val_acc'])
    n_epoch_4 = len(list(history.history['acc']))
    print("訓練の正常終了")

    print("acc train:", acc_train)
    print("acc validation:", acc_val)

    epochs = range(1, len(acc_train) + 1)

    plt.plot(epochs, acc_train, label='train')
    plt.plot(epochs, acc_val, label='valid')

    #: mark the boundaries between training stages
    plt.plot([n_epoch_1, n_epoch_1], [0, 1.0],
             "--",
             color='darkred',
             alpha=0.7)
    plt.plot([n_epoch_1 + n_epoch_2, n_epoch_1 + n_epoch_2], [0, 1.0],
             "--",
             color='darkred',
             alpha=0.7)
    plt.plot(
        [n_epoch_1 + n_epoch_2 + n_epoch_3, n_epoch_1 + n_epoch_2 + n_epoch_3],
        [0, 1.0],
        "--",
        color='darkred',
        alpha=0.7)
    plt.plot([
        n_epoch_1 + n_epoch_2 + n_epoch_3 + n_epoch_4,
        n_epoch_1 + n_epoch_2 + n_epoch_3 + n_epoch_4
    ], [0, 1.0],
             "--",
             color='darkred',
             alpha=0.7)

    plt.legend()
    plt.savefig('__checkpoints__/training_history.png')

    shutil.copy('config.py', '__checkpoints__/')
Example #26
def load_nn(data_dir: Path,
            model_str: str,
            experiment: str = "one_month_forecast") -> NNBase:
    return load_model(data_dir / f"models/{experiment}/{model_str}/model.pt")
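A minimal usage sketch for the helper above; the data directory and model string are assumptions about the on-disk layout.
# Hypothetical usage; assumes an EALSTM was trained and saved under data/models/.
from pathlib import Path

ealstm = load_nn(Path('data'), model_str='ealstm', experiment='one_month_forecast')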
Example #27
sys.path.append("../..")

from scripts.utils import _rename_directory, get_data_path
from src.models import load_model

#
data_dir = get_data_path()

# experiment names
EXPERIMENT = "one_month_forecast_BASE_static_vars"
TRUE_EXPERIMENT = "one_month_forecast_BOKU_boku_VCI_our_vars_ALL"
TARGET_VAR = "boku_VCI"

# load EALSTM model
ealstm = load_model(data_dir / "models" / EXPERIMENT / "ealstm" / "model.pt")
ealstm.models_dir = data_dir / "models" / EXPERIMENT
ealstm.experiment = TRUE_EXPERIMENT

# load static embeddings
from typing import Tuple


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def build_static_x(
    x: Tuple[np.array],
) -> Tuple[List[np.array], List[np.array], List[np.array]]:
    all_static_x = []
            caption = "{} {:.3f}".format(labels_to_names[label], score)
            # draw_caption(draw, b, caption)
            print(caption, b)
            # draw_text(box, draw, labels_to_names[label], [255,255,0], 0, -45, 1, 1)
            cv2.putText(draw, caption, (box[0], box[1]),
                        cv2.FONT_HERSHEY_SIMPLEX, 5, (255, 255, 0), 5,
                        cv2.LINE_8)
            plt.figure(figsize=(15, 15))
            plt.axis('off')
            plt.imshow(draw)
            plt.show()

            filename, _ = os.path.basename(file_path).split(".")
            dirname = os.path.dirname(file_path)
            dest = os.path.join(
                dirname, filename + "_" + labels_to_names[label] + ".jpg")

            if save:
                cv2.imwrite(dest, draw)


if __name__ == "__main__":

    model = models.load_model('data/inference.h5', backbone_name='resnet50')
    labels_to_names = {0: 'crack', 1: 'wrinkle'}
    im_path = 'data/images/'

    for file in os.listdir(im_path):
        file_path = os.path.join(im_path, file)
        predict_save(file_path, model, save=True)
Example #29
def main(
    root_path: Path,
    experiments: str,
    model_paths: str,
    input_path: Path,
    predict_path: Path,
    config_path: Path,
    evaluate_path: Path,
) -> Path:
    """
    Main function responsible for prediction with passed model.

    Arguments:
        Path root_path: Path to the root folder for the subprocess
        str experiments: Relative paths to experiments, separated by commas
        str model_paths: Relative subpaths to trained models, separated by commas
        Path input_path: Path to file with input data
        Path predict_path: Path to output directory
        Path config_path: Path to main config (of :class:`DefaultConfig` class)
        Path evaluate_path: Path to evaluations

    Returns:
        Path to the experiment root dir.
    """
    models_with_config = []

    data_type = None
    prediction_bs = None
    main_config = None

    zipped = zip(unpack_string(experiments), unpack_string(model_paths))

    for index, (experiment, model_path) in enumerate(zipped):
        config_paths = (root_path / experiment / config_path).iterdir()
        config = get_config(config_paths)

        if index == 0:
            main_config = config

        model = load_model(config, root_path / experiment / model_path)
        model.eval()

        models_with_config.append(dict(model=model, config=config))

        data_type = set_param(previous=data_type,
                              current=config.training.dtype,
                              name="dtype")

        prediction_bs = set_param(
            previous=prediction_bs,
            current=config.prediction.batch_size,
            name="batch_size",
        )

    _, val_loader, _ = DataLoader.get_loaders(input_path, config=main_config)

    model_preds = []

    for model_with_config in models_with_config:
        model = model_with_config["model"]
        config = model_with_config["config"]

        all_preds = torch.tensor([])
        for x, _ in tqdm(val_loader, desc="Predictions"):
            predictions = model(x)
            processed_preds = pred_transform(
                preds=predictions, postprocessors=config.postprocessors)

            all_preds = torch.cat([all_preds, processed_preds])

        model_preds.append(all_preds)

    predictions = two_classifiers(model_preds, loader=val_loader)
    save_prediction(
        predictions=predictions,
        output_path=root_path / predict_path,
    )

    if evaluate_path:
        logger.info("Evaluator")
        evaluate(
            config=main_config,
            input_path=input_path,
            predict_path=root_path / predict_path,
            evaluate_path=root_path / evaluate_path,
            val_loader=val_loader,
        )