def main():
    test_anomaly = load_dataset('anomaly')
    test_normal = load_dataset('normal')

    model = VAE()
    model, loss = model.vae_net()
    model.load_weights("weight/vae_model.h5")
    anomaly_detector(model, test_normal, test_anomaly)
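The anomaly_detector helper is not shown above; a minimal sketch of a reconstruction-error detector for a VAE could look like the following (the function name, the mean-squared-error score, and the 3-sigma threshold are assumptions, not the project's code):

import numpy as np

def anomaly_detector_sketch(model, test_normal, test_anomaly):
    """Hypothetical: score samples by VAE reconstruction error."""
    def recon_error(x):
        x_hat = model.predict(x)
        # mean squared error per sample, averaged over all non-batch axes
        return np.mean(np.square(x - x_hat), axis=tuple(range(1, x.ndim)))

    normal_scores = recon_error(test_normal)
    anomaly_scores = recon_error(test_anomaly)
    # simple threshold: mean + 3 * std of the normal reconstruction error
    threshold = normal_scores.mean() + 3 * normal_scores.std()
    print("normal samples flagged:", np.mean(normal_scores > threshold))
    print("anomaly samples flagged:", np.mean(anomaly_scores > threshold))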
Example #2
def main(test_file, config):
    params = load_config(config)
    print(params)
    set_seed(params.seed)

    tokenizer = transformers.AutoTokenizer.from_pretrained(
        params.output.tokenizer_dir)
    model = transformers.TFAutoModelWithLMHead.from_pretrained(
        params.output.model_dir)
    test_texts = load_dataset(test_file)

    x_test, y_test = build_data(tokenizer, test_texts, params.block_size)

    # Create optimizer
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    # optimizer = keras.optimizers.Adam()
    model.compile(
        # One real loss for the LM logits; None for the model's remaining
        # outputs (one per transformer layer), so they don't contribute to the loss.
        loss=[loss, *[None] * model.config.n_layer],
        metrics=[
            keras.metrics.SparseCategoricalCrossentropy(from_logits=True),
            keras.metrics.SparseCategoricalAccuracy(),
        ],
    )

    # Evaluate best model with test set
    res = model.evaluate(x_test, y_test)
    print(res)
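build_data is not shown; one common way to prepare language-model evaluation data is to pack token ids into fixed-size blocks and shift them by one position for next-token labels. A sketch under that assumption (names are hypothetical):

import numpy as np

def build_data_sketch(tokenizer, texts, block_size):
    """Hypothetical: concatenate token ids and cut them into fixed-size blocks."""
    ids = []
    for text in texts:
        ids.extend(tokenizer.encode(text))
    n_blocks = len(ids) // block_size
    blocks = np.array(ids[:n_blocks * block_size]).reshape(n_blocks, block_size)
    # inputs are tokens 0..n-2, labels are tokens 1..n-1 (next-token prediction)
    return blocks[:, :-1], blocks[:, 1:]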
Example #3
def display():
    url = None
    form = NameForm()
    reliability = ''
    info = ''
    info_flag = False
    error = ''
    if form.validate_on_submit():
        url = form.url.data
        # Extract the bare hostname from the submitted URL once and reuse it.
        host = url.split('//')[1].split(':')[0].split('/')[0]
        s = Spider(host)
        s.get_main_info()
        s.get_ip_location()
        s.get_qualification_info()
        s.get_site_loopholes_info()
        print(s.feature)
        info = get_base_info(host)
        if isinstance(info, list):
            info_flag = True
        else:
            info_flag = False
        if len(s.feature) == 8:
            feature_list = s.feature
            data, target = load_dataset('feature_2.txt')
            reliability = train_1(data, target, feature_list)
        else:
            error = '特征缺失,无法评估。'  # "Features are missing; the site cannot be evaluated."
    return render_template('display.html',
                           form=form,
                           url=url,
                           reliability=reliability,
                           info=info,
                           info_flag=info_flag,
                           error=error)
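The host extraction above splits the URL by hand; an equivalent and more robust approach (a sketch, not the project's code) uses urllib.parse:

from urllib.parse import urlparse

def extract_host(url):
    """Return just the hostname, e.g. 'example.com' from 'http://example.com:8080/path'."""
    return urlparse(url).hostname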
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--alpha', '-a', type=float, default=1e-3)
    parser.add_argument('--beta1', type=float, default=0.9)
    parser.add_argument('--weightdecay', '-w', type=float, default=1e-4)
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--union_dir', type=str, default='')
    parser.add_argument('--df_path', default='../dataframe')
    parser.add_argument('--disable_update_lstm', action='store_true')
    parser.add_argument('--train_test_ratio', type=float, default=0.8)
    args = parser.parse_args()
    dataset_dict = load_dataset(args.df_path)

    for case in ['ga', 'o', 'ni']:
        model_path = load_union_model_path(args.union_dir, case)
        for domain in domain_dict:
            train_size = math.ceil(
                len(dataset_dict['{0}_x'.format(domain)]) * 0.7)
            dev_size = math.ceil(
                len(dataset_dict['{0}_x'.format(domain)]) * 0.8)
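            # NOTE: the 70%/80% split points above are hard-coded and do not
            # use args.train_test_ratio.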
            train_x = dataset_dict['{0}_x'.format(domain)][:train_size]
            test_x = dataset_dict['{0}_x'.format(domain)][train_size:dev_size]
            train_y = dataset_dict['{0}_y_{1}'.format(domain,
                                                      case)][:train_size]
            test_y = dataset_dict['{0}_y_{1}'.format(
                domain, case)][train_size:dev_size]
            train_data = tuple_dataset.TupleDataset(train_x, train_y)
            test_data = tuple_dataset.TupleDataset(test_x, test_y)
            fine_tuning(model_path, train_data, test_data, domain, case, args)
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--dir', '-m', type=str, default='')
    parser.add_argument('--df_path', default='../dataframe')
    parser.add_argument('--train_test_ratio', type=float, default=0.8)
    args = parser.parse_args()

    dataset_dict = load_dataset(args.df_path)
    print('start data load domain union')
    union_test_x = []
    union_test_ga = []
    union_test_o = []
    union_test_ni = []
    union_test_ga_dep_tag = []
    union_test_o_dep_tag = []
    union_test_ni_dep_tag = []
    union_test_z = []
    union_test_word = []
    union_test_is_verb = []
    for domain in domain_dict:
        size = math.ceil(len(dataset_dict['{0}_x'.format(domain)])*args.train_test_ratio)
        union_test_x += dataset_dict['{0}_x'.format(domain)][size:]
        union_test_ga += dataset_dict['{0}_y_ga'.format(domain)][size:]
        # union_test_o += dataset_dict['{0}_y_o'.format(domain)][size:]
        # union_test_ni += dataset_dict['{0}_y_ni'.format(domain)][size:]
        union_test_ga_dep_tag += dataset_dict['{0}_y_ga_dep_tag'.format(domain)][size:]
        # union_test_o_dep_tag += dataset_dict['{0}_y_o_dep_tag'.format(domain)][size:]
        # union_test_ni_dep_tag += dataset_dict['{0}_y_ni_dep_tag'.format(domain)][size:]
        union_test_z += dataset_dict['{0}_z'.format(domain)][size:]
        union_test_word += dataset_dict['{0}_word'.format(domain)][size:]
        union_test_is_verb += dataset_dict['{0}_is_verb'.format(domain)][size:]
    for case in ['ga']:
        for model_path in load_model_path(args.dir, case):
            if case == 'ga':
                test_data  = tuple_dataset.TupleDataset(union_test_x, union_test_ga, union_test_ga_dep_tag, union_test_z, union_test_word, union_test_is_verb)
            # elif case == 'o':
            #     test_data  = tuple_dataset.TupleDataset(union_test_x, union_test_o, union_test_o_dep_tag, union_test_z, union_test_word, union_test_is_verb)
            # elif case == 'ni':
            #     test_data  = tuple_dataset.TupleDataset(union_test_x, union_test_ni, union_test_ni_dep_tag, union_test_z, union_test_word, union_test_is_verb)
            predict(model_path, test_data, 'union', case, args)
            for domain in domain_dict:
                size = math.ceil(len(dataset_dict['{0}_x'.format(domain)])*args.train_test_ratio)
                test_x = dataset_dict['{0}_x'.format(domain)][size:]
                test_z = dataset_dict['{0}_z'.format(domain)][size:]
                if case == 'ga':
                    test_y = dataset_dict['{0}_y_ga'.format(domain)][size:]
                    test_y_dep_tag = dataset_dict['{0}_y_ga_dep_tag'.format(domain)][size:]
                # elif case == 'o':
                #     test_y = dataset_dict['{0}_y_o'.format(domain)][size:]
                #     test_y_dep_tag = dataset_dict['{0}_y_o_dep_tag'.format(domain)][size:]
                # elif case == 'ni':
                #     test_y = dataset_dict['{0}_y_ni'.format(domain)][size:]
                #     test_y_dep_tag = dataset_dict['{0}_y_ni_dep_tag'.format(domain)][size:]
                test_word = dataset_dict['{0}_word'.format(domain)][size:]
                test_is_verb = dataset_dict['{0}_is_verb'.format(domain)][size:]
                test_data  = tuple_dataset.TupleDataset(test_x, test_y, test_y_dep_tag, test_z, test_word, test_is_verb)
                predict(model_path, test_data, domain, case, args)
Example #6
def plot_ano_by_trainedmodel():
    """
    Calculate image differences by weighting and subtraction
    """
    # load normalized anomaly image
    anomaly_image = load_dataset('anomaly_images')
    detector_model = create_detector_model()
    ano_score, similar_img = compute_anomaly_score(detector_model, anomaly_image[1].reshape(1, IMAGE_SHAPE, IMAGE_SHAPE, 3))
    plot_anomaly(similar_img, anomaly_image[1], ano_score=ano_score)
Example #7
File: eval.py  Project: Hira63S/Tesloyta
def eval(cfg):
    dataset = load_dataset(cfg.dataset)('val', cfg)
    cfg = Args().update_dataset_info(cfg, dataset)
    Args().print(cfg)

    aps = eval_dataset(dataset, cfg.load_model, cfg)
    for k, v in aps.items():
        print('{:<20} {:.3f}'.format(k, v))

    torch.cuda.empty_cache()
Example #8
def load_env_compatible(sketch_data_dir, photo_data_dir, model_base_dir):
    """Loads environment for inference mode, used in jupyter notebook."""
    # modified https://github.com/tensorflow/magenta/blob/master/magenta/models/sketch_rnn/sketch_rnn_train.py
    # to work with deprecated tf.HParams functionality
    model_params = sketch_p2s_model.get_default_hparams()
    with tf.gfile.Open(os.path.join(model_base_dir, model_params.data_type, 'model_config.json'), 'r') as f:
        data = json.load(f)
    fix_list = ['is_training', 'use_input_dropout', 'use_output_dropout', 'use_recurrent_dropout']
    for fix in fix_list:
        data[fix] = (data[fix] == 1)
    model_params.parse_json(json.dumps(data))

    return load_dataset(os.path.join(sketch_data_dir, model_params.data_type), photo_data_dir,
                        model_params, inference_mode=True)
Example #9
def main():
    """main function for checkpoint ensemble."""
    config = Config("ensemble", training=True)
    trace(config)
    torch.backends.cudnn.benchmark = True

    train_data = load_dataset(config.train_dataset,
                              config.train_batch_size,
                              config,
                              prefix="Training:")

    # Build model.
    vocab = train_data.get_vocab()
    model = model_factory(config, config.checkpoint, *vocab)
    cp = CheckPoint(config.checkpoint)
    model.load_state_dict(cp.state_dict['model'])
    dump_checkpoint(model, config.save_model, ".ensemble")
Example #10
def restore_weight(model, verbose=True):
    if verbose:
        _, _, x_test, y_test = train.load_dataset()
        _, acc = model.evaluate(x_test, y_test)
        print("Untrained model, accuracy: {:5.2f}%".format(100 * acc))

    latest = tf.train.latest_checkpoint(train.checkpoint_dir)
    if latest:
        print("loading weight...")
        model.load_weights(latest)
    else:
        print("can't find weight = =")
        return

    if verbose:
        _, acc = model.evaluate(x_test, y_test)
        print("Restored model, accuracy: {:5.2f}%".format(100 * acc))
Example #11
def check():
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_layers', '-n', type=int, default=1)
    parser.add_argument('--dropout', '-d', type=float, default=0.2)
    parser.add_argument('--batchsize', '-b', type=int, default=32)
    parser.add_argument('--epoch', '-e', type=int, default=10)
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--train_test_ratio', type=float, default=0.8)
    parser.add_argument('--seed', default=1)
    parser.add_argument('--df_path', default='../dataframe')
    args = parser.parse_args()
    dataset_dict = load_dataset(args.df_path)
    for dropout in [0.1 * i for i in range(4)]:
        args.dropout = dropout
        args.batchsize = 256
        union(
            dataset_dict, args,
            'normal/dropout-{0}_batchsize-{1}'.format(args.dropout,
                                                      args.batchsize))
Example #12
File: translate.py  Project: wang-h/pynmt
def main():
    config = Config("translate", training=False)
    if config.verbose: trace(config)
    torch.backends.cudnn.benchmark = True

    test_data = load_dataset(config.test_dataset,
                             config.test_batch_size,
                             config,
                             prefix="Translate:")

    # Build model.
    vocab = test_data.get_vocab()
    pred_file = codecs.open(config.output + ".pred.txt", 'w', 'utf-8')

    model = model_factory(config, config.checkpoint, *vocab)
    translator = BatchTranslator(model, config, test_data.src_vocab,
                                 test_data.trg_vocab)

    # Statistics
    counter = count(1)
    pred_list = []
    gold_list = []
    for batch in tqdm(iter(test_data), total=test_data.num_batches):

        batch_trans = translator.translate(batch)

        for trans in batch_trans:
            if config.verbose:
                sent_number = next(counter)
                trace(trans.pprint(sent_number))

            if config.plot_attn:
                plot_attn(trans.src, trans.preds[0], trans.attns[0].cpu())

            pred_file.write(" ".join(trans.preds[0]) + "\n")
            pred_list.append(trans.preds[0])
            gold_list.append(trans.gold)
    report_bleu(gold_list, pred_list)
    report_rouge(gold_list, pred_list)
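report_bleu and report_rouge are project helpers not shown here; for reference, a corpus-level BLEU over the same token lists could be computed with sacrebleu (a sketch, assuming whitespace-joined tokens and a single reference per sentence):

import sacrebleu

def corpus_bleu_sketch(gold_list, pred_list):
    """Hypothetical: corpus BLEU from lists of token lists."""
    hypotheses = [" ".join(pred) for pred in pred_list]
    references = [[" ".join(gold) for gold in gold_list]]  # one reference stream
    return sacrebleu.corpus_bleu(hypotheses, references).score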
Example #13
        image_name = re.search(r'image_predict/(.+)', img_path_list[i])
        image_name = image_name.group(1)
        # Get the keys as a dictionary view object
        keys_dic_view = label_dic.keys()
        # Convert the dictionary view object to a list
        val = list(keys_dic_view)[y.argmax()]

        print('image: {0}\tpredicted label: {1}\tvalue: {2}'.format(image_name.ljust(30, ' '), y.argmax(), val))


if __name__ == '__main__':
    # Get the label dictionary
    label_dic = get_label_dic()

    # Load the dataset
    train_image, train_label, test_image, test_label = train.load_dataset()

    # Input shape
    in_shape = train_image.shape[1:]

    # Load the trained model
    model = load_model(in_shape)

    # Get the list of image paths
    img_path_list = glob.glob('image_predict/*.jpg')
    x_list = []
    for img_path in img_path_list:
        # Convert the image to numeric data
        x = convert_image(img_path)
        if len(x) == 0:  # skip images that failed to convert
            continue
Example #14
import tensorflow as tf
import numpy as np
import utils
import discriminator
import os
import six
import logging
from datagenerator import DataGenerator
from train import load_dataset

logger = tf.get_logger()
model_params = discriminator.HParams()
datasets = load_dataset('dataset', model_params)
test_set = datasets[2][0] + datasets[2][1]
labels = {dataset: i for (i, dataset) in enumerate(model_params.data_set)}
test_generator = DataGenerator(
    test_set, labels, batch_size=model_params.batch_size, shuffle=False)
model = discriminator.Discriminator(model_params)
predictions = model.test('models/final.model', test_generator)
Example #15
                 label='test_anomaly')
    plt.legend()
    plt.show()


def cal_anoo_score(train_normal, anomaly_test, test_normal):
    train_a, test_a, test_b = LOF_Score(train_normal, anomaly_test,
                                        test_normal)
    print("each ano score by LOF predict method range -1 to +1")
    lof_each_ano_score(train_a, test_a, test_b)

    print("each MCD(マハラノビス距離) ano score")
    MCD_Score(train_a, test_a, test_b)


if __name__ == '__main__':
    normal_path = 'normal_class'
    ano_path = 'anomaly_class'

    print('load normal query images')
    x_train = load_dataset(normal_path)
    train_normal = x_train[:900]
    test_normal = x_train[900:]

    print('load anomaly query images')
    anomaly_test = load_dataset(ano_path)

    # plot histogram
    plot_anomaly_histgram(train_normal, test_normal, anomaly_test)
    # calculate anomaly score
    cal_anoo_score(train_normal, anomaly_test, test_normal)
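LOF_Score and MCD_Score are not shown above; a minimal sketch of LOF scoring with scikit-learn (the function name, the flattening step, and novelty=True are assumptions, not the project's code):

import numpy as np
from sklearn.neighbors import LocalOutlierFactor

def lof_score_sketch(train_normal, anomaly_test, test_normal, n_neighbors=20):
    """Hypothetical: fit LOF on normal training images and score all three sets."""
    flatten = lambda x: np.asarray(x).reshape(len(x), -1)
    lof = LocalOutlierFactor(n_neighbors=n_neighbors, novelty=True)
    lof.fit(flatten(train_normal))
    # score_samples: values near 0 are normal, strongly negative values are anomalous
    return (lof.score_samples(flatten(train_normal)),
            lof.score_samples(flatten(anomaly_test)),
            lof.score_samples(flatten(test_normal)))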
Example #16
import tensorflow as tf
import os
import numpy as np
from keras.preprocessing import image
from train import load_dataset, define_model
import cv2

def label_to_category_dict(path):
    '''Returns a dictionary that maps labels to categories'''
    categories = os.listdir('Data/dogImages/train/')
    label_to_cat = map(lambda x: (int(x.split('.')[0]) - 1, x.split('.')[1]), categories)
    label_to_cat = {label: category for label, category in label_to_cat}
    return label_to_cat

train_files, train_targets = load_dataset('Data/dogImages/train')
label_to_cat = label_to_category_dict(train_files)
Resnet50_model = define_model((1, 1, 2048), 133)

Resnet50_model.load_weights('saved_models/weights_best_Resnet50.hdf5')

def face_detector(img_path):
    '''Returns true if human face present in image'''
    face_cascade = cv2.CascadeClassifier('Data/haarcascades/haarcascade_frontalface_alt.xml')
    img = cv2.imread(img_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray)
    return len(faces) > 0

def extract_Resnet50(tensor):
    '''Returns the ResNet50 features of the tensor'''
    return tf.keras.applications.resnet50.ResNet50(weights='imagenet', include_top=False).predict(tf.keras.applications.resnet50.preprocess_input(tensor))
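A usage sketch tying the pieces above together for one image (the 224x224 preprocessing and the assumption that the extracted feature shape matches the (1, 1, 2048) input of define_model are mine, not the project's):

import numpy as np
from keras.preprocessing import image

def predict_breed(img_path):
    """Hypothetical: image path -> ResNet50 bottleneck features -> breed name."""
    img = image.load_img(img_path, target_size=(224, 224))
    tensor = np.expand_dims(image.img_to_array(img), axis=0)
    features = extract_Resnet50(tensor)          # assumed to match define_model's input
    prediction = Resnet50_model.predict(features)
    return label_to_cat[int(np.argmax(prediction))]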
Example #17
    filename, ext = os.path.splitext(base_name)
    new_name = "%s.fold%.3d%s" % (filename, fold, ext)
    return new_name


def save_datasets(datasets, file):
    """
    Dump with PyTorch save
    """
    with open(file, 'wb') as f:
        save(datasets, f)


if __name__ == "__main__":
    args = parse_args()
    dataset = load_dataset(args.spectrogram_dir, args.label_dir,
                           args.downbeats)
    fold_sets = make_fold_datasets(dataset, args.num_folds)

    cuda_device = device('cuda:%d' % args.cuda_device)\
                  if args.cuda_device is not None else None

    # Similar to our train script, but we do this k times
    for k, datasets in enumerate(iterate_folds(fold_sets)):
        train, val, test = datasets
        model = BeatNet(downbeats=args.downbeats)
        if cuda_device is not None:
            model.cuda(args.cuda_device)

        output_file = make_fold_output_name(args.output_file, k)

        train_loader, val_loader, test_loader = make_data_loaders(
Example #18
def run():
    if len(sys.argv) != 3:
        raise ValueError(
            'expected 2 arguments.' +
            "\neg. python.exe tune.py .\Data\Train/Under_90_min_tuning\data.zip .\Data\Validation\Validation_10_percent\data.zip"
        )
    data_file_path = sys.argv[1]
    data_folder_path = os.path.dirname(data_file_path)
    train.unzip_files(data_file_path, data_folder_path)
    train_suffix = data_folder_path.split(sep='\\')[-1]

    val_file_path = sys.argv[2]
    val_folder_path = os.path.dirname(val_file_path)
    val_suffix = val_folder_path.split(sep='\\')[-1]

    train.unzip_files(val_file_path, val_folder_path)

    # check if necessary folders were extracted
    actual_train_orig_folder_path = os.path.join(data_folder_path,
                                                 train_suffix,
                                                 'train_original')
    actual_train_loc_folder_path = os.path.join(data_folder_path, train_suffix,
                                                'train_localization')
    if not os.path.exists(actual_train_orig_folder_path):
        raise ValueError(
            actual_train_orig_folder_path +
            ' does not exist. data.py did not do its job properly')
    if not os.path.exists(actual_train_loc_folder_path):
        raise ValueError(
            actual_train_loc_folder_path +
            ' does not exist. data.py did not do its job properly')

    actual_val_orig_folder_path = os.path.join(val_folder_path, val_suffix,
                                               'valid_original')
    actual_val_loc_folder_path = os.path.join(val_folder_path, val_suffix,
                                              'valid_localization')
    if not os.path.exists(actual_val_orig_folder_path):
        raise ValueError(
            actual_val_orig_folder_path +
            ' does not exist. data.py did not do its job properly')
    if not os.path.exists(actual_val_loc_folder_path):
        raise ValueError(
            actual_val_loc_folder_path +
            ' does not exist. data.py did not do its job properly')

    # open tune.txt and hyperparam.txt
    tuning_file_path = os.path.join(curr_dir, 'tuning_results.txt')
    hyp_file_path = os.path.join(curr_dir, 'hyperparameter.txt')
    tuning_file = open(tuning_file_path, 'w')
    tuning_file.write('model1\n')
    tuning_file.close()

    # tune model 1
    print('\n\ntuning model 1...')
    train_sample_num = train.get_train_sample_num(
        actual_train_orig_folder_path)
    print('#train images', train_sample_num)
    train_generator = train.load_dataset(folder_path_list=[
        actual_train_orig_folder_path, actual_train_loc_folder_path
    ],
                                         color_mode_list=["rgb", "grayscale"],
                                         batch_size=data.BATCH_SIZE,
                                         seed=0,
                                         apply_conversion=True)

    val_generator_list = get_val_data_gen_for_localization(
        folder_path_list=[
            actual_val_orig_folder_path, actual_val_loc_folder_path
        ],
        color_mode_list=["rgb", "grayscale"])

    learning_rates = [0.001, 0.01, 0.1]
    # learning_rates = [0.1]
    optimizers_list = ['RMSProp', 'SGD', 'ADAM']
    # optimizers_list = ['RMSProp']
    dropratios = [0.5, 0.7, 0.8]
    res = []
    for lr in learning_rates:
        for optimizer in optimizers_list:
            if lr == 0.1 and optimizer == 'SGD':
                continue
            if optimizer == 'SGD':
                opt = optimizers.SGD(lr)
            elif optimizer == 'ADAM':
                opt = optimizers.Adam(lr)
            elif optimizer == 'RMSProp':
                opt = optimizers.RMSprop(lr)
            else:
                opt = optimizers.RMSprop(lr)
            for dropratio in dropratios:
                print('lr: ', lr, 'optimizer', optimizer, 'dropratio',
                      dropratio)
                print('\nlr: ', lr, 'optimizer', optimizer)
                model_localization = train_localization_model(
                    train_generator, train_sample_num, opt, dropratio)
                IoU = predict_localization(
                    data_generator_list=val_generator_list,
                    model=model_localization)
                print('IoU', IoU)
                res.append(IoU)
                tuning_file = open(tuning_file_path, 'a')
                tuning_file.write(
                    repr(optimizer) + '\t' + repr(lr) + '\t' +
                    repr(dropratio) + '\t' + repr(IoU) + '\n')
                tuning_file.close()

    # Recover the (lr, optimizer, dropratio) indices from the flattened results.
    # The loops above run lr -> optimizer -> dropratio, so dropratio varies fastest.
    # (The skipped lr=0.1/SGD combination is not accounted for here.)
    min_idx = res.index(max(res))
    min_lr_idx = min_idx // (len(optimizers_list) * len(dropratios))
    min_idx_bar = min_idx % (len(optimizers_list) * len(dropratios))
    min_opt_idx = min_idx_bar // len(dropratios)
    min_drop_idx = min_idx_bar % len(dropratios)

    hyp_file = open(hyp_file_path, 'w')
    hyp_file.write(optimizers_list[min_opt_idx] + '\t' +
                   repr(learning_rates[min_lr_idx]) + '\t' +
                   repr(dropratios[min_drop_idx]) + '\n')
    hyp_file.close()
    # return

    print('\n\ntuning model 2')
    tuning_file = open(tuning_file_path, 'a')
    tuning_file.write('model2\n')
    tuning_file.close()
    val_sample_num = train.get_train_sample_num(actual_val_orig_folder_path)
    print('\n#val images', val_sample_num)
    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       shear_range=0.1,
                                       zoom_range=0.1,
                                       rotation_range=10.,
                                       width_shift_range=0.1,
                                       height_shift_range=0.1,
                                       horizontal_flip=True)
    train_generator = train_datagen.flow_from_directory(
        actual_train_orig_folder_path,
        target_size=(data.IMAGE_ROW_SIZE, data.IMAGE_COLUMN_SIZE),
        batch_size=data.BATCH_SIZE,
        shuffle=True,
        classes=data.FishNames,
        class_mode='categorical')
    val_generator = get_val_data_gen_for_classification(
        actual_val_orig_folder_path)
    learning_rates = [0.1]
    # learning_rates = [0.001]
    optimizers_list = ['RMSProp', 'SGD', 'ADAM']
    # optimizers_list = ['RMSProp', 'ADAM']

    res = []
    for lr in learning_rates:
        for optimizer in optimizers_list:
            if lr == 0.1 and optimizer == 'SGD':
                continue
            if optimizer == 'SGD':
                opt = optimizers.SGD(lr)
            elif optimizer == 'ADAM':
                opt = optimizers.Adam(lr)
            elif optimizer == 'RMSProp':
                opt = optimizers.RMSprop(lr)
            else:
                opt = optimizers.RMSprop(lr)
            print('lr: ', lr, 'optimizer', optimizer)
            model_classification = train_classification_model(
                train_generator, train_sample_num, opt)
            logloss = predict_classification(actual_val_orig_folder_path,
                                             val_generator,
                                             model_classification,
                                             val_sample_num)
            print('log loss', logloss)
            res.append(logloss)
            tuning_file = open(tuning_file_path, 'a')
            tuning_file.write(
                repr(optimizer) + '\t' + repr(lr) + '\t' + repr(logloss) +
                '\n')
            tuning_file.close()

    min_idx = res.index(min(res))
    # NOTE: the skipped lr=0.1/SGD combination shifts these indices.
    min_lr_idx = min_idx // len(optimizers_list)
    min_opt_idx = min_idx % len(optimizers_list)

    hyp_file = open(hyp_file_path, 'a')
    hyp_file.write(optimizers_list[min_opt_idx] + '\t' +
                   repr(learning_rates[min_lr_idx]) + '\n')
    hyp_file.close()

    if os.path.exists(actual_train_orig_folder_path):
        rmtree(actual_train_orig_folder_path)
    if os.path.exists(actual_train_loc_folder_path):
        rmtree(actual_train_loc_folder_path)
    if os.path.exists(actual_val_orig_folder_path):
        rmtree(actual_val_orig_folder_path)
    if os.path.exists(actual_val_loc_folder_path):
        rmtree(actual_val_loc_folder_path)
    if os.path.exists(os.path.join(data_folder_path, train_suffix)):
        rmtree(os.path.join(data_folder_path, train_suffix))
    if os.path.exists(os.path.join(data_folder_path, val_suffix)):
        rmtree(os.path.join(data_folder_path, val_suffix))
Example #19
from __future__ import print_function
import IPython

import train
import numpy as np
import pprint

dataset = train.load_dataset('data/squad')


all_cs = dataset['contexts']
all_qs = dataset['questions']
all_spans = dataset['spans']
vocab = dataset['vocab']

q_len = [len(q) for q in all_qs]
c_len = [len(c) for c in all_cs]
span_len = [e - s + 1 for s, e in all_spans]

def hist(data, bins):
    np_hist = np.histogram(data, bins)
    pprint.pprint(list(zip(*np_hist)))


print("ques hist: ")
hist(q_len, 20)

print("ctx hist: ",)
hist(c_len, 20)

print("span_end hist: ", )
Example #20
def test_load_data():
    (x_train, y_train), (x_test, y_test) = train.load_dataset()
    assert x_train.shape == (60000, 28, 28)
    assert y_train.shape == (60000, )
    assert x_test.shape == (10000, 28, 28)
    assert y_test.shape == (10000, )
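The asserted shapes are exactly those of Keras MNIST, so a train.load_dataset that satisfies this test could be as simple as the following sketch (not necessarily the project's actual loader):

import tensorflow as tf

def load_dataset():
    """Return ((x_train, y_train), (x_test, y_test)) with MNIST shapes."""
    return tf.keras.datasets.mnist.load_data()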
Example #21
import sys
import time
import pickle

from train import load_dataset, extract_feature, predict, get_pred_statistics
from config import DATA_FILEPATH, FEATURE_EXTRACTOR_FILEPATH, CLASSIFIER_FILEPATH

if __name__ == '__main__':
    total_time = 0
    runs = int(sys.argv[1])

    for i in range(0, runs):
        # To log the inference time
        start = time.time()

        X, y = load_dataset()

        with open(FEATURE_EXTRACTOR_FILEPATH, 'rb') as infile:
            extractor = pickle.load(infile)

        with open(CLASSIFIER_FILEPATH, 'rb') as infile:
            classifier = pickle.load(infile)

        # Apply feature extractor, e.g. TF-IDF, on the whole data
        X_vec = extract_feature(X, extractor, fit=False)

        if i == 0:
            print("*" * 50)
            print()

            print("Statistics using test data:")
Example #22
out_dim = 3  # Output dimension

enc = tf.keras.Sequential([
    layers.LSTM(args.n_units, batch_input_shape=[1, 20, 3]),
    layers.Dense(args.latent_dim)
])

dec = tf.keras.Sequential([
    layers.LSTM(args.n_units,
                batch_input_shape=[1, 1, 3 + args.latent_dim],
                stateful=True),
    layers.Dense(out_dim)
])

val_dataset = tf.data.Dataset.from_tensor_slices(
    load_dataset("dataset/validation"))
val_dataset = val_dataset.repeat(1)
val_dataset = val_dataset.batch(1)

for i, data in enumerate(val_dataset):
    # Encoding
    z = enc(data)

    # Decoding
    y = []
    yt = data[:, 0, :]
    for _ in range(data.shape[1]):
        dec_in = tf.concat((yt, z), 1)
        dec_in = tf.reshape(dec_in,
                            shape=(dec_in.shape[0], 1, dec_in.shape[1]))
        yt = dec(dec_in)
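The loop above never stores the decoded steps in y; presumably each prediction is collected inside the loop body, e.g. (an assumption, shown at the loop's indentation):

        y.append(yt)            # hypothetical: accumulate each decoded step
    # y = tf.stack(y, axis=1)   # would give a (batch, time, out_dim) tensor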
Example #23
import config as config

#print(config.hyperparameters["Name"])
import train as train

a, b = train.load_dataset()