Exemplo n.º 1
0
def main():
    """Entry point: load speech data, restore a trained Seq2Seq checkpoint,
    and run the evaluation stages selected by ``args.test_mode``.

    ``args.test_mode`` is checked by substring, so a single value can enable
    several stages:
      - 'transcript': decode transcripts for the test set (or reload them
        from an existing ``submission.csv``).
      - 'cer': compute character error rates from the transcripts CSV and
        save the raw/normalized edit distances as .npy files.
      - 'perp': compute per-utterance perplexities and save them as .npy.

    Side effects: creates ``args.save_directory`` and writes log files,
    a CSV, and numpy arrays inside it.
    """
    args = parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    t0 = time.time()

    # Make sure the run directory exists and start from an empty log file
    # (opening with 'w+' truncates any previous log).
    if not os.path.exists(args.save_directory):
        os.makedirs(args.save_directory)
    LOG_PATH = os.path.join(args.save_directory, 'log')
    with open(LOG_PATH, 'w+') as ouf:
        pass

    print("Loading File Paths")
    train_paths, dev_paths, test_paths = load_paths()
    # Optionally cap each split. (The original single-statement slice had
    # been mangled by an auto-formatter; these three assignments are the
    # same operation, written readably.)
    train_paths = train_paths[:args.max_train]
    dev_paths = dev_paths[:args.max_dev]
    test_paths = test_paths[:args.max_test]
    t1 = time.time()
    print_log('%.2f Seconds' % (t1 - t0), LOG_PATH)

    print("Loading Y Data")
    # NOTE(review): test_paths was already capped by args.max_test above;
    # this second cap by args.max_data looks redundant — confirm intended.
    test_paths = test_paths[:args.max_data]
    train_ys = load_y_data('train')  # 1-dim np array of strings
    dev_ys = load_y_data('dev')
    test_ys = load_y_data('test')
    t1 = time.time()
    print_log('%.2f Seconds' % (t1 - t0), LOG_PATH)

    print("Building Charset")
    # Charset is built over ALL splits so train/dev/test share one mapping.
    charset = build_charset(np.concatenate((train_ys, dev_ys, test_ys),
                                           axis=0))
    charmap = make_charmap(charset)  # {string: int}
    charcount = len(charset)
    t1 = time.time()
    print_log('%.2f Seconds' % (t1 - t0), LOG_PATH)

    print("Mapping Characters")
    testchars = map_characters(test_ys, charmap)
    print("Building Loader")
    # shuffle=False keeps loader order aligned with test_ys / the CSV rows.
    test_loader = make_loader(test_paths,
                              testchars,
                              args,
                              shuffle=False,
                              batch_size=args.batch_size)

    # The model is only needed for decoding ('transcript') and
    # perplexity ('perp'); a cer-only run skips checkpoint loading.
    if 'transcript' in args.test_mode or 'perp' in args.test_mode:
        print("Building Model")
        model = Seq2SeqModel(args, vocab_size=charcount)

        CKPT_PATH = os.path.join(args.save_directory, 'model.ckpt')
        print('ckpt : ' + CKPT_PATH)
        if args.cuda:
            model.load_state_dict(torch.load(CKPT_PATH))
        else:
            # Checkpoint was saved from GPU tensors; remap storage to CPU
            # and move every tensor explicitly before loading.
            gpu_dict = torch.load(CKPT_PATH,
                                  map_location=lambda storage, loc: storage)
            cpu_model_dict = {}
            for key, val in gpu_dict.items():
                cpu_model_dict[key] = val.cpu()
            model.load_state_dict(cpu_model_dict)
        print("Loaded Checkpoint")

        if args.cuda:
            model = model.cuda()

        model.eval()

    TRANSCRIPT_LOG_PATH = os.path.join(args.save_directory,
                                       'transcript_log.txt')
    CSV_PATH = os.path.join(args.save_directory, 'submission.csv')

    if 'transcript' in args.test_mode:
        print('generating transcripts')
        # Truncate the transcript log before appending to it below.
        with open(TRANSCRIPT_LOG_PATH, 'w+') as ouf:
            pass
        if not os.path.exists(CSV_PATH):
            # No cached CSV: decode with the model and write submission.csv.
            transcripts = write_transcripts(path=CSV_PATH,
                                            args=args,
                                            model=model,
                                            loader=test_loader,
                                            charset=charset,
                                            log_path=TRANSCRIPT_LOG_PATH)
        else:
            # Cached CSV exists: reload transcripts (column 1) instead of
            # re-decoding, mirroring each row into the transcript log.
            transcripts = []
            with open(CSV_PATH, 'r') as csvfile:
                raw_csv = csv.reader(csvfile)
                for row in raw_csv:
                    with open(TRANSCRIPT_LOG_PATH, 'a') as ouf:
                        ouf.write('%s\n' % row[1])
                    transcripts.append(row[1])
        t1 = time.time()
        # Fixed typo in the status message ("Finshed" -> "Finished").
        print("Finished Writing Transcripts")
        print('%.2f Seconds' % (t1 - t0))

    if 'cer' in args.test_mode:
        print('calculating cer values')
        CER_LOG_PATH = os.path.join(args.save_directory, 'cer_log.txt')
        with open(CER_LOG_PATH, 'w+') as ouf:
            pass
        # Always re-read transcripts from the CSV so this stage can run
        # independently of the 'transcript' stage.
        transcripts = []
        with open(CSV_PATH, 'r') as csvfile:
            raw_csv = csv.reader(csvfile)
            for row in raw_csv:
                transcripts.append(row[1])
        transcripts = [l.strip() for l in transcripts]
        CER_PATH = os.path.join(args.save_directory, 'test_cer.npy')
        DIST_PATH = os.path.join(args.save_directory, 'test_dist.npy')
        norm_dists, dists = cer_from_transcripts(transcripts, test_ys,
                                                 CER_LOG_PATH)
        np.save(CER_PATH, norm_dists)
        np.save(DIST_PATH, dists)

    if 'perp' in args.test_mode:
        print('calculating perp values')
        PERP_LOG_PATH = os.path.join(args.save_directory, 'perp_log.txt')
        with open(PERP_LOG_PATH, 'w+') as ouf:
            pass
        PERP_PATH = os.path.join(args.save_directory, 'test_perp.npy')
        all_perps = perplexities_from_x(model, test_loader)
        np.save(PERP_PATH, all_perps)
Exemplo n.º 2
0
import sys
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import confusion_matrix, classification_report
from collections import Counter
import time

from data_processing.processing import *
from models.cnns import cnn1d, cnn2d, cnn2d_2, cnn2d_wavelets
from models.recurrent import lstm1d, lstm1d_2
from models.predefined import inception1d, resnet1d
from baseline import parse_args, get_based_parameters
from data_processing.raw_data_processing import samples

if __name__ == '__main__':

    args = parse_args()
    data, path, numclasses, model_name, saved_model_path, prefix = get_based_parameters(
    )
    signal_1d_file = args.data_1d_file  #'cpsc2018/cpsc_1145_25.pkl'

    ## Use PTB_XL data as unknown classes for CPSC 2018 dataset
    unknown_file = 'ptb_xl_data/ptb_xl_75_25_cwt.pkl'
    unknown_classes = 75
    unknown_num = unknown_classes * samples

    #load model
    model = load_model(saved_model_path)
    x_train, x_test, x_valid, y_train, y_test, y_valid = load_train_test_data(
        path, prefix, data)

    #prepare data