Example #1
def get_output_path(args):
    if args.model.suffix == '.pt':
        model_path = args.model
    else:
        serializer = Serializer(args.model)
        model_path = serializer._id2path(args.step)
    return args.output / (model_path.stem + '.pkl')
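A minimal invocation sketch (the paths and the SimpleNamespace stand-in for the parsed arguments are hypothetical; in the surrounding examples args comes from argparse):

from pathlib import Path
from types import SimpleNamespace

# Hypothetical args: since args.model already ends in .pt, the step id
# is never consulted and the .pkl path is built inside args.output.
args = SimpleNamespace(model=Path('checkpoints/model.pt'),
                       step=0,
                       output=Path('results'))
print(get_output_path(args))  # results/model.pkl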
Example #2
def preprocess_args(args):
    # Materialize the requested checkpoint as a standalone temporary .pt
    # file and point args.model at it for the rest of the run.
    args.output = get_output_path(args)
    args.is_temporary_model = True
    f = tempfile.NamedTemporaryFile(suffix='.pt', delete=False)
    Serializer(args.model).finalize(args.step,
                                    f.name,
                                    map_location=args.device)
    args.model = Path(f.name)
    f.close()
    return args
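Because the temporary file is created with delete=False, it survives f.close(); the is_temporary_model flag presumably tells downstream code that args.model now points at a scratch checkpoint to delete once processing finishes.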
Example #3
def process_all(args):
    # Drop any pre-existing 'step' so the SimpleNamespace below does not
    # receive it twice.
    args.__dict__.pop('step', None)
    serializer = Serializer(args.model)
    all_args = [
        SimpleNamespace(step=s, **args.__dict__)
        for s in serializer.list_known_steps()
    ]
    # Evaluate every checkpoint in parallel, distributing workers over
    # the available GPUs.
    with multiprocessing.Pool(args.tests_per_gpu) as p:
        GPUPool(p, args.gpus, args.tests_per_gpu)(process_single, all_args)
    writer = torch.utils.tensorboard.SummaryWriter(args.output / 'log')
    for step_args in all_args:
        samples_passed = get_samples_passed(step_args)
        with get_output_path(step_args).open('rb') as f:
            results = pickle.load(f)
        for result in results:
            tag = f'{result.dataset}/{result.sequence}/{result.step}/' \
                  f'{result.start}/{result.stop}'
            writer.add_scalar(f'Test/mean AEE/{tag}', result.mAEE,
                              samples_passed)
            writer.add_scalar(f'Test/mean %AEE/{tag}', result.mpAEE * 100,
                              samples_passed)
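Note that samples_passed is passed as the step argument of add_scalar, so every checkpoint's metrics share a samples-based x-axis in TensorBoard regardless of batch size.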
Example #4
from utils.text import gather_documents
from utils.config import Config
from utils.serializer import Serializer
from utils.indexer import Indexer  # module paths for Indexer and Merger
from utils.merger import Merger    # are assumptions; adjust to the project
import os

config = Config("./settings.yml")
data_dir = config.get("data_dir")
output_dir = config.get("output_dir")
stats_dir = config.get("stats_dir")


def clean():
    print("Removing all files in the output and stats folders")
    for file_name in os.listdir(output_dir):
        os.remove(os.path.join(output_dir, file_name))
    for file_name in os.listdir(stats_dir):
        os.remove(os.path.join(stats_dir, file_name))

if __name__ == "__main__":
    clean()
    documents = gather_documents(data_dir)
    indexer = Indexer(documents)
    indexer.run()
    merger = Merger()
    merger.run()
    serializer = Serializer()
    serializer.pickle_to_txt()
    serializer.wrap_up()
Example #5
def main():
    # torch.autograd.set_detect_anomaly(True)

    args = parse_args(sys.argv[1:])

    device = torch.device(args.device)
    if device.type == 'cuda':
        torch.cuda.set_device(device)
    if args.timers:
        timers = SynchronizedWallClockTimer()
    else:
        timers = FakeTimer()

    model = init_model(args, device)

    serializer = Serializer(args.model, args.num_checkpoints,
                            args.permanent_interval)

    args.do_not_continue = (args.do_not_continue
                            or len(serializer.list_known_steps()) == 0)
    last_step = (0 if args.do_not_continue else
                 serializer.list_known_steps()[-1])

    optimizer, scheduler = construct_train_tools(args,
                                                 model,
                                                 passed_steps=last_step)

    losses = init_losses(args.shape,
                         args.bs,
                         model,
                         device,
                         sequence_length=args.prefix_length +
                         args.suffix_length + 1,
                         timers=timers)

    # allow only manual flush
    logger = SummaryWriter(str(args.log_path),
                           max_queue=100000000,
                           flush_secs=100000000)

    periodic_hooks, hooks = create_hooks(args, model, optimizer, losses,
                                         logger, serializer)

    if not args.do_not_continue:
        global_step, state = serializer.load_checkpoint(model,
                                                        last_step,
                                                        optimizer=optimizer,
                                                        device=device)
        samples_passed = state.pop('samples_passed', global_step * args.bs)
    else:
        global_step = 0
        samples_passed = 0
        hooks['serialization'](global_step, samples_passed)

    loader = get_dataloader(get_trainset_params(args),
                            sample_idx=samples_passed,
                            process_only_once=False)

    if not args.skip_validation:
        hooks['validation'](global_step, samples_passed)

    with Profiler(args.profiling, args.model / 'profiling'), \
            GPUMonitor(args.log_path):
        train(model,
              device,
              loader,
              optimizer,
              args.training_steps,
              scheduler=scheduler,
              evaluator=losses,
              logger=logger,
              weights=args.loss_weights,
              is_raw=args.is_raw,
              accumulation_steps=args.accum_step,
              timers=timers,
              hooks=periodic_hooks,
              init_step=global_step,
              init_samples_passed=samples_passed,
              max_events_per_batch=args.max_events_per_batch)

    samples = samples_passed + (args.training_steps - global_step) * args.bs
    hooks['serialization'](args.training_steps, samples)
    if not args.skip_validation:
        hooks['validation'](args.training_steps, samples)
Example #6
def __init__(self, documents):
    self.documents = documents
    self.serializer = Serializer()
    self.tokenizer = Tokenizer(self.stoplist_path, self.stemmer)
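self.stoplist_path and self.stemmer are never assigned in this constructor, so they are presumably class attributes (or set elsewhere) on the class this snippet belongs to.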
Example #7
def get_samples_passed(args):
    serializer = Serializer(args.model)
    model_path = serializer._id2path(args.step)
    data = torch.load(model_path, map_location='cpu')
    return data.get('samples_passed', data['global_step'] * args.bs)
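Like Example #1, this reaches into Serializer._id2path, an underscore-prefixed internal helper, to map a step id to its checkpoint path; no public accessor is shown in these snippets.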
Example #8
def main():
    # torch.autograd.set_detect_anomaly(True)

    args = parse_args()

    device = torch.device(args.device)
    torch.cuda.set_device(device)
    if args.timers:
        timers = SynchronizedWallClockTimer()
    else:
        timers = FakeTimer()

    model = init_model(args, device)

    loader = get_dataloader(get_trainset_params(args))

    serializer = Serializer(args.model, args.num_checkpoints,
                            args.permanent_interval)

    args.do_not_continue = (args.do_not_continue
                            or len(serializer.list_known_steps()) == 0)
    last_step = (0 if args.do_not_continue else
                 serializer.list_known_steps()[-1])

    optimizer, scheduler = construct_train_tools(args,
                                                 model,
                                                 passed_steps=last_step)

    losses = init_losses(get_resolution(args),
                         args.bs,
                         model,
                         device,
                         timers=timers)

    logger = SummaryWriter(str(args.log_path))

    periodic_hooks, hooks = create_hooks(args, model, optimizer, losses,
                                         logger, serializer)

    if not args.do_not_continue:
        global_step, state = serializer.load_checkpoint(model,
                                                        last_step,
                                                        optimizer=optimizer,
                                                        device=device)
        samples_passed = state.pop('samples_passed', global_step * args.bs)
    else:
        global_step = 0
        samples_passed = 0
        hooks['serialization'](global_step, samples_passed)

    hooks['validation'](global_step, samples_passed)

    with Profiler(args.profiling, args.model / 'profiling'):
        train(model,
              device,
              loader,
              optimizer,
              args.training_steps,
              scheduler=scheduler,
              evaluator=losses,
              logger=logger,
              weights=args.loss_weights,
              is_raw=args.is_raw,
              accumulation_steps=args.accum_step,
              timers=timers,
              hooks=periodic_hooks,
              init_step=global_step,
              init_samples_passed=samples_passed)

    samples = samples_passed + (args.training_steps - global_step) * args.bs
    hooks['serialization'](args.training_steps, samples)
    hooks['validation'](args.training_steps, samples)
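Compared with Example #5, this variant calls torch.cuda.set_device unconditionally, keeps the SummaryWriter's default flushing, builds the dataloader before samples_passed is known, omits the GPUMonitor, and always runs the validation hooks.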