def analyze(cli_params):
    """Run the clean labeled path over one data split and collect its outputs.

    Loads the parameters and data, sets up the model via ``setup_model`` and,
    for the 'valid' and 'test' splits, first runs a ``DummyLoop`` with
    ``FinalTestMonitoring`` (logged as calculating batch normalization for
    the clean labeled path).  The selected split is then streamed
    sequentially for one epoch through a compiled Theano function of the
    last layer's clean labeled activation.

    :param cli_params: passed unchanged to ``load_and_log_params``.
    :return: tuple ``(targets, outputs)`` -- the stacked ``targets_labeled``
        minibatches and the stacked first output of the compiled function.
    :raises KeyError: if ``p.data_type`` is not 'train', 'valid' or 'test'.
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=True)
    ladder = setup_model(p)

    # Analyze activations
    dset, indices, calc_batchnorm = {
        'train': (data.train, data.train_ind, False),
        'valid': (data.valid, data.valid_ind, True),
        'test': (data.test, data.test_ind, True),
    }[p.data_type]

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(
            extensions=[
                FinalTestMonitoring(
                    # list() keeps the concatenation valid on Python 3,
                    # where .values() does not return a plain list.
                    [ladder.costs.class_clean, ladder.error.clean]
                    + list(ladder.costs.denois.values()),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    cnorm=cnorm,
                                    whiten=whiten,
                                    scheme=ShuffledScheme),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    n_unlabeled=len(data.valid_ind),
                                    cnorm=cnorm,
                                    whiten=whiten,
                                    scheme=ShuffledScheme),
                    prefix="valid_final", before_training=True),
                ShortPrinting({
                    "valid_final": OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_C_de', [ladder.costs.denois.get(0),
                                     ladder.costs.denois.get(1),
                                     ladder.costs.denois.get(2),
                                     ladder.costs.denois.get(3)]),
                    ]),
                }, after_training=True, use_log=False),
            ])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # We want out the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs (.items() works on both Python 2 and 3)
        for k, v in d.items():
            inputs[k].append(v)
        # Store outputs; arguments are looked up by the graph input's name
        res.append(f(*[d[str(inp)] for inp in cg.inputs]))

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.vstack(v) for k, v in inputs.items()}

    return inputs['targets_labeled'], res[0]
def analyze(cli_params):
    """Evaluate the clean labeled path on the split named by ``data_type``.

    For the 'valid' and 'test' splits a ``DummyLoop`` carrying
    ``FinalTestMonitoring`` and ``ShortPrinting`` is run first (logged as
    calculating batch normalization for the clean labeled path).  The chosen
    split is then iterated sequentially for one epoch and fed through a
    compiled Theano function of the last layer's clean labeled activation.

    :param cli_params: forwarded to ``load_and_log_params``.
    :return: ``(targets, outputs)`` -- the stacked ``targets_labeled``
        minibatches and the stacked first output of the compiled function.
    :raises KeyError: if the data type is not 'train', 'valid' or 'test'.
    """
    params, _ = load_and_log_params(cli_params)
    _, data = setup_data(params, test_set=True)
    ladder = setup_model(params)

    # Per split: (dataset, example indices, run the monitoring pass first?)
    split_table = {
        'train': (data.train, data.train_ind, False),
        'valid': (data.valid, data.valid_ind, True),
        'test': (data.test, data.test_ind, True),
    }
    dset, indices, calc_batchnorm = split_table[params.data_type]

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')

        monitored = [ladder.costs.class_clean, ladder.error.clean]
        monitored = monitored + list(ladder.costs.denois.values())

        # These need to match with the training
        train_stream = make_datastream(
            data.train, data.train_ind, params.batch_size,
            n_labeled=params.labeled_samples,
            n_unlabeled=len(data.train_ind),
            scheme=ShuffledScheme)
        valid_stream = make_datastream(
            data.valid, data.valid_ind, params.valid_batch_size,
            n_labeled=len(data.valid_ind),
            n_unlabeled=len(data.valid_ind),
            scheme=ShuffledScheme)

        final_monitor = FinalTestMonitoring(
            monitored, train_stream, valid_stream,
            prefix="valid_final", before_training=True)

        report_columns = OrderedDict([
            ('VF_C_class', ladder.costs.class_clean),
            ('VF_E', ladder.error.clean),
            ('VF_C_de', [ladder.costs.denois.get(level)
                         for level in range(4)]),
        ])
        printer = ShortPrinting({"valid_final": report_columns},
                                after_training=True, use_log=False)

        DummyLoop(extensions=[final_monitor, printer]).run()

    # Every index of the split goes through the labeled pathway
    stream = make_datastream(dset, indices,
                             batch_size=params.get('batch_size'),
                             n_labeled=len(indices),
                             n_unlabeled=len(indices),
                             balanced_classes=False,
                             scheme=SequentialScheme)

    # Values after softmax: activation of the last layer
    last_layer = len(ladder.layers) - 1
    outputs = ladder.act.clean.labeled.h[last_layer]

    # Swap the batch normalization parameters for the shared variables
    if calc_batchnorm:
        _, _, outputs = TestMonitoring()._get_bn_params(outputs)

    graph = ComputationGraph(outputs)
    predict = graph.get_theano_function()

    collected = {
        'features_labeled': [],
        'targets_labeled': [],
        'features_unlabeled': [],
    }
    batch_outputs = []

    # One pass over the data
    for batch in stream.get_epoch_iterator(as_dict=True):
        for source, value in batch.items():
            collected[source].append(value)
        arguments = [batch[str(inp)] for inp in graph.inputs]
        batch_outputs.append(predict(*arguments))

    # Stitch the minibatches back together
    stacked_outputs = [numpy.vstack(parts) for parts in zip(*batch_outputs)]
    stacked_inputs = {source: numpy.vstack(parts)
                      for source, parts in collected.items()}

    return stacked_inputs['targets_labeled'], stacked_outputs[0]
def analyze(cli_params):
    """Run the clean labeled path over one data split (called when evaluating).

    Loads the parameters and data (the test set is only requested when
    ``p.data_type == 'test'``), sets up the model and, for the 'valid' and
    'test' splits, first runs a ``DummyLoop`` with ``FinalTestMonitoring``
    (logged as calculating batch normalization for the clean labeled path).
    The selected split is then streamed sequentially for one epoch through a
    compiled Theano function of the last layer's clean labeled activation.

    :param cli_params: passed unchanged to ``load_and_log_params``.
    :return: tuple ``(targets, outputs)`` -- the concatenated
        ``targets_labeled`` minibatches and the stacked first output of the
        compiled function.
    :raises ValueError: if ``p.data_type`` is not 'train', 'valid' or 'test'.
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Analyze activations
    if p.data_type == 'train':
        dset, indices, calc_batchnorm = data.train, data.train_ind, False
    elif p.data_type == 'valid':
        dset, indices, calc_batchnorm = data.valid, data.valid_ind, True
    elif p.data_type == 'test':
        dset, indices, calc_batchnorm = data.test, data.test_ind, True
    else:
        # ValueError is still caught by callers handling Exception.
        raise ValueError("Unknown data-type %s" % p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(extensions=[
            FinalTestMonitoring(
                # list() keeps the concatenation valid on Python 3, where
                # .values() does not return a plain list.
                [
                    ladder.costs.class_clean, ladder.error.clean,
                    ladder.oos.clean
                ] + list(ladder.costs.denois.values()),
                make_datastream(
                    data.train,
                    data.train_ind,
                    # These need to match with the training
                    p.batch_size,
                    n_labeled=p.labeled_samples,
                    n_unlabeled=len(data.train_ind),
                    cnorm=cnorm,
                    balanced_classes=p.balanced_classes,
                    whiten=whiten,
                    scheme=ShuffledScheme),
                make_datastream(data.valid,
                                data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                n_unlabeled=len(data.valid_ind),
                                balanced_classes=p.balanced_classes,
                                cnorm=cnorm,
                                whiten=whiten,
                                scheme=ShuffledScheme),
                prefix="valid_final",
                before_training=True),
            ShortPrinting(
                {
                    "valid_final":
                    OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_O', ladder.oos.clean),
                        ('VF_C_de', [
                            ladder.costs.denois.get(0),
                            ladder.costs.denois.get(1),
                            ladder.costs.denois.get(2),
                            ladder.costs.denois.get(3)
                        ]),
                    ]),
                },
                after_training=True,
                use_log=False),
        ])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset,
                         indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # We want out the values after softmax (activation of the last layer)
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {
        'features_labeled': [],
        'targets_labeled': [],
        'features_unlabeled': []
    }
    # Loop over one epoch
    for d in it:
        # Store all inputs (.items() works on both Python 2 and 3)
        for k, v in d.items():
            inputs[k].append(v)
        # Store outputs; arguments are looked up by the graph input's name
        res.append(f(*[d[str(inp)] for inp in cg.inputs]))

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.concatenate(v) for k, v in inputs.items()}

    return inputs['targets_labeled'], res[0]
def dump_unlabeled_encoder(cli_params):
    """Dump one layer's clean activations for every example of a data split.

    Called when dumping.  Loads the parameters and data (the test set is only
    requested when ``p.data_type == 'test'``), sets up the model and, for the
    'valid' and 'test' splits, first runs a ``DummyLoop`` with
    ``FinalTestMonitoring`` (logged as calculating batch normalization for
    the clean labeled path).  All ``dset.num_examples`` examples are then
    streamed sequentially through the labeled pathway and the activation of
    layer ``p.layer`` is collected.

    :param cli_params: passed unchanged to ``load_and_log_params``.
    :return: the stacked first output of the compiled function over one
        epoch (a single array, not an ``(inputs, result)`` pair).
    :raises ValueError: if ``p.data_type`` is not 'train', 'valid' or 'test'.
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Analyze activations.  The split's own index list is not needed here:
    # every example of the split is used further down.
    if p.data_type == 'train':
        dset, calc_batchnorm = data.train, False
    elif p.data_type == 'valid':
        dset, calc_batchnorm = data.valid, True
    elif p.data_type == 'test':
        dset, calc_batchnorm = data.test, True
    else:
        # ValueError is still caught by callers handling Exception.
        raise ValueError("Unknown data-type %s" % p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(extensions=[
            FinalTestMonitoring(
                # list() keeps the concatenation valid on Python 3, where
                # .values() does not return a plain list.
                [
                    ladder.costs.class_clean, ladder.error.clean,
                    ladder.oos.clean
                ] + list(ladder.costs.denois.values()),
                make_datastream(
                    data.train,
                    data.train_ind,
                    # These need to match with the training
                    p.batch_size,
                    n_labeled=p.labeled_samples,
                    n_unlabeled=len(data.train_ind),
                    balanced_classes=p.balanced_classes,
                    cnorm=cnorm,
                    whiten=whiten,
                    scheme=ShuffledScheme),
                make_datastream(data.valid,
                                data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                n_unlabeled=len(data.valid_ind),
                                balanced_classes=p.balanced_classes,
                                cnorm=cnorm,
                                whiten=whiten,
                                scheme=ShuffledScheme),
                prefix="valid_final",
                before_training=True),
            ShortPrinting(
                {
                    "valid_final":
                    OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_O', ladder.oos.clean),
                        ('VF_C_de', [
                            ladder.costs.denois.get(0),
                            ladder.costs.denois.get(1),
                            ladder.costs.denois.get(2),
                            ladder.costs.denois.get(3)
                        ]),
                    ]),
                },
                after_training=True,
                use_log=False),
        ])
        main_loop.run()

    all_ind = numpy.arange(dset.num_examples)
    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset,
                         all_ind,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(all_ind),
                         n_unlabeled=len(all_ind),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # If layer=-1 we want out the values after softmax
    if p.layer < 0:
        # The activations container is a dict, not a list (per the original
        # author's note), so a negative layer is translated to its key by hand.
        outputs = ladder.act.clean.labeled.h[len(ladder.layers) + p.layer]
    else:
        outputs = ladder.act.clean.labeled.h[p.layer]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []

    # Loop over one epoch
    for d in it:
        # Store outputs; arguments are looked up by the graph input's name
        res.append(f(*[d[str(inp)] for inp in cg.inputs]))

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]

    return res[0]
def dump_unlabeled_encoder(cli_params):
    """Dump one layer's clean activations for every example of a data split.

    Called when dumping.  Loads the parameters and data (the test set is only
    requested when ``p.data_type == 'test'``), sets up the model and, for the
    'valid' and 'test' splits, first runs a ``DummyLoop`` with
    ``FinalTestMonitoring`` (logged as calculating batch normalization for
    the clean labeled path).  All ``dset.num_examples`` examples are then
    streamed sequentially through the labeled pathway and the activation of
    layer ``p.layer`` is collected.

    :param cli_params: passed unchanged to ``load_and_log_params``.
    :return: the stacked first output of the compiled function over one
        epoch (a single array, not an ``(inputs, result)`` pair).
    :raises ValueError: if ``p.data_type`` is not 'train', 'valid' or 'test'.
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Analyze activations.  The split's own index list is not needed here:
    # every example of the split is used further down.
    if p.data_type == 'train':
        dset, calc_batchnorm = data.train, False
    elif p.data_type == 'valid':
        dset, calc_batchnorm = data.valid, True
    elif p.data_type == 'test':
        dset, calc_batchnorm = data.test, True
    else:
        # ValueError is still caught by callers handling Exception.
        raise ValueError("Unknown data-type %s" % p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(
            extensions=[
                FinalTestMonitoring(
                    # list() keeps the concatenation valid on Python 3,
                    # where .values() does not return a plain list.
                    [ladder.costs.class_clean, ladder.error.clean,
                     ladder.oos.clean]
                    + list(ladder.costs.denois.values()),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    balanced_classes=p.balanced_classes,
                                    cnorm=cnorm,
                                    whiten=whiten,
                                    scheme=ShuffledScheme),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    n_unlabeled=len(data.valid_ind),
                                    balanced_classes=p.balanced_classes,
                                    cnorm=cnorm,
                                    whiten=whiten,
                                    scheme=ShuffledScheme),
                    prefix="valid_final", before_training=True),
                ShortPrinting({
                    "valid_final": OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_O', ladder.oos.clean),
                        ('VF_C_de', [ladder.costs.denois.get(0),
                                     ladder.costs.denois.get(1),
                                     ladder.costs.denois.get(2),
                                     ladder.costs.denois.get(3)]),
                    ]),
                }, after_training=True, use_log=False),
            ])
        main_loop.run()

    all_ind = numpy.arange(dset.num_examples)
    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, all_ind,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(all_ind),
                         n_unlabeled=len(all_ind),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # If layer=-1 we want out the values after softmax
    if p.layer < 0:
        # The activations container is a dict, not a list (per the original
        # author's note), so a negative layer is translated to its key by hand.
        outputs = ladder.act.clean.labeled.h[len(ladder.layers) + p.layer]
    else:
        outputs = ladder.act.clean.labeled.h[p.layer]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []

    # Loop over one epoch
    for d in it:
        # Store outputs; arguments are looked up by the graph input's name
        res.append(f(*[d[str(inp)] for inp in cg.inputs]))

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]

    return res[0]