Example #1
def read_from_file(modelName,
                   noDataset=False,
                   debugDataset=False,
                   simpleLoading=None):
    # Keep prompting until a valid model filename is provided
    while (True):
        try:
            f = open('./saved_models/' + modelName, 'r')
            break
        except IOError:
            modelName = raw_input(
                "This model does not exist! Please provide the name of the model you want to inspect:\n"
            )

    savedVars, settings = load_from_pickle(f)

    print(settings)

    if (simpleLoading is not None):
        settings['simple_data_loading'] = simpleLoading

    if (debugDataset):
        settings['max_dataset_size'] = 1000

    dataset, rnn = constructModels(settings, None, None, noDataset=noDataset)

    # Actually load variables
    rnn.loadVars(savedVars)

    f.close()

    return dataset, rnn, settings
def read_from_file(modelName):
    # Keep prompting until a valid model filename is provided
    while (True):
        try:
            f = open('./saved_models/' + modelName, 'r')
            break
        except IOError:
            modelName = raw_input(
                "This model does not exist! Please provide the name of the model you want to inspect:\n"
            )

    savedVars, settings = load_from_pickle(f)

    print(settings)

    dataset, rnn = constructModels(settings, None, None)

    # Actually load variables
    rnn.loadVars(savedVars)

    f.close()

    return dataset, rnn, settings
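
# Hedged usage sketch, not part of the original sources: how the loader above
# might be called from an inspection session. 'example.model' is a placeholder
# file name under ./saved_models/, not a real saved model.
#     dataset, rnn, settings = read_from_file('example.model')
#     print(settings['hidden_dim'])
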
        # Print parameters
        printF(str(parameters), experimentId, currentIteration)

        # Warn for unusual parameters
        if (parameters['max_dataset_size'] is not False):
            printF("WARNING! RUNNING WITH LIMIT ON DATASET SIZE!",
                   experimentId, currentIteration)
        if (not using_gpu()):
            printF("WARNING! RUNNING WITHOUT GPU USAGE!", experimentId,
                   currentIteration)

        # Set simple loading processor
        processor = processSampleFindX

        # Construct models
        dataset, model = constructModels(parameters, 0, {})

        # Load pretrained only_cause_expression = 1 model
        if (parameters['load_cause_expression_1'] is not False):
            loadedVars, _ = load_from_pickle_with_filename(
                "./saved_models/" + parameters['load_cause_expression_1'])
            if (model.loadPartialDataDimVars(dict(loadedVars), 0,
                                             model.data_dim)):
                printF("Loaded pretrained model (expression 1) successfully!",
                       experimentId, currentIteration)
            else:
                raise ValueError(
                    "Loading pretrained model failed: wrong variables supplied!"
                )

        # Train on all datasets in succession
Example #4
    # Ask for seed if running random baseline
    seed = 0
    if (parameters['random_baseline']):
        seed = int(
            raw_input(
                "Please provide an integer seed for the random number generation: "
            ))

    # Warn for unusual parameters
    if (parameters['max_training_size'] is not False):
        print("WARNING! RUNNING WITH LIMIT ON TRAINING SIZE!")
    if (not using_gpu()):
        print("WARNING! RUNNING WITHOUT GPU USAGE!")

    # Construct models
    datasets, rnn = constructModels(parameters, seed, verboseOutputter)

    ### From here the experiment should be the same every time

    # Start experiment clock
    start = time.clock()

    # Train on all datasets in succession
    train(rnn,
          datasets,
          parameters,
          name,
          start,
          saveModels=saveModels,
          targets=not parameters['single_digit'],
          verboseOutputter=verboseOutputter)
import os
import pickle
import sys

from tools.file import load_from_pickle_with_filename
from tools.model import constructModels
import theano

if __name__ == '__main__':
    theano.config.floatX = 'float32'

    name = sys.argv[1]

    filepath = "./saved_models/%s.model" % name
    if (os.path.isfile(filepath)):
        modelName = name
        result = load_from_pickle_with_filename(filepath)
        if (result is not False):
            savedVars, settings = result
            dataset, rnn = constructModels(settings, 0, None)
            modelSet = rnn.loadVars(dict(savedVars))
            if (modelSet):
                modelInfo = settings
                floats = {}
                for key in sorted(rnn.vars.keys()):
                    floats[key] = rnn.vars[key].get_value().astype('float32')

                f_model = open(filepath)
                _ = f_model.readline()
                settingsLine = f_model.readline()
                f_model.close()

                f = open('./saved_models/%s.floats' % name, 'wb')
                f.writelines(['###\n', settingsLine])
                pickle.dump(floats.items(), f)
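
# Hedged sketch, assumption only (not part of the original script): reading back
# the '.floats' file written above. The writer implies a format of two header
# lines ('###' and the copied settings line) followed by a pickled list of
# (variable name, float32 array) pairs. 'load_floats' is a hypothetical helper
# name used purely for illustration.
def load_floats(name):
    with open('./saved_models/%s.floats' % name, 'rb') as f:
        f.readline()                   # skip the '###' marker line
        settingsLine = f.readline()    # the settings line copied from the .model file
        floats = dict(pickle.load(f))  # remaining bytes: pickled (name, array) items
    return settingsLine, floats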
Example #6
    def testExists(self):
        params = [
            '--finish_subsystems', 'True', '--only_cause_expression', '1',
            '--dataset', '../data/subsystems_shallow_simple_topcause',
            "--sample_testing_size", "10000", "--n_max_digits", "17",
            "--intervention_base_offset", "0", "--intervention_range", "17",
            "--nesterov_optimizer", "True", "--decoder", "True",
            "--learning_rate", "0.005", "--hidden_dim", "256"
        ]
        params = processCommandLineArguments(params)
        datasets, _ = constructModels(params, 1234, {})
        dataset = datasets[0]

        storage = dataset.expressionsByPrefix

        expressions = [
            "(3-9)*(0-3)=18", "(4-7)+(6*5)=27", "(0/6)+(2*8)=16",
            "(1-4)+(3+6)=6", "(6+0)+(0-1)=5"
        ]
        for i, expr in enumerate(expressions):
            self.assertEqual(
                storage.exists(expr), True,
                "(exists) Failing exists lookup for sample %d" % i)
            _, _, _, _, branch = storage.get(expr[:4], alsoGetStructure=True)
            closest, _, _, _ = branch.get_closest(expr[4:])
            self.assertNotEqual(
                closest, False,
                "(exists) Branch-based lookup failed with False for sample %d: %s"
                % (i, closest))
            self.assertEqual(
                closest, expr,
                "(exists) Failing branch-based lookup for sample %d: %s" %
                (i, closest))

            # Apply random mutations and check that both lookup methods return the same closest expression
            for n in range(20):
                intervention_location = np.random.randint(0, len(expr))
                new_symbol = np.random.randint(dataset.data_dim)
                new_expression = expr[:intervention_location] + dataset.findSymbol[
                    new_symbol] + expr[intervention_location + 1:]
                print("Old: %s\tNew: %s" % (expr, new_expression))

                _, _, valids, _, branch = storage.get(
                    new_expression[:intervention_location + 1],
                    alsoGetStructure=True)
                if (new_expression not in valids and len(valids) > 0):
                    # Old method: compare all
                    profiler.start('old')
                    nearest = -1
                    nearest_score = 100000
                    for j, nexpr in enumerate(valids):
                        score = string_difference(new_expression, nexpr)
                        if (score < nearest_score):
                            nearest = j
                            nearest_score = score
                    closest_old = valids[nearest]
                    profiler.stop('old')

                    profiler.start('new')
                    # New method:
                    closest_new, _, _, _ = branch.get_closest(
                        new_expression[intervention_location + 1:])
                    profiler.stop('new')

                    if (closest_old != closest_new):
                        print(
                            "(exists) Intervened closest do not match for sample %d: loc %d / orig %s / int %s / old %s / new %s"
                            % (i, intervention_location, expr, new_expression,
                               closest_old, closest_new))


#                     self.assertEqual(closest_old, closest_new,
#                                      "(exists) Intervened closest do not match for sample %d: loc %d / orig %s / int %s / old %s / new %s" %
#                                         (i, intervention_location, expr, new_expression, closest_old, closest_new));

        profiler.profile()
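
# Hedged sketch, assumption only: 'string_difference' is used above as a distance
# score where a lower value means a closer match, but its definition is not shown
# in this excerpt. A position-wise mismatch count plus the length difference is
# one plausible reading; 'string_difference_sketch' is a hypothetical stand-in
# used purely for illustration.
def string_difference_sketch(left, right):
    # Count mismatched characters over the aligned prefix
    shared = min(len(left), len(right))
    mismatches = sum(1 for i in range(shared) if left[i] != right[i])
    # Penalise any leftover length so longer edits score worse
    return mismatches + abs(len(left) - len(right))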
    # Process parameters
    parameters = processCommandLineArguments(sys.argv[1:])

    # Specific settings - default name is time of experiment
    name = parameters['output_name'] + time.strftime("_%d-%m-%Y_%H-%M-%S")
    saveModels = True

    # Warn for unusual parameters
    if (parameters['max_training_size'] is not False):
        print("WARNING! RUNNING WITH LIMIT ON TRAINING SIZE!")
    if (not using_gpu()):
        print("WARNING! RUNNING WITHOUT GPU USAGE!")

    # Construct models
    _, model = constructModels(parameters, 0, {}, noDataset=True)

    # Load data
    dataset_data = load_data(parameters)

    # Train on all datasets in succession
    # Print settings headers to raw results file
    print("# " + str(parameters))

    # Compute batching variables
    repetition_size = len(dataset_data)
    if (parameters['max_training_size'] is not False):
        repetition_size = min(parameters['max_training_size'], repetition_size)
    next_testing_threshold = parameters['test_interval'] * repetition_size

    for r in range(parameters['repetitions']):
    if (not using_gpu()):
        print("WARNING! RUNNING WITHOUT GPU USAGE!")

    # Check for valid subbatch size
    if (parameters['minibatch_size'] % parameters['subbatch_size'] != 0):
        raise ValueError(
            "Subbatch size is not compatible with minibatch size: m.size = %d, s.size = %d"
            % (parameters['minibatch_size'], parameters['subbatch_size']))

    # Check for valid intervention ranges
    if (parameters['intervention_base_offset'] <= 0):
        raise ValueError(
            "Invalid intervention base offset: is %d, must be at least 1." %
            parameters['intervention_base_offset'])

    # Construct models
    dataset, _ = constructModels(parameters, 0, {}, noModel=True)
    actual_data_dim = dataset.data_dim
    if (parameters['only_cause_expression'] is False):
        actual_data_dim *= 2
    model = Autoencoder(actual_data_dim, parameters['hidden_dim'],
                        parameters['minibatch_size'], parameters['n_max_digits'],
                        parameters['learning_rate'], dataset.GO_symbol_index,
                        dataset.EOS_symbol_index,
                        parameters['only_cause_expression'])

    # Train on all datasets in succession
    # Print settings headers to raw results file
    print("# " + str(parameters))

    # Compute batching variables
    repetition_size = dataset.lengths[dataset.TRAIN]
    if (parameters['max_training_size'] is not False):
        repetition_size = min(parameters['max_training_size'], repetition_size)
    next_testing_threshold = parameters['test_interval'] * repetition_size
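
    # Hedged sketch, assumption only: one way the batching variables above are
    # typically consumed, shown commented out because 'train_batch' and
    # 'test_model' are hypothetical helpers that do not appear in this excerpt.
    #
    #     total_samples = 0
    #     for r in range(parameters['repetitions']):
    #         for batch_start in range(0, repetition_size, parameters['minibatch_size']):
    #             train_batch(model, dataset, batch_start, parameters)
    #             total_samples += parameters['minibatch_size']
    #         if (total_samples >= next_testing_threshold):
    #             test_model(model, dataset, parameters)
    #             next_testing_threshold += parameters['test_interval'] * repetition_size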