def validate(data_type, model, seq_length=40, saved_model=None,
             class_limit=None, image_shape=None):
    """Evaluate a saved model on the test split and print the results."""
    batch_size = 32

    # Build the dataset; forward image_shape only when one was supplied.
    dataset_kwargs = {'seq_length': seq_length, 'class_limit': class_limit}
    if image_shape is not None:
        dataset_kwargs['image_shape'] = image_shape
    data = DataSet(**dataset_kwargs)

    val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Instantiate the model wrapper (loads weights from saved_model if given).
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Run the evaluation over the generator and report the metrics.
    results = rm.model.evaluate_generator(
        generator=val_generator,
        val_samples=3200)

    print(results)
    print(rm.model.metrics_names)
示例#2
0
    def __init__(self, name=None, description=None, **kwargs):
        """Set up the algorithmic and computational descriptions."""
        # Algorithmic side: identity plus parameter/measure containers.
        self.name = name
        self.description = description
        # Parameters of the algorithm (Parameter objects).
        self.parameters = DataSet(name='Parameter set')
        # Measures observed when the algorithm runs.
        self.measures = DataSet(name='Measure set')
        self.constraints = []

        # Computational side.
        # NOTE(review): with the default name=None this concatenation raises
        # TypeError — confirm callers always pass a name.
        self.parameter_file = self.name + '.param'
        self.sessions = {}  # maps session id to parameter
示例#3
0
 def log_likelihood(self, dataset):
     """Return the log-likelihood of `dataset` under this model.

     `dataset` yields (instance, count) pairs; each instance contributes
     count * log P(evidence(instance))."""
     total = 0.0
     for instance, count in dataset:
         probability = self.probability(DataSet.evidence(instance))
         total += count * math.log(probability)
     return total
示例#4
0
def get_and_log_mst_weight_from_checker(input_graph, force_recompute=False, inputslogfn=None):
    """Returns a 2-tuple of (input, weight).  If force_recompute is not
    True, then it will check the input log cache to see if we already know the
    answer first.  Logs the result."""
    ti = __get_ti(input_graph)

    # load in the inputs in the category of input_graph
    if inputslogfn is None:
        logfn = InputSolution.get_path_to(ti.prec, ti.dims, ti.min, ti.max)
    else:
        logfn = inputslogfn
    ds = DataSet.read_from_file(InputSolution, logfn)
    # 'in' instead of the deprecated dict.has_key() (removed in Python 3)
    if ti in ds.dataset:
        input_soln = ds.dataset[ti]
        do_log = True

        # see if we already know the answer
        if not force_recompute:
            if input_soln.has_mst_weight():
                return (ti, input_soln.mst_weight)  # cache hit!
    else:
        # if we weren't tracking the input before, don't start now
        do_log = False

    # compute the answer and (if specified) save it
    w = compute_mst_weight(input_graph)
    if do_log:
        if input_soln.update_mst_weight(w):
            ds.save_to_file(logfn)
    return (ti, w)
示例#5
0
def gather_weight_data(wtype):
    # get the results
    results = {} # maps |V| to ResultAccumulator
    ds = DataSet.read_from_file(WeightResult, WeightResult.get_path_to(wtype))
    for data in ds.dataset.values():
        result = results.get(data.input().num_verts)
        if result is None:
            result = ResultAccumulator(data.mst_weight)
            results[data.input().num_verts] = result
        else:
            result.add_data(data.mst_weight)

    try:
        # open a file to output to
        fh = open(DATA_PATH + wtype + '.dat', 'w')

        # compute relevant stats and output them
        print >> fh, '#|V|\tLower\tAverage\tUpper  (Lower/Upper from 99% CI)'
        keys = results.keys()
        keys.sort()
        for num_verts in keys:
            r = results[num_verts]
            r.compute_stats()
            if len(r.values) > 1:
                print >> fh, '%u\t%.3f\t%.3f\t%.3f\t%u' % (num_verts, r.lower99, r.mean, r.upper99, len(r.values))
        fh.close()
        return 0
    except IOError, e:
        print sys.stderr, "failed to write file: " + str(e)
        return -1
示例#6
0
 def soft_em_e_step(self, instance, count=1):
     """One E-step update for soft EM; returns the instance's probability."""
     weight = float(count)
     evidence = DataSet.evidence(instance)
     # Evaluate first (keeping cached data), then propagate marginals
     # top-down only before accumulating the expected counts.
     pre = self.value(evidence=evidence, clear_data=False) / self.theta_sum
     self.marginals(evidence=evidence, clear_data=False, do_bottom_up=False)
     self._soft_em_accumulate(evidence, pre, count=weight)
     return pre
def predict(data_type, seq_length, saved_model, image_shape, video_name, class_limit):
    """Load a saved model and print its prediction for a single video."""
    model = load_model(saved_model)

    # Build the dataset; forward image_shape only when one was supplied.
    dataset_kwargs = {'seq_length': seq_length, 'class_limit': class_limit}
    if image_shape is not None:
        dataset_kwargs['image_shape'] = image_shape
    data = DataSet(**dataset_kwargs)

    # Pull the frame sequence for the requested video.
    sample = data.get_frames_by_filename(video_name, data_type)

    # Add a batch dimension, run the network, and report per-class scores.
    prediction = model.predict(np.expand_dims(sample, axis=0))
    print(prediction)
    data.print_class_from_prediction(np.squeeze(prediction, axis=0))
示例#8
0
    def normalize(self, ds_source):
        """
        Apply the normalizing operation to a given `DataSet`.

        :Parameters:
            ds_source : `DataSet` 
                Data set to normalize.

        :Returns:
            `DataSet` : Normalized data set.

        :Raises NpyDataTypeError:
            If the given `DataSet` has not been numerized.
        """

        if ds_source.is_numerized == False:
            raise NpyDataTypeError, 'ds_source must be numerized first.'

        # Fresh destination set carrying over the attribute names only.
        ds_dest = DataSet()
        ds_dest.set_name_attribute(ds_source.get_name_attribute())

        data_instances = ds_source.get_data_instances()
        for data_instance_old in data_instances:

            attributes_new = []

            # Rescale each attribute into [lower_bound, upper_bound].
            # NOTE(review): this multiplies by self.max[index]; a min-max
            # rescale normally divides by the range, so self.max presumably
            # stores 1/(max-min) per attribute — confirm where it is built.
            for index, value in enumerate(data_instance_old.get_attributes()):
                value_new = (value - self.min[index]) * self.max[index] * (self.upper_bound - self.lower_bound) + self.lower_bound
                attributes_new.append(value_new)

            # Index and label are copied through unchanged.
            ds_dest.add_data_instance(data_instance_old.get_index_number(), attributes_new, data_instance_old.get_label_number())

        ds_dest.is_numerized = True
        return ds_dest
示例#9
0
File: gbt.py  Project: FurongPeng/GBDT
def main(data_filename,stat_filename,max_iter,sample_rate,learn_rate,max_depth,split_points):
    dataset=DataSet(data_filename);
    print "Model parameters configuration:[data_file=%s,stat_file=%s,max_iter=%d,sample_rate=%f,learn_rate=%f,max_depth=%d,split_points=%d]"%(data_filename,stat_filename,max_iter,sample_rate,learn_rate,max_depth,split_points);
    dataset.describe();
    stat_file=open(stat_filename,"w");
    stat_file.write("iteration\taverage loss in train data\tprediction accuracy on test data\taverage loss in test data\n");
    model=Model(max_iter,sample_rate,learn_rate,max_depth,split_points); 
    train_data=sample(dataset.get_instances_idset(),int(dataset.size()*2.0/3.0));
    test_data=set(dataset.get_instances_idset())-set(train_data);
    model.train(dataset,train_data,stat_file,test_data);
    #model.test(dataset,test_data);
    stat_file.close();
示例#10
0
    def numerize(self, ds_source):
        """
        Apply the numerizing operation to a given `DataSet`.

        :Parameters:
            ds_source : `DataSet`
                Data set to numerize.

        :Returns:
            `DataSet` : Numerized data set.

        :Raises NpyDataTypeError:
            If ds_source has already been numerized.
        """
        if ds_source.is_numerized == True:
            raise NpyDataTypeError, 'ds_source has already been numerized.'

        ds_dest = DataSet()
        ds_dest.set_name_attribute(ds_source.get_name_attribute())

        data_instances = ds_source.get_data_instances()
        for data_instance_old in data_instances:

            attributes = []

            # Process the attribute values
            for index, value in enumerate(data_instance_old.get_attributes()):
                try:
                    number = float(value)
                except ValueError:
                    # Every time a non-float attribute value is met,
                    # it is added to the numerizer
                    number = self.attribute_string_to_number(value, index) 
                attributes.append(number)

            # Process the label value
            label_old = data_instance_old.get_label_number()
            try:
                label_new = float(label_old)
            except ValueError:
                # Every time a non-float label value is met,
                # it is added to the numerizer
                label_new = self.label_string_to_number(label_old)

            ds_dest.add_data_instance(data_instance_old.get_index_number(), attributes, label_new)

        ds_dest.is_numerized = True
        return ds_dest 
示例#11
0
        str_ans_corr = fmt % ans_corr
        str_ans_out = fmt % ans_out
        if str_ans_corr == str_ans_out:
            outcome = CORRECT
        else:
            print >> sys.stderr, "correctness FAILED: %s (correct is %s, output had %s)" % (ppinput(input_graph), str_ans_corr, str_ans_out)
            outcome = INCORRECT

    # log the result of the correctness check
    if rev is not None and run is not None:
        if ti is None:
            ti = __get_ti(input_graph)

        data = CorrResult(ti.dims, ti.min, ti.max, ti.num_verts, ti.num_edges, ti.seed, rev, run, outcome)
        try:
            DataSet.add_data_to_log_file(data)
            print 'logged correctness result to ' + data.get_path()
        except DataError, e:
            fmt = "Unable to log result to file %s (correct is %s, output had %s): %s"
            print >> sys.stderr, fmt % (ppinput(input_graph), str_ans_corr, str_ans_out, e)

    return outcome

def main(argv=sys.argv[1:]):
    usage = """usage: %prog [options] INPUT_GRAPH OUTPUT_TO_CHECK
Checks the validity of an MST.  Exits with code 0 on success.  Otherwise, it
prints an error message and exits with a non-zero code.  Does not log the result."""
    parser = OptionParser(usage)
    parser.add_option("-f", "--force-recompute",
                      action="store_true", default=False,
                      help="recomputes the MST weight with the checker even if we have a cached value")
示例#12
0
def gather_perf_data(alg, rev, index, latest):
    """Gathers performance data for a single revision of an algorithm"""
    print 'gathering perf data for %s (rev=%s index=%u latest=%s)' % (alg, rev, index, str(latest))

    # get the results
    results = {} # maps (|V|, |E|) to ResultAccumulator
    ds = DataSet.read_from_file(PerfResult, PerfResult.get_path_to(rev))
    for data in ds.dataset.values():
        key = (data.input().num_verts, data.input().num_edges)
        result = results.get(key)
        if result is None:
            result = ResultAccumulator(data.time_sec)
            result.defaultCI = DEFAULT_CI
            results[key] = result
        else:
            result.add_data(data.time_sec)

    # put the results in order
    keys_density = results.keys()
    keys_density.sort(density_compare)
    keys_pom = results.keys()
    keys_pom.sort(pom_compare)
    keys = {}
    keys['density'] = keys_density
    keys['pom'] = keys_pom

    # compute stats for all the results
    for num_verts in results.keys():
        results[num_verts].compute_stats()

    # generate dat files for each x-axis cross important vertex counts
    for xaxis in keys:
        if xaxis == 'pom':
            computex = lambda v, e : get_percent_of_max(v, e)
        elif xaxis == 'density':
            computex = lambda v, e : get_density(v, e)
        else:
            print >> sys.stderr, "unexpected x-axis value: " + str(xaxis)
            sys.exit(-1)
        header_txt = '#|V|\t|E|\t' + xaxis + '\tLower\tAverage\tUpper\t#Runs  (Lower/Upper from ' + str(DEFAULT_CI) + '% CI)'

        for vip in IMPORTANT_VERTS:
            # open a file to output to
            dat = get_output_dat_name(xaxis, alg, rev, index, vip)
            print 'creating ' + dat
            if latest:
                latest_fn = make_latest(xaxis, alg, rev, index, vip)
            try:
                fh = open(dat, 'w')

                # compute relevant stats and output them
                print >> fh, header_txt
                count = 0
                for (v, e) in keys[xaxis]:
                    if vip=='all' or vip==v:
                        count += 1
                        r = results[(v, e)]
                        x = computex(v, e)
                        print >> fh, '%u\t%u\t%.6f\t%.3f\t%.3f\t%.3f\t%u' % (v, e, x, r.lower99, r.mean, r.upper99, len(r.values))
                fh.close()

                # don't create empty files
                if count == 0:
                    quiet_remove(dat)
                    if latest:
                        quiet_remove(latest_fn)

            except IOError, e:
                print sys.stderr, "failed to write file: " + str(e)
                return -1
示例#13
0
def main():
    usage = """usage: %prog [options]
Searches for missing results and uses run_test.py to collect it."""
    parser = OptionParser(usage)
    parser.add_option("-i", "--input_graph",
                      metavar="FILE",
                      help="restrict the missing data check to the specified input graph")
    parser.add_option("-l", "--inputs-list-file",
                      metavar="FILE",
                      help="collect data for all inputs in the specified log file")
    parser.add_option("--list-only",
                      action="store_true", default=False,
                      help="only list missing data (do not collect it)")
    parser.add_option("-n", "--num-runs",
                      type="int", default="1",
                      help="number of desired runs per revision-input combination [default: 1]")
    parser.add_option("-r", "--rev",
                      help="restrict the missing data check to the specified revision, or 'all' [default: current]")

    group = OptionGroup(parser, "Data Collection Options")
    group.add_option("-p", "--performance",
                      action="store_true", default=True,
                      help="collect performance data (this is the default)")
    group.add_option("-c", "--correctness",
                      action="store_true", default=False,
                      help="collect correctness data")
    parser.add_option_group(group)

    group2 = OptionGroup(parser, "Weight (Part II) Data Collection Options")
    group2.add_option("-v", "--num_vertices",
                      metavar="V", type="int", default=0,
                      help="collect weight data for V vertices (requires -d or -e)")
    group2.add_option("-d", "--dims",
                      metavar="D", type="int", default=0,
                      help="collect weight data for randomly positioned vertices in D-dimensional space (requires -v)")
    group2.add_option("-e", "--edge",
                      action="store_true", default=False,
                      help="collect weight data for random uniform edge weights in the range (0, 1] (requires -v)")
    parser.add_option_group(group2)

    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error("too many arguments")

    if options.num_runs < 1:
        parser.error("-n must be at least 1")
    input_solns = None

    # prepare for a weight data collection
    num_on = 0
    weight_test = False
    if options.num_vertices > 0:
        weight_test = True
        if options.input_graph or options.inputs_list_file:
            parser.error('-i, -l, and -v are mutually exclusive')

        if options.dims > 0:
            num_on += 1
            wtype = 'loc%u' % options.dims

        if options.edge:
            num_on += 1
            wtype = 'edge'

        if num_on == 0:
            parser.error('-v requires either -d or -e be specified too')

        if options.num_runs > 1:
            options.num_runs = 1
            print 'warning: -v truncates the number of runs to 1 (weight should not change b/w runs)'

        input_path = InputSolution.get_path_to(15, options.dims, 0.0, 1.0)
        print 'reading inputs to run on from ' + input_path
        input_solns = DataSet.read_from_file(InputSolution, input_path)
        revs = [None] # not revision-specific (assuming our alg is correct)
        get_results_for_rev = lambda _ : DataSet.read_from_file(WeightResult, WeightResult.get_path_to(wtype))
        collect_missing_data = collect_missing_weight_data
    elif options.dims > 0 or options.edge:
        parser.error('-v is required whenever -d or -e is used')

    # handle -i, -l: collect data for a particular graph(s)
    if options.input_graph and options.inputs_list_file:
        parser.error('-i and -l are mutually exclusive')
    if options.input_graph is not None:
        try:
            i = extract_input_footer(options.input_graph)
        except ExtractInputFooterError, e:
            parser.error(e)
        input_solns = DataSet({0:InputSolution(i.prec,i.dims,i.min,i.max,i.num_verts,i.num_edges,i.seed)})
示例#14
0
        get_results_for_rev = lambda _ : DataSet.read_from_file(WeightResult, WeightResult.get_path_to(wtype))
        collect_missing_data = collect_missing_weight_data
    elif options.dims > 0 or options.edge:
        parser.error('-v is required whenever -d or -e is used')

    # handle -i, -l: collect data for a particular graph(s)
    if options.input_graph and options.inputs_list_file:
        parser.error('-i and -l are mutually exclusive')
    if options.input_graph is not None:
        try:
            i = extract_input_footer(options.input_graph)
        except ExtractInputFooterError, e:
            parser.error(e)
        input_solns = DataSet({0:InputSolution(i.prec,i.dims,i.min,i.max,i.num_verts,i.num_edges,i.seed)})
    elif options.inputs_list_file is not None:
        input_solns = DataSet.read_from_file(InputSolution, options.inputs_list_file)

    # prepare for a correctness data collection
    if options.correctness:
        num_on += 1
        get_results_for_rev = lambda rev : DataSet.read_from_file(CorrResult, CorrResult.get_path_to(rev))
        options.inputs_list_file_arg = '' if options.inputs_list_file is None else ' -l ' + options.inputs_list_file
        collect_missing_data = lambda w,x,y,z: collect_missing_correctness_data(w,x,y,z,options.inputs_list_file_arg)

    # make sure no more than 1 type of data collection was specified
    if num_on > 1:
        parser.error('at most one of -c, -d, and -e may be specified')
    elif num_on == 0:
        # prepare for a performance data collection (default if nothing else is specified)
        get_results_for_rev = lambda rev : DataSet.read_from_file(PerfResult, PerfResult.get_path_to(rev))
        collect_missing_data = collect_missing_performance_data
extract all 101 classes. For instance, set class_limit = 8 to just
extract features for the first 8 (alphabetical) classes in the dataset.
Then set the same number when training models.
"""
import numpy as np
import os.path
from data import DataSet
from extractor import Extractor
from tqdm import tqdm

# Set defaults.
seq_length = 40
class_limit = None  # Number of classes to extract. Can be 1-101 or None for all.

# Get the dataset.
data = DataSet(seq_length=seq_length, class_limit=class_limit)

# Get the feature-extractor model.
model = Extractor()

# Loop through the videos, extracting features for each frame sequence.
pbar = tqdm(total=len(data.data))
for video in data.data:

    # Get the path to the sequence for this video.
    # video[2] is presumably the video's filename stem — TODO confirm
    # against the row layout produced by DataSet.
    path = os.path.join('data', 'sequences', video[2] + '-' + str(seq_length) + \
        '-features')  # numpy will auto-append .npy

    # Check if we already have it; if so, just advance the progress bar.
    # NOTE(review): the loop body continues past this excerpt.
    if os.path.isfile(path + '.npy'):
        pbar.update(1)
示例#16
0
class Algorithm:
    """
    
    An abstract class to define the specifics of a wrapper of an algorithm. 
    An object of this class represents to an executable 
    wrapper of target algorithm. It provokes the target
    algorithm to solve a problem and collects the elementary
    measures

    An object of this class works as an interface of the target algorithm 
    with OPAL. It contains at least three informations:
    
    1. What are the parammeters
    2. How to invoke the algorithm to solve a problem
    3. What are the measures we get after running algorithm

    
    :parameters:
        :name:  Name of the algorithm (string)
        :purpose: Synopsis of purpose (string)

    Each algorithm has two aspect:

     1. Algorithmic aspect: the name, purpose, parameters, measures and the
        constraints on the parameters. The measures represent the output of the
        alorithm.

     2. Computational aspect: the description of how to run algorithm and what
        the output is.

    Example:

      >>> dfo = Algorithm(name='DFO', purpose='Derivative-free optimization')
      >>> delmin = Parameter(default=1.0e-3, name='DELMIN')
      >>> dfo.add_param(delmin)
      >>> maxit = Parameter(type='integer', default=100, name='MAXIT')
      >>> dfo.add_param(maxit)
      >>> cpuTime = Measure(type='real', name='TIME')
      >>> dfo.add_measure(cpuTime)
      >>> print [param.name for param in dfo.parameters]
      ['DELMIN', 'MAXIT']
      >>> real_params = [param for param in dfo.parameters if param.is_real]
      >>> print [param.name for param in real_params]
      ['DELMIN']
    """

    def __init__(self, name=None, description=None, **kwargs):
       
        # Algorithmic description
        self.name = name
        self.description = description
        self.parameters = DataSet(name='Parameter set')  # List of parameters 
                                                         # (of type Parameter)
        self.measures = DataSet(name='Measure set')  # List of measures 
                                                     # (the observation of the 
                                                     # algorithm)
        self.constraints = []

        # Computational description
        self.parameter_file = self.name + '.param'
        self.sessions = {} # dictionary map between session id and parameter
                           # values

    def add_param(self, param):
        "Add a parameter to an algorithm"
        if isinstance(param, Parameter):
            self.parameters.append(param)
        else:
            raise TypeError, 'param must be a Parameter'
        return
    
    def add_measure(self, measure):
        "Add a measure to an algorithm"
        if isinstance(measure, Measure):
            self.measures.append(measure)
        else:
            raise TypeError, 'measure must be a Measure object'
        return

   
    def update_parameters(self, parameters):
        """
        
        This method return an unique identity for the 
        test basing on the parameter values

        The identity obtains by hashing the parameter values string. 
        This is an inversable function. It means that we can get 
        the parameter_values form the id


        This virtual method determines how values for the parameters of the
        algorithm are written to intermediated file that are read after by 
        algorithm driver. 
        
        The format of intermediated file depend on this method. By default, 
        the parameter set are written by pickle.
       
        """
        values = dict((param.name,param.value) for param in parameters)
        # Fill the values to parameter set
        self.parameters.set_values(values)
        # Write the values to a temporary parameter file 
        # for communicating with an executable wrapper 
        return 
    

    def create_tag(self, problem):
        return 

    def set_executable_command(self, command):
        self.executable = command
        return

    def write_parameter(self, fileName):
        f = open(fileName, 'w')
        for param in self.parameters:
            f.write(param.name + ':' +  param.kind + ':' + \
                    str(param.value) + '\n')
        f.close()
        return

    def read_measure(self, fileName):
        """

        Ths virtual method determines how to  measure value from the
        output of the algorithm.

        :parameters:
            :problem:
            :measures: List of measures we want to extract

        :returns: A mapping measure name --> measure value

        By default, the algorithm returns the measure values to the standard
        output. In the `run()` method, the output is redirected to file.
        """
        
        f = open(fileName)
        lines = f.readlines()
        f.close()
        converters = {'categorical':str, 'integer':int, 'real':float}
        measure_values = {}
        for line in lines:
            line.strip('\n')
            if len(line) < 1:
                continue
            fields = line.split(' ')
            if len(fields) < 2:
                continue
            measureName = fields[0].strip(' ')
            if measureName not in self.measures:
                continue
            measure_values[measureName] = fields[1].strip(' ')
        for i in range(len(self.measures)):
            convert = converters[self.measures[i].get_type()]
            try:
                measure_values[self.measures[i].name] = \
                    convert(measure_values[self.measures[i].name])
            except ValueError:
                return None
        return measure_values

    def solve(self, problem, parameters=None, parameterTag=None ):
        """
        .. warning::

            Why do we need `paramValues` here???
            What kind of object is `problem`???

        This virtual method determines how to run the algorithm.

        :parameters:
            :paramValues: List of parameter values
            :problem: Problem (???)

        :returns: The command for executing the algorithm.

        By default, the algorithm is called by the command 

            `./algorithm paramfile problem`
        """
        
        if parameters is not None:
            self.update_parameters(parameters)

        if parameterTag is not None:
            sessionTag = problem.name + '_' + parameterTag
        else:
            sessionTag = self.create_tag(problem)

        algoName = self.name.replace(' ','_')
        parameterFile = algoName + '_' +\
                        str(sessionTag) +\
                        '.param'
                                                        
        outputFile = algoName + '_' +\
                     str(sessionTag) +\
                     '.measure'

        if not os.path.exists(parameterFile):
            self.write_parameter(parameterFile)
        cmd = self.executable + ' ' +\
              parameterFile + ' ' +\
              problem.name + ' ' +\
              outputFile        
       
            
        return cmd, parameterFile, outputFile, sessionTag

    
    def add_parameter_constraint(self, paramConstraint):
        """
        Specify the domain of a parameter.
        """
        if isinstance(paramConstraint, ParameterConstraint):
            self.constraints.append(paramConstraint)
        elif isinstance(paramConstraint, str):
            self.constraints.append(ParameterConstraint(paramConstraint))
        else:
            msg = 'paramConstraint must be a String or ParameterConstraint'
            raise TypeError, msg
        return

    def are_parameters_valid(self):
        """
        Return True if all parameters are in their domain and satisfy the
        constraints. Return False otherwise.
        """
        #print '[algorithm.py]',[param.value for param in parameters]
        for constraint in self.constraints:
            if constraint(self.parameters) is ParameterConstraint.violated:
                return ParameterConstraint.violated
        for param in self.parameters:
            if not param.is_valid():
                return False
        return True
def train(data_type, seq_length, model, saved_model=None,
          class_limit=None, image_shape=None,
          load_to_memory=False, batch_size=32, nb_epoch=100):
    """Train `model` on `data_type` features, checkpointing the best weights."""
    # Callback: checkpoint the best model seen so far.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints', model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Callback: TensorBoard logging.
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Callback: stop once validation loss stops improving.
    early_stopper = EarlyStopping(patience=5)

    # Callback: CSV log, timestamped per run.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs', model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    # Build the dataset; forward image_shape only when one was supplied.
    dataset_kwargs = {'seq_length': seq_length, 'class_limit': class_limit}
    if image_shape is not None:
        dataset_kwargs['image_shape'] = image_shape
    data = DataSet(**dataset_kwargs)

    # Multiply by 0.7 to approximate how much of data.data is the train split.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Materialize every sequence up front.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Stream batches from disk.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Instantiate the model wrapper (loads weights from saved_model if given).
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    if load_to_memory:
        # In-memory training with the standard fit loop.
        rm.model.fit(
            X,
            y,
            batch_size=batch_size,
            validation_data=(X_test, y_test),
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger],
            epochs=nb_epoch)
    else:
        # Generator-based training; the checkpointer callback is used only
        # on this path, matching the original behaviour.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
示例#18
0
#!/usr/bin/env python
# Gather correctness data: for every input listed in a log file, regenerate
# the graph and (re)compute its MST weight through the checker.

from data import DataSet, InputSolution
from check_output import get_and_log_mst_weight_from_checker
from generate_input import main as generate_input
import sys, time

if len(sys.argv) != 2:
    print 'usage: gather_correctness.py LOG_FN'
    sys.exit(-1)

# get the file to read inputs from
logfn = sys.argv[1]
ds = DataSet.read_from_file(InputSolution, logfn)

# compute correctness for each input, in sorted order
inputs = ds.dataset.keys() # Input objects
inputs.sort()
on = 0
for i in inputs:
    on += 1
    # figure out how to generate the graph and where it will be stored
    argstr = '-mt ' + i.make_args_for_generate_input()
    input_graph = generate_input(argstr.split(), get_output_name_only=True)
    print time.ctime(time.time()) + ' input # ' + str(on) + ' => gathering correctness data for ' + argstr

    # generate the graph
    generate_input(argstr.split())

    # compute the weight for the graph (cached result reused when available)
    get_and_log_mst_weight_from_checker(input_graph, force_recompute=False, inputslogfn=logfn)
示例#19
0
    mst_weight = -1
    if options.dont_generate:
        print_if_not_quiet('graph not saved (as requested)')
    else:
        print_input_footer(num_verts, num_edges, about, out)
        print_if_not_quiet('graph saved to ' + ppinput(options.output_file))
        if out != sys.stdout:
            out.close()

        # generate output with correctness checker, if desired
        if options.correctness:
            if options.dont_track:
                print >> sys.stderr, "warning: skipping correctness output (only done when -t is not specified)"
                return 0
            try:
                mst_weight = compute_mst_weight(options.output_file)
            except CheckerError, e:
                print >> sys.stderr, e

    # record this new input in our input log
    if not options.dont_track:
        data = InputSolution(options.precision, dimensionality, min_val, max_val, num_verts, num_edges, __RND_SEED, mst_weight)
        path = data.get_path() if options.inputs_list_file is None else options.inputs_list_file
        DataSet.add_data_to_log_file(data, path)
        print_if_not_quiet('logged to ' + path)

    return 0

# Script entry point: process exit status comes from main().
if __name__ == "__main__":
    sys.exit(main())