Example #1
    def __init__(self,
                 filename,
                 batch_size=20,
                 learning_rate=.001,
                 weight_penalty=0.0,
                 model_type='classification'):
        '''Initialize the class by loading the required datasets and building
        the tensorflow computation graph.

        Args:
            filename: a file containing the data.
            batch_size: number of training examples in each training batch. 
            learning_rate: the initial learning rate used in stochastic 
                gradient descent.
            weight_penalty: the coefficient of the L2 weight regularization
                applied to the loss function. Set to > 0.0 to apply weight 
                regularization, 0.0 to remove.
            model_type: the type of model. Either 'classification', in
                which case the model is a logistic regression classifier, or
                'regression', in which case it is a linear regression model.
        '''
        # Save the hyperparameters
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.weight_penalty = weight_penalty

        # Optimization function used to train the model. Adam is a strong
        # default for most problems, but alternatives such as
        # tf.train.AdagradOptimizer or tf.train.GradientDescentOptimizer
        # can be substituted here.
        self.optimizer = tf.train.AdamOptimizer

        # Logistics
        self.model_type = model_type
        self.output_every_nth = 10  # save performance every n steps

        # Extract the data from the filename
        self.data_loader = data_funcs.DataLoader(filename)
        self.input_size = self.data_loader.get_feature_size()
        if model_type == 'classification':
            print("\nPerforming classification.")
            self.output_size = 1  # limited to binary classification
            self.metric_name = 'accuracy'
        else:
            print("\nPerforming regression.")
            self.output_size = self.data_loader.num_outputs
            self.metric_name = 'RMSE'

        # Set up tensorflow computation graph.
        self.graph = tf.Graph()
        self.build_graph()

        # Set up and initialize tensorflow session.
        self.session = tf.Session(graph=self.graph)
        self.session.run(self.init)

        # Use for plotting evaluation.
        self.train_metrics = []
        self.val_metrics = []
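
The class stores the optimizer class itself rather than an instance, so the
training op is presumably created inside build_graph() (not shown). A minimal
sketch of how that wiring typically looks in TF1-style code; self.loss and
self.opt_step are assumed names that do not appear in the source:

    # Hypothetical excerpt from build_graph(); self.loss is assumed to be
    # the scalar loss tensor defined elsewhere in the graph.
    with self.graph.as_default():
        self.opt_step = self.optimizer(self.learning_rate).minimize(self.loss)
        self.init = tf.global_variables_initializer()
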
Example #2
    def __init__(self, filename, C=1.0, kernel='linear', gamma=.01,
                 poly_degree=3, max_iter=-1, tolerance=0.001):
        """Initialize the class by loading the required data and setting
        the parameters.

        Args:
            filename: a file containing the data.
            C: a float for the soft-margin SVM misclassification penalty.
            kernel: the type of kernel to use. Can be 'linear', 'rbf', or 'poly'. 
            gamma: a float kernel coefficient, used by the 'rbf' and 'poly'
                kernels.
            poly_degree: the degree of the polynomial used in the 'poly' kernel. 
            max_iter: the maximum number of iterations to run when training.
            tolerance: a float epsilon value. If the loss function changes by
                tolerance or less between iterations, training will stop.
        """
        # Load the data.
        self.data_loader = data_funcs.DataLoader(filename)

        # Set the parameters.
        self.C = C
        self.gamma = gamma
        self.kernel = kernel
        self.poly_degree = poly_degree
        self.max_iter = max_iter
        self.tolerance = tolerance

        self.classifier = None
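
The parameter names and defaults above line up with scikit-learn's
sklearn.svm.SVC, though the source never names its backend, so that mapping
is an assumption. A minimal sketch of a train() method under that assumption;
the method name and the get_train_data() accessor are hypothetical:

    from sklearn.svm import SVC

    def train(self):
        # Hypothetical accessor; assumes DataLoader exposes training
        # features X and labels y.
        X, y = self.data_loader.get_train_data()
        self.classifier = SVC(C=self.C, kernel=self.kernel, gamma=self.gamma,
                              degree=self.poly_degree, max_iter=self.max_iter,
                              tol=self.tolerance)
        self.classifier.fit(X, y)
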
Example #3
    def load_data(self):
        """Initializes the class's data_loader object, which takes care of
        loading data from a file."""
        self.data_loader = data_funcs.DataLoader(
            self.datasets_path + self.filename,
            normalize_and_fill=self.normalize_and_fill,
            cross_validation=self.cross_validation,
            normalization=self.normalization)
Example #4
    def load_data(self):
        """Use the DataLoader class to load unsupervised and supervised data
        from files for the MMAE and classification portions of the network,
        respectively.
        """
        self.data_loader = data_funcs.DataLoader(
            self.datasets_path + self.mmae_filename,
            normalize_and_fill=False,
            supervised=False,
            cross_validation=True,
            separate_noisy_data=self.check_noisy_data)
        self.classification_data_loader = data_funcs.DataLoader(
            self.datasets_path + self.classification_filename,
            normalize_and_fill=False,
            cross_validation=True,
            supervised=True,
            separate_noisy_data=self.check_noisy_data,
            wanted_label=self.wanted_label)
Example #5
    def load_data(self):
        """Loads data from csv files using the DataLoader class."""
        self.data_loader = data_funcs.DataLoader(
            self.datasets_path + self.filename,
            normalize_and_fill=False,
            supervised=False,
            #cross_validation=True,
            normalization=self.normalization,
            fill_missing_with=self.fill_missing)

        # Loads additional classification data
        self.classification_data_loader = data_funcs.DataLoader(
            self.datasets_path + self.classification_filename,
            normalize_and_fill=False,
            supervised=True,
            #cross_validation=True,
            normalization=self.normalization,
            fill_missing_with=self.fill_missing,
            separate_noisy_data=True)
Example #6
    def load_data(self):
        """Loads data from csv files using the DataLoader class. Labels must
        be converted to {-1, 1}."""
        self.data_loader = data_funcs.DataLoader(
            self.datasets_path + self.filename,
            normalize_and_fill=self.normalize_and_fill,
            cross_validation=True,
            supervised=True,
            wanted_label=self.wanted_label,
            normalization=self.normalization,
            labels_to_sign=True,
            separate_noisy_data=self.check_noisy_data)
Example #7
    def load_data(self):
        """Initializes the class's data loader object. Specific to
        classification, because the data loader must load supervised data
        based on the wanted class label, and possibly separate noisy data."""
        self.data_loader = data_funcs.DataLoader(
            self.datasets_path + self.filename,
            normalize_and_fill=self.normalize_and_fill,
            cross_validation=self.cross_validation,
            supervised=True,
            wanted_label=self.wanted_label,
            normalization=self.normalization,
            separate_noisy_data=self.check_noisy_data)
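
The DataLoader API itself never appears in this section; the stub below is
inferred purely from the keyword arguments used across Examples #3 through #7.
A hypothetical constructor signature consistent with those call sites (the
parameter names come from the examples, but every default and comment is a
guess):

    class DataLoader(object):
        """Hypothetical signature reconstructed from the call sites above."""
        def __init__(self, filename,
                     normalize_and_fill=False,   # normalize features, fill gaps
                     cross_validation=False,     # build cross-validation splits
                     supervised=False,           # load labels as well as features
                     wanted_label=None,          # which label column to predict
                     normalization=None,         # normalization scheme to apply
                     labels_to_sign=False,       # remap labels to {-1, 1}
                     separate_noisy_data=False,  # hold noisy rows out separately
                     fill_missing_with=None):    # imputation value or strategy
            self.filename = filename
            # ... file loading and preprocessing would go here ...
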
Example #8
    def __init__(self,
                 filename=None,
                 layer_sizes=[128, 64],
                 batch_size=20,
                 learning_rate=.001,
                 dropout_prob=1.0,
                 weight_penalty=0.0,
                 model_name='NN',
                 clip_gradients=True,
                 data_loader=None,
                 checkpoint_dir=DEFAULT_MAIN_DIRECTORY + 'temp_saved_models/',
                 verbose=True):
        '''Initialize the class by loading the required datasets 
        and building the graph.

        Args:
            filename: a file containing the data.
            layer_sizes: a list of sizes of the neural network layers.
            batch_size: number of training examples in each training batch. 
            learning_rate: the initial learning rate used in stochastic 
                gradient descent.
            dropout_prob: the probability that a node in the network will not
                be dropped out during training. Set to < 1.0 to apply dropout, 
                1.0 to remove dropout.
            weight_penalty: the coefficient of the L2 weight regularization
                applied to the loss function. Set to > 0.0 to apply weight 
                regularization, 0.0 to remove.
            model_name: name of the model being trained. Used in saving
                model checkpoints.
            clip_gradients: a bool indicating whether or not to clip gradients. 
                This is effective in preventing very large gradients from skewing 
                training, and preventing your loss from going to inf or nan. 
            data_loader: A DataLoader class object which already has pre-loaded
                data.
            checkpoint_dir: the directory where the model will save
                checkpoints, i.e. files containing trained network weights.
            verbose: if True, will print many informative output statements.
        '''
        # Hyperparameters that should be tuned
        self.layer_sizes = layer_sizes
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.dropout_prob = dropout_prob
        self.weight_penalty = weight_penalty

        # Hyperparameters that could be tuned
        # (but are probably the best to use)
        self.clip_gradients = clip_gradients
        self.activation_func = 'relu'
        self.optimizer = tf.train.AdamOptimizer

        # Logistics
        self.checkpoint_dir = checkpoint_dir
        self.filename = filename
        self.model_name = model_name
        self.output_every_nth = 100
        self.verbose = verbose

        # Extract the data from the filename
        if data_loader is None:
            self.data_loader = data_funcs.DataLoader(filename)
        else:
            self.data_loader = data_loader
        self.input_size = self.data_loader.get_feature_size()
        self.output_size = self.data_loader.num_labels
        if self.verbose:
            print("Input dimensions (number of features):", self.input_size)
            print("Number of classes/outputs:", self.output_size)

        # Set up tensorflow computation graph.
        self.graph = tf.Graph()
        self.build_graph()

        # Set up and initialize tensorflow session.
        self.session = tf.Session(graph=self.graph)
        self.session.run(self.init)

        # Use for plotting evaluation.
        self.train_acc = []
        self.val_acc = []
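
The docstring describes what clip_gradients is for, but the mechanics live in
build_graph(), which is not shown. A minimal sketch of the usual TF1-era
pattern; the clip threshold and the names self.loss and self.opt_step are
assumptions that do not appear in the source:

    # Hypothetical excerpt from build_graph(). tf.clip_by_value caps each
    # gradient elementwise, so one bad batch cannot blow up training.
    opt = self.optimizer(self.learning_rate)
    if self.clip_gradients:
        grads_and_vars = opt.compute_gradients(self.loss)
        clipped = [(tf.clip_by_value(g, -5.0, 5.0), v)
                   for g, v in grads_and_vars if g is not None]
        self.opt_step = opt.apply_gradients(clipped)
    else:
        self.opt_step = opt.minimize(self.loss)
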
Example #9
    def __init__(self, filename, model_name, layer_sizes=[128, 64, 32],
                 batch_size=25, learning_rate=.01, dropout_prob=0.9,
                 weight_penalty=0.01, clip_gradients=True,
                 model_type='regression', checkpoint_dir='./saved_models/'):
        '''Initialize the class by loading the required datasets 
        and building the graph.

        Args:
            filename: a file containing the data.
            model_name: name of the model being trained. Used in saving
                model checkpoints.
            layer_sizes: a list of sizes of the neural network layers.
            batch_size: number of training examples in each training batch. 
            learning_rate: the initial learning rate used in stochastic 
                gradient descent.
            dropout_prob: the probability that a node in the network will not
                be dropped out during training. Set to < 1.0 to apply dropout, 
                1.0 to remove dropout.
            weight_penalty: the coefficient of the L2 weight regularization
                applied to the loss function. Set to > 0.0 to apply weight 
                regularization, 0.0 to remove.
            clip_gradients: a bool indicating whether or not to clip gradients. 
                This is effective in preventing very large gradients from skewing 
                training, and preventing your loss from going to inf or nan. 
            model_type: the type of output prediction. Either 'classification'
                or 'regression'.
            checkpoint_dir: the directory where the model will save
                checkpoints, i.e. files containing trained network weights.
        '''
        # Containers for the per-task weights and biases.
        self.task_weights = []
        self.task_bias = []

        # Hyperparameters that should be tuned
        self.layer_sizes = layer_sizes
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.dropout_prob = dropout_prob
        self.weight_penalty = weight_penalty

        # Hyperparameters that could be tuned 
        # (but are probably the best to use)
        self.clip_gradients = clip_gradients
        self.activation_func = 'relu'
        self.optimizer = tf.train.AdamOptimizer

        # Logistics
        self.checkpoint_dir = checkpoint_dir
        self.filename = filename
        self.model_name = model_name
        self.model_type = model_type
        self.output_every_nth = 10

        # Extract the data from the filename
        self.data_loader = data_funcs.DataLoader(filename)
        self.input_size = self.data_loader.get_feature_size()
        if model_type == 'classification':
            print("\nPerforming classification.")
            self.output_size = self.data_loader.num_classes
            self.metric_name = 'accuracy'
        else:
            print("\nPerforming regression.")
            self.output_size = self.data_loader.num_outputs
            self.metric_name = 'RMSE'
        print("Input dimensions (number of features):", self.input_size)
        print("Number of classes/outputs:", self.output_size)
        
        # Set up tensorflow computation graph.
        self.graph = tf.Graph()
        self.build_graph()

        # Set up and initialize tensorflow session.
        self.session = tf.Session(graph=self.graph)
        self.session.run(self.init)

        # Use for plotting evaluation.
        self.train_metrics = []
        self.val_metrics = []
        self.validation_scores = []
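
Both NN examples expose dropout_prob and weight_penalty but leave their wiring
to build_graph(). A minimal sketch of the usual TF1 pattern, assuming the
hypothetical tensors hidden, weights, and base_loss (none of these names
appear in the source):

    # Hypothetical excerpt from build_graph().
    # Dropout: keep_prob is fed as dropout_prob during training and 1.0 at
    # evaluation time, matching the docstring's keep-probability semantics.
    self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    hidden = tf.nn.dropout(hidden, keep_prob=self.keep_prob)

    # L2 weight penalty: add the summed squared weights, scaled by the
    # coefficient, to the base loss. weight_penalty=0.0 disables it.
    l2_term = tf.add_n([tf.nn.l2_loss(w) for w in weights])
    self.loss = base_loss + self.weight_penalty * l2_term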