class Bidirectional_Recurrent_Neural_Network_Language_Model(Vector_Math, object):
    """features are stored in format max_seq_len x nseq x nvis where max_seq_len is the maximum number of observations per sequence
    and nseq is the number of sequences
    weights are stored as nvis x nhid at feature level
    biases are stored as 1 x nhid
    weight_type is either rbm_gaussian_bernoulli, rbm_bernoulli_bernoulli, or logistic"""
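    # Layout example (illustrative): two sentences of lengths 3 and 2 are a
    # 3 x 2 int32 matrix of word indices with feature_sequence_lens = [3, 2];
    # entries past a sequence's end are padding, and forward_pass() leaves the
    # corresponding hiddens/outputs at zero.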

    def __init__(self, config_dictionary):  # completed
        """variables for Neural Network: feature_file_name(read from)
        required_variables - required variables for running system
        all_variables - all valid variables for each type"""
        self.feature_file_name = self.default_variable_define(config_dictionary, "feature_file_name", arg_type="string")
        self.features, self.feature_sequence_lens = self.read_feature_file()
        self.model = Bidirectional_RNNLM_Weight()
        self.output_name = self.default_variable_define(config_dictionary, "output_name", arg_type="string")

        self.required_variables = dict()
        self.all_variables = dict()
        self.required_variables["train"] = ["mode", "feature_file_name", "output_name"]
        self.all_variables["train"] = self.required_variables["train"] + [
            "label_file_name",
            "num_hiddens",
            "weight_matrix_name",
            "initial_weight_max",
            "initial_weight_min",
            "initial_bias_max",
            "initial_bias_min",
            "save_each_epoch",
            "do_pretrain",
            "pretrain_method",
            "pretrain_iterations",
            "pretrain_learning_rate",
            "pretrain_batch_size",
            "do_backprop",
            "backprop_method",
            "backprop_batch_size",
            "l2_regularization_const",
            "num_epochs",
            "num_line_searches",
            "armijo_const",
            "wolfe_const",
            "steepest_learning_rate",
            "momentum_rate",
            "conjugate_max_iterations",
            "conjugate_const_type",
            "truncated_newton_num_cg_epochs",
            "truncated_newton_init_damping_factor",
            "krylov_num_directions",
            "krylov_num_batch_splits",
            "krylov_num_bfgs_epochs",
            "second_order_matrix",
            "krylov_use_hessian_preconditioner",
            "krylov_eigenvalue_floor_const",
            "fisher_preconditioner_floor_val",
            "use_fisher_preconditioner",
            "structural_damping_const",
            "validation_feature_file_name",
            "validation_label_file_name",
        ]
        self.required_variables["test"] = ["mode", "feature_file_name", "weight_matrix_name", "output_name"]
        self.all_variables["test"] = self.required_variables["test"] + ["label_file_name"]

    def dump_config_vals(self):
        no_attr_key = list()
        print "********************************************************************************"
        print "Neural Network configuration is as follows:"

        for key in self.all_variables[self.mode]:
            if hasattr(self, key):
                print key, "=", eval("self." + key)
            else:
                no_attr_key.append(key)

        print "********************************************************************************"
        print "Undefined keys are as follows:"
        for key in no_attr_key:
            print key, "not set"
        print "********************************************************************************"

    def default_variable_define(
        self,
        config_dictionary,
        config_key,
        arg_type="string",
        default_value=None,
        error_string=None,
        exit_if_no_default=True,
        acceptable_values=None,
    ):
        # arg_type is either int, float, string, int_comma_string, float_comma_string, boolean
        try:
            if arg_type == "int_comma_string":
                return self.read_config_comma_string(config_dictionary[config_key], needs_int=True)
            elif arg_type == "float_comma_string":
                return self.read_config_comma_string(config_dictionary[config_key], needs_int=False)
            elif arg_type == "int":
                return int(config_dictionary[config_key])
            elif arg_type == "float":
                return float(config_dictionary[config_key])
            elif arg_type == "string":
                return config_dictionary[config_key]
            elif arg_type == "boolean":
                if (
                    config_dictionary[config_key] == "False"
                    or config_dictionary[config_key] == "0"
                    or config_dictionary[config_key] == "F"
                ):
                    return False
                elif (
                    config_dictionary[config_key] == "True"
                    or config_dictionary[config_key] == "1"
                    or config_dictionary[config_key] == "T"
                ):
                    return True
                else:
                    print config_dictionary[
                        config_key
                    ], "is not valid for boolean type... Acceptable values are True, False, 1, 0, T, or F... Exiting now"
                    sys.exit()
            else:
                print arg_type, "is not a valid type, arg_type can be either int, float, string, int_comma_string, float_comma_string... exiting now"
                sys.exit()
        except KeyError:
            if error_string is not None:
                print error_string
            else:
                print "No", config_key, "defined,",
            if default_value is None and exit_if_no_default:
                print "since", config_key, "must be defined... exiting now"
                sys.exit()
            else:
                if acceptable_values is not None and (default_value not in acceptable_values):
                    print default_value, "is not an acceptable input, acceptable inputs are", acceptable_values, "... Exiting now"
                    sys.exit()
                if error_string is None:
                    print "setting", config_key, "to", default_value
                return default_value
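    # Usage sketch (hypothetical config values, not from a real config file):
    #   config = {"mode": "train", "num_epochs": "20"}
    #   self.default_variable_define(config, "num_epochs", arg_type="int")
    #   # -> 20
    #   self.default_variable_define(config, "momentum_rate",
    #                                arg_type="float_comma_string", default_value=[0.9])
    #   # -> prints "No momentum_rate defined, setting momentum_rate to [0.9]"
    #   #    and returns [0.9]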

    def read_feature_file(self, feature_file_name=None):  # completed
        if feature_file_name is None:
            feature_file_name = self.feature_file_name
        try:
            feature_data = sp.loadmat(feature_file_name)
            features = feature_data["features"].astype(np.int32)
            sequence_len = feature_data["feature_sequence_lengths"]
            sequence_len = np.reshape(sequence_len, (sequence_len.size,))
            return features, sequence_len  # in MATLAB format
        except IOError:
            print "Unable to open ", feature_file_name, "... Exiting now"
            sys.exit()

    def read_label_file(self, label_file_name=None):  # completed
        """label file is a two-column file in the form
        sent_id label_1
        sent_id label_2
        ...
        """
        if label_file_name is None:
            label_file_name = self.label_file_name
        try:
            label_data = sp.loadmat(label_file_name)["labels"].astype(np.int32)
            return label_data  # in MATLAB format
        except IOError:
            print "Unable to open ", label_file_name, "... Exiting now"
            sys.exit()

    def batch_size(self, feature_sequence_lens):
        return np.sum(feature_sequence_lens)

    def read_config_comma_string(self, input_string, needs_int=False):
        output_list = []
        for elem in input_string.split(","):
            if "*" in elem:
                elem_list = elem.split("*")
                if needs_int:
                    output_list.extend([int(elem_list[1])] * int(elem_list[0]))
                else:
                    output_list.extend([float(elem_list[1])] * int(elem_list[0]))
            else:
                if needs_int:
                    output_list.append(int(elem))
                else:
                    output_list.append(float(elem))
        return output_list
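    # Example: the '*' shorthand repeats a value, so
    #   self.read_config_comma_string("3*0.1,0.05")             # -> [0.1, 0.1, 0.1, 0.05]
    #   self.read_config_comma_string("2*5,1", needs_int=True)  # -> [5, 5, 1]
    # presumably how per-epoch schedules like steepest_learning_rate are written.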

    def levenshtein_string_edit_distance(self, string1, string2):  # completed
        dist = dict()
        string1_len = len(string1)
        string2_len = len(string2)

        for idx in range(-1, string1_len + 1):
            dist[(idx, -1)] = idx + 1
        for idx in range(-1, string2_len + 1):
            dist[(-1, idx)] = idx + 1

        for idx1 in range(string1_len):
            for idx2 in range(string2_len):
                if string1[idx1] == string2[idx2]:
                    cost = 0
                else:
                    cost = 1
                dist[(idx1, idx2)] = min(
                    dist[(idx1 - 1, idx2)] + 1,  # deletion
                    dist[(idx1, idx2 - 1)] + 1,  # insertion
                    dist[(idx1 - 1, idx2 - 1)] + cost,  # substitution
                )
                if idx1 and idx2 and string1[idx1] == string2[idx2 - 1] and string1[idx1 - 1] == string2[idx2]:
                    dist[(idx1, idx2)] = min(dist[(idx1, idx2)], dist[idx1 - 2, idx2 - 2] + cost)  # transposition
        return dist[(string1_len - 1, string2_len - 1)]
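    # Note: the transposition case above makes this the Damerau-Levenshtein
    # (optimal string alignment) distance rather than plain Levenshtein, e.g.
    #   self.levenshtein_string_edit_distance("ab", "ba")           # -> 1
    #   self.levenshtein_string_edit_distance("kitten", "sitting")  # -> 3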

    def check_keys(self, config_dictionary):  # completed
        print "Checking config keys...",
        exit_flag = False

        config_dictionary_keys = config_dictionary.keys()

        if self.mode == "train":
            correct_mode = "train"
            incorrect_mode = "test"
        elif self.mode == "test":
            correct_mode = "test"
            incorrect_mode = "train"

        for req_var in self.required_variables[correct_mode]:
            if req_var not in config_dictionary_keys:
                print req_var, "needs to be set for", correct_mode, "but is not."
                if exit_flag == False:
                    print "Because of above error, will exit after checking rest of keys"
                    exit_flag = True

        for var in config_dictionary_keys:
            if var not in self.all_variables[correct_mode]:
                print var, "in the config file given is not a valid key for", correct_mode
                if var in self.all_variables[incorrect_mode]:
                    print "but", var, "is a valid key for", incorrect_mode, "so either the mode or key is incorrect"
                else:
                    string_distances = np.array(
                        [
                            self.levenshtein_string_edit_distance(var, string2)
                            for string2 in self.all_variables[correct_mode]
                        ]
                    )
                    print "perhaps you meant ***", self.all_variables[correct_mode][
                        np.argmin(string_distances)
                    ], "\b*** (levenshtein string edit distance", np.min(
                        string_distances
                    ), "\b) instead of ***", var, "\b***?"
                if exit_flag == False:
                    print "Because of above error, will exit after checking rest of keys"
                    exit_flag = True

        if exit_flag:
            print "Exiting now"
            sys.exit()
        else:
            print "seems copacetic"

    def check_labels(self):  # want to prune non-contiguous labels, might be expensive
        # TODO: check sent_ids to make sure sequences are good
        print "Checking labels..."
        if len(self.labels.shape) != 2:
            print "labels need to be in (n_samples, 2) format and the shape of labels is ", self.labels.shape, "... Exiting now"
            sys.exit()
        if self.labels.shape[0] != sum(self.feature_sequence_lens):
            print "Number of examples in feature file: ", sum(
                self.feature_sequence_lens
            ), " does not equal size of label file, ", self.labels.shape[0], "... Exiting now"
            sys.exit()
        #        if [i for i in np.unique(self.labels)] != range(np.max(self.labels) + 1):
        #            print "Labels need to be in the form 0,1,2,....,n,... Exiting now"
        #            sys.exit()
        #        label_counts = np.bincount(np.ravel(self.labels[:,1]))
        #        print "distribution of labels is:"
        #        for x in range(len(label_counts)):
        #            print "#", x, "\b's:", label_counts[x]
        print "labels seem copacetic"

    def forward_layer(
        self, inputs, weights, biases, weight_type, secondary_inputs=None, secondary_weights=None
    ):  # completed
        if weight_type == "logistic":
            return self.softmax(
                self.weight_matrix_multiply(inputs, weights, biases) + np.dot(secondary_inputs, secondary_weights)
            )
        elif weight_type == "rbm_gaussian_bernoulli" or weight_type == "rbm_bernoulli_bernoulli":
            return self.sigmoid(
                weights[(inputs), :] + self.weight_matrix_multiply(secondary_inputs, secondary_weights, biases)
            )
        # added to test finite differences calculation for pearlmutter forward pass
        elif weight_type == "linear":  # only used for the logistic layer
            return self.weight_matrix_multiply(inputs, weights, biases) + np.dot(secondary_inputs, secondary_weights)
        else:
            print "weight_type", weight_type, "is not a valid layer type.",
            print "Valid layer types are", self.model.valid_layer_types, "Exiting now..."
            sys.exit()
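    # For the bidirectional output layer the two hidden streams are summed
    # before the softmax. Assuming weight_matrix_multiply(x, W, b) computes
    # x.dot(W) + b, the 'logistic' branch above amounts to (numpy pseudocode,
    # with W_f/W_b standing for the hidden_output_forward/backward weights):
    #   softmax(hiddens_forward.dot(W_f) + hiddens_backward.dot(W_b) + bias_output)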

    def forward_pass_single_batch(self, inputs, model=None, return_hiddens=False, linear_output=False):
        """forward pass for single batch size. Mainly for speed in this case
        """
        if model == None:
            model = self.model
        num_observations = inputs.size
        # embedding lookup; fancy indexing returns a fresh copy for each direction
        hiddens_forward = model.weights["visible_hidden"][inputs, :]
        hiddens_forward[:1, :] += self.weight_matrix_multiply(
            model.init_hiddens["forward"], model.weights["hidden_hidden_forward"], model.bias["hidden_forward"]
        )
        expit(hiddens_forward[0, :], hiddens_forward[0, :])  # in-place sigmoid

        hiddens_backward = model.weights["visible_hidden"][inputs, :]
        hiddens_backward[-1:, :] += self.weight_matrix_multiply(
            model.init_hiddens["backward"], model.weights["hidden_hidden_backward"], model.bias["hidden_backward"]
        )
        expit(hiddens_backward[-1, :], hiddens_backward[-1, :])  # in-place sigmoid

        for time_step in range(1, num_observations):
            hiddens_forward[time_step : time_step + 1, :] += self.weight_matrix_multiply(
                hiddens_forward[time_step - 1 : time_step, :],
                model.weights["hidden_hidden_forward"],
                model.bias["hidden_forward"],
            )
            expit(hiddens_forward[time_step, :], hiddens_forward[time_step, :])  # sigmoid

            hiddens_backward[
                num_observations - time_step - 1 : num_observations - time_step, :
            ] += self.weight_matrix_multiply(
                hiddens_backward[num_observations - time_step : num_observations - time_step + 1, :],
                model.weights["hidden_hidden_backward"],
                model.bias["hidden_backward"],
            )
            expit(
                hiddens_backward[num_observations - time_step - 1, :],
                hiddens_backward[num_observations - time_step - 1, :],
            )  # sigmoid

        outputs = self.forward_layer(
            hiddens_forward,
            model.weights["hidden_output_forward"],
            model.bias["output"],
            model.weight_type["hidden_output"],
            hiddens_backward,
            model.weights["hidden_output_backward"],
        )

        if return_hiddens:
            return outputs, hiddens_forward, hiddens_backward
        else:
            del hiddens_forward, hiddens_backward
            return outputs
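    # Recurrences computed above, per time step t (E is the 'visible_hidden'
    # embedding table, w[t] the word index at t, sigma the logistic sigmoid,
    # T the sequence length):
    #   h_f[t] = sigma(E[w[t]] + h_f[t-1] . W_hh_forward  + b_forward)    left to right
    #   h_b[t] = sigma(E[w[t]] + h_b[t+1] . W_hh_backward + b_backward)   right to left
    #   y[t]   = softmax(h_f[t] . W_out_forward + h_b[t] . W_out_backward + b_output)
    # with h_f[-1] and h_b[T] taken from model.init_hiddens.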

    def forward_pass(
        self, inputs, feature_sequence_lens, model=None, return_hiddens=False, linear_output=False
    ):
        """forward pass each layer starting with feature level
        inputs in the form n_max_obs x n_seq (int32 word indices);
        delegates to forward_pass_single_batch() per sequence, since the
        backward recurrence must start at each sequence's own last observation.
        Hiddens and outputs past a sequence's end are left at zero,
        so nothing horrible happens during backprop, etc."""
        if model is None:
            model = self.model
        architecture = model.get_architecture()
        max_sequence_observations = inputs.shape[0]
        num_sequences = inputs.shape[1]
        num_hiddens = architecture[1]
        num_outs = architecture[2]
        hiddens_forward = np.zeros((max_sequence_observations, num_sequences, num_hiddens))
        hiddens_backward = np.zeros((max_sequence_observations, num_sequences, num_hiddens))
        outputs = np.zeros((max_sequence_observations, num_sequences, num_outs))
        # propagate each sequence through the bidirectional recurrence
        for seq_index in range(num_sequences):
            num_obs = feature_sequence_lens[seq_index]
            seq_outputs, seq_hiddens_forward, seq_hiddens_backward = self.forward_pass_single_batch(
                inputs[:num_obs, seq_index], model, return_hiddens=True, linear_output=linear_output
            )
            outputs[:num_obs, seq_index, :] = seq_outputs
            hiddens_forward[:num_obs, seq_index, :] = seq_hiddens_forward
            hiddens_backward[:num_obs, seq_index, :] = seq_hiddens_backward
        if return_hiddens:
            return outputs, hiddens_forward, hiddens_backward
        else:
            del hiddens_forward, hiddens_backward
            return outputs

    def flatten_output(self, output, feature_sequence_lens=None):
        """outputs in the form of max_obs_seq x n_seq x n_outs  get converted to form
        n_data x n_outs, so we can calculate classification accuracy and cross-entropy
        """
        if feature_sequence_lens is None:
            feature_sequence_lens = self.feature_sequence_lens
        num_outs = output.shape[2]
        flat_output = np.zeros((self.batch_size(feature_sequence_lens), num_outs))
        cur_index = 0
        for seq_index, num_obs in enumerate(feature_sequence_lens):
            # slice assignment already copies the data, so no deepcopy is needed
            flat_output[cur_index : cur_index + num_obs, :] = output[:num_obs, seq_index, :]
            cur_index += num_obs

        return flat_output
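    # e.g. with feature_sequence_lens = [3, 2], an output of shape (3, 2, n_outs)
    # flattens to shape (5, n_outs): rows 0-2 come from sequence 0 and rows 3-4
    # from sequence 1.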

    def calculate_log_perplexity(self, output, flat_labels):  # completed, expensive, should be compiled
        """sums the negative log2 probability of the correct labels,
        i.e. the unnormalized log perplexity, with flat labels
        """
        return -np.sum(
            np.log2(np.clip(output, a_min=1e-12, a_max=1.0))[np.arange(flat_labels.shape[0]), flat_labels[:, 1]]
        )
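    # Corpus perplexity then follows as
    #   2 ** (self.calculate_log_perplexity(output, flat_labels) / flat_labels.shape[0])
    # since the method returns the summed negative log2 probability of the labels.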

    def calculate_cross_entropy(self, output, flat_labels):  # completed, expensive, should be compiled
        """calculates the summed cross-entropy (natural log) with flat labels
        """
        return -np.sum(
            np.log(np.clip(output, a_min=1e-12, a_max=1.0))[np.arange(flat_labels.shape[0]), flat_labels[:, 1]]
        )

    def calculate_classification_accuracy(self, flat_output, labels):  # completed, possibly expensive
        prediction = flat_output.argmax(axis=1).reshape(labels.shape)
        return np.sum(prediction == labels) / float(labels.size)