def get_speech_data():
    """Read alignments and features, build samples, and split the data."""
    print_message(message='Reading alignments...')
    read_alignments()
    print_param(description='Number of loaded records (alignments)',
                param_str=str(len(mlf)))
    # dict.keys() is not subscriptable in Python 3; use next(iter(...)) to
    # peek at one record instead
    print_param(description='Number of alignment frames',
                param_str=str(len(mlf[next(iter(mlf))])))
    print_param(description='Number of found phonemes',
                param_str=str(len(samples)))
    print_param(description='Found phonemes',
                param_str=str(sorted(samples.keys())))
    print_message(message='Reading features...')
    read_features()
    print_param(description='Number of loaded records (features)',
                param_str=str(len(features)))
    print_param(description='Number of feature frames',
                param_str=str(len(features[next(iter(features))])))
    print_message(message='Adding samples...')
    add_samples()
    print_message(message='Splitting data...')
    split_data()
    print_param(description='Number of training samples', param_str=str(len(data['x'])))
    print_param(description='Number of validation samples', param_str=str(len(data['x_val'])))
    print_param(description='Number of testing samples', param_str=str(len(data['x_test'])))
    print_param(description='Problem dimension', param_str=str(data['x'][0].shape[0]))
    print_param(description='Number of classes', param_str=str(len(samples)))
    print_message(message='Number of samples per class:')
    for phonem in sorted(samples.keys()):
        print_param(description=phonem, param_str=str(len(samples[phonem])))
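# A minimal usage sketch (added for illustration, not part of the original
# source): get_speech_data() works on module-level state -- the dicts `mlf`,
# `features`, `samples`, and the `data` split dict -- and on the helpers
# read_alignments(), read_features(), add_samples(), and split_data() defined
# elsewhere in this module. Assuming those helpers exist, a call might look like:
#
#     mlf, features, samples = dict(), dict(), dict()
#     data = {'x': [], 'y': [], 'x_val': [], 'y_val': [],
#             'x_test': [], 'y_test': []}
#     get_speech_data()  # populates and reports on the globals above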
def load_stats(self, file_name):
    print_message(message='Loading pruning process statistics from ' + file_name)
    # pickled data is binary; open the file in 'rb' mode
    with open(file_name, 'rb') as f:
        stats_pack = load_cpickle(f)
    self.stats_data = stats_pack['data']
    self.means = stats_pack['means']
    self.stds = stats_pack['stds']
    self.vars = stats_pack['vars']
    self.n_obs = stats_pack['n_obs']
    self.pruning_steps = stats_pack['pruning_steps']
def dump_stats(self, file_name):
    stats_pack = {
        'data': self.stats_data,
        'means': self.means,
        'stds': self.stds,
        'vars': self.vars,
        'n_obs': self.n_obs,
        'pruning_steps': self.pruning_steps
    }
    # pickled data is binary; open the file in 'wb' mode
    with open(file_name, 'wb') as f:
        dump_cpickle(stats_pack, f)
    print_message(message='Experiment statistics dumped as ' + file_name)
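# Round-trip sketch (added for illustration; `PruningStats` is a hypothetical
# name standing in for whatever class carries these two methods):
#
#     stats = PruningStats(...)
#     stats.dump_stats('mnist_pruning.stats')     # serialise the statistics
#     restored = PruningStats(...)
#     restored.load_stats('mnist_pruning.stats')  # restore them elsewhere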
def dump(self, net_file_name):
    net_pack = {
        'w': self.w,
        'b': self.b,
        'w_is': self.w_is,
        'b_is': self.b_is,
        'w_init': self.w_init,
        'b_init': self.b_init,
        'structure': self.structure,
        'tf': self.tf_name,
        'labels': self.labels,
        'features': self.used_features,
        'label_sign': self.label_sign
    }
    if 'pruning' in self.opt:  # membership test; no need to materialise keys()
        net_pack['pruning_stats'] = self.opt['pruning'].stats
    # pickled data is binary; open the file in 'wb' mode
    with open(net_file_name, 'wb') as f:
        dump_cpickle(net_pack, f)
    print_message(message='Net dumped as ' + net_file_name)
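# Usage sketch (added for illustration): dump() serialises everything needed to
# rebuild the net -- current and initial weights, structure, transfer-function
# name, and label metadata. Assuming a matching loader exists on the class (a
# counterpart to load_stats() above), persisting a trained net might look like:
#
#     net = FeedForwardNet(hidden=[50], tf_name='Sigmoid')
#     net.fit(x=dataset['x'], y=dataset['y'], ...)
#     net.dump('net_mnist.net')  # pickle the trained parameters to disk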
                        type=int, default=-1,
                        help='Number of training samples per class.')
    parser.add_argument('-na', '--name_appendix', type=str, default='',
                        help='Dataset filename appendix')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_arguments()
    destination = 'dataset_mnist' + args.name_appendix + '.ds'
    print_message(message='Loading Yann LeCun\'s MNIST data...')
    # the gzipped MNIST pickle is binary; note the 'rb' mode
    with open_gzip('../../../data/data_mnist/mnist.pkl.gz', 'rb') as f:
        data_train, data_val, data_test = load_cpickle(f)
    dataset = open_shelve(destination, 'c')
    class_counter = dict()
    if args.n_samples == -1:
        print_message(message='Got MNIST dataset: ' + str(len(data_train[0])) +
                              ' : ' + str(len(data_val[0])) +
                              ' : ' + str(len(data_test[0])) + ', saving...')
        # flatten each image into a 784x1 column vector
        dataset['x'] = [reshape(x, (784, 1)) for x in data_train[0]]
        dataset['y'] = data_train[1]
    else:
        print_message(message='Got MNIST dataset: ' + str(args.n_samples * 10) +
                              ' : ' + str(len(data_val[0])) +
                              ' : ' + str(len(data_test[0])) +
                        '--req_acc', type=float, default=0.96,
                        help='Required classification accuracy')
    parser.add_argument('-lev', '--levels', type=int,
                        default=(75, 50, 35, 20, 15, 10, 7, 5, 3, 2, 1, 0), nargs='+',
                        help='Pruning percentile levels')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_arguments()
    print_message(message='EXAMPLE: MNIST dataset')
    print_param(description='Number of experiment observations', param_str=str(args.n_obs))
    print_param(description='Initial number of hidden neurons', param_str=str(args.hidden_structure))
    print_param(description='Required accuracy', param_str=str(args.req_acc))
    # encode the experiment parameters into the stats file name
    params_str = '_hs' + str(args.hidden_structure) + '_ra' + str(args.req_acc).replace('.', '') + '_no' + str(args.n_obs)
    if args.generate:
        stats_data = list()
        for i_obs in range(1, args.n_obs + 1):
            print_message(message='MNIST experiment, observation ' + str(i_obs) + '/' + str(args.n_obs))
            net = FeedForwardNet(hidden=args.hidden_structure, tf_name='Sigmoid')
if __name__ == '__main__':
    args = parse_arguments()
    # build the dataset file name from the preprocessing parameters
    destination = 'dataset_speech_bs' + str(args.border_size)
    destination += '_cs' + str(args.context_size) + '_nf' + str(args.n_filters)
    destination += '_ds' + str(int(args.data_split[0] * 10)) + str(int(args.data_split[1] * 10)) + str(int(args.data_split[2] * 10))
    destination += '_ns' + str(args.n_samples) + '_nr' + str(args.n_records)
    if args.phonemes:
        destination += '_' + str(args.phonemes).replace(',', '+').replace(' ', '').replace('\'', '')[1:-1]
    destination += args.name_appendix + '.ds'
    print_message(message='Processing SPEECH data...')
    print_param(description='Path to features', param_str=args.feature_filename)
    print_param(description='Path to alignments', param_str=args.alignment_filename)
    print_param(description='Border size (strictness)', param_str=str(args.border_size))
    print_param(description='Context size', param_str=str(args.context_size))
    print_param(description='Number of MEL filters', param_str=str(args.n_filters))
    print_param(description='Number of records', param_str=str(args.n_records))
    print_param(description='Number of samples', param_str=str(args.n_samples))
    print_param(description='Maximum number of other phonemes', param_str=str(args.max_rest))
    print_param(description='Phonemes as classes', param_str=str(args.phonemes) if args.phonemes else 'all')
    print_param(description='Data split (train/val/test)', param_str=str(args.data_split))
    print_param(description='Dataset destination file name', param_str=destination)
    mlf = dict()
    features = dict()
    samples = dict()
    if abs(sum(args_tmp.data_split) - 1) > 1e-5:
        stderr.write('Error: data_split args must sum to 1.0 (e.g. 0.8 0.1 0.1).\n')
        exit()
    else:
        return args_tmp


if __name__ == '__main__':
    args = parse_arguments()
    # sample indices at which the train/val and val/test splits begin;
    # cast to int so the bounds can be used as indices
    split_bounds = (int(args.n_samples * args.data_split[0]),
                    int(args.n_samples * (args.data_split[0] + args.data_split[1])))
    destination = 'dataset_train' + args.name_appendix + '.ds'
    print_message(message='Generating and splitting TRAIN data...')
    data = {
        'x': list(), 'y': list(),
        'x_val': list(), 'y_val': list(),
        'x_test': list(), 'y_test': list()
    }
    for ni in range(args.n_samples):
        # cycle through three hand-crafted east/west feature patterns
        if ni % 3 == 0:
            x_east = [0, 1, 1, 0, 0, 0, 1]
            x_west = [0, 1, 1, 1, 1, 0, 0]
        elif ni % 3 == 1:
            x_east = [0, 0, 1, 0, 1, 0, 0]
            x_west = [1, 1, 1, 0, 1, 0, 0]
    parser = ArgumentParser(description='Run experiments and plot results for XOR dataset.')
    # type=bool would treat any non-empty string as True (bool('False') is True),
    # so use store_true for a proper on/off flag
    parser.add_argument('-g', '--generate', action='store_true',
                        help='Generate new stats instead of loading the dumped ones')
    parser.add_argument('-no', '--n_obs', type=int, default=10,
                        help='Number of experiment observations')
    parser.add_argument('-hs', '--hidden_structure', type=int, default=[50], nargs='+',
                        help='Neural network structure')
    parser.add_argument('-ra', '--req_acc', type=float, default=1.0,
                        help='Required classification accuracy')
    parser.add_argument('-lev', '--levels', type=int,
                        default=(75, 50, 35, 20, 10, 5, 1, 0), nargs='+',
                        help='Pruning percentile levels')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_arguments()
    print_message(message='EXAMPLE: XOR dataset')
    print_param(description='Number of experiment observations', param_str=str(args.n_obs))
    print_param(description='Initial number of hidden neurons', param_str=str(args.hidden_structure))
    print_param(description='Required accuracy', param_str=str(args.req_acc))
    # encode the experiment parameters into the stats file name
    params_str = '_hs' + str(args.hidden_structure) + '_ra' + str(args.req_acc).replace('.', '') + '_no' + str(args.n_obs)
    if args.generate:
        stats_data = list()
        for i_obs in range(1, args.n_obs + 1):
            print_message(message='XOR experiment, observation ' + str(i_obs) + '/' + str(args.n_obs))
            net = FeedForwardNet(hidden=args.hidden_structure, tf_name='Sigmoid')
            dataset = open_shelve('../examples/xor/dataset_xor.ds', 'c')
            net.fit(x=dataset['x'], y=dataset['y'], x_val=dataset['x_val'], y_val=dataset['y_val'],
                    learning_rate=0.4, n_epoch=50, req_acc=1.0)
            res = net.evaluate(x=dataset['x_test'], y=dataset['y_test'])
            print_message(message='Evaluation on test data after training:')