from itertools import izip

import numpy as np
import theano

# Project-local modules are assumed importable from elsewhere in this
# codebase: data_io, reader, updates, normalise_weights and Parameters.

def create_split_streams(frame_files, label_files, left_context, right_context):
    """Build one (frames, labels) stream per frame/label file pair."""
    streams = []
    for frame_file, label_file in izip(frame_files, label_files):
        frame_stream = data_io.stream_file(frame_file)
        # Attach left/right context frames to each centre frame.
        frame_stream = data_io.context(frame_stream,
                                       left=left_context, right=right_context)
        label_stream = data_io.stream_file(label_file)
        # Pair each context-windowed frame sequence with its labels.
        stream = data_io.zip_streams(frame_stream, label_stream)
        streams.append(stream)
    return streams
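# A minimal usage sketch for the labelled variant above. The shard names
# and context sizes are hypothetical; data_io.random_select_stream is the
# same helper the training script below uses to interleave streams.
def _example_usage():
    frame_files = ['data/dev.%02d.frames.pklgz' % i for i in xrange(4)]
    label_files = ['data/dev.%02d.labels.pklgz' % i for i in xrange(4)]
    streams = create_split_streams(frame_files, label_files,
                                   left_context=5, right_context=5)
    # Interleave the per-file streams into one randomised stream.
    for frames, labels in data_io.random_select_stream(*streams):
        print frames.shape, labels.shape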
def create_split_streams(frame_files, left_context, right_context):
    """Frames-only variant, for data without label files."""
    streams = []
    for frame_file in frame_files:
        stream = data_io.stream_file(frame_file)
        stream = data_io.context(stream, left=left_context, right=right_context)
        # zip_streams with a single stream keeps the same tuple-per-item
        # interface as the labelled variant.
        stream = data_io.zip_streams(stream)
        streams.append(stream)
    return streams
def stream():
    """Build the randomised, length-bucketed training stream (shards 1-19).

    `mean` and `std` are module-level globals computed in the script below.
    """
    stream = data_io.random_select_stream(*[
        data_io.stream_file('data/train.%02d.pklgz' % i)
        for i in xrange(1, 20)
    ])
    # Sort within a 128-item buffer by sequence length (x[1].shape[0]) so
    # each batch holds similar-length items and padding is minimised.
    stream = data_io.buffered_sort(stream,
                                   key=lambda x: x[1].shape[0],
                                   buffer_items=128)
    batched_stream = reader.batch_and_pad(stream, batch_size=16,
                                          mean=mean, std=std)
    # Lightly shuffle batches so their order is not strictly length-sorted.
    batched_stream = data_io.buffered_random(batched_stream, buffer_items=4)
    return batched_stream
def validate():
    """Return the per-frame cost over the held-out shard."""
    # Shard 0 is held out from training (the training stream uses 1-19).
    stream = data_io.stream_file('data/train.%02d.pklgz' % 0)
    stream = data_io.buffered_sort(stream,
                                   key=lambda x: x[1].shape[0],
                                   buffer_items=128)
    batched_stream = reader.batch_and_pad(stream, batch_size=32,
                                          mean=mean, std=std)
    total_cost = 0
    total_frames = 0
    for data, lengths in batched_stream:
        batch_avg_cost = test(data, lengths)
        batch_frames = np.sum(lengths)
        # Accumulate totals so the result is per-frame, not per-batch.
        total_cost += batch_avg_cost * batch_frames
        total_frames += batch_frames
    return total_cost / total_frames
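# Why validate() weights by frame count: test() returns a batch's average
# per-frame cost, so batch_avg_cost * batch_frames recovers that batch's
# total cost. Worked example (made-up numbers): batches averaging 2.0 over
# 10 frames and 4.0 over 30 frames give (2.0*10 + 4.0*30) / 40 = 3.5 as
# the true per-frame average, whereas naively averaging the two batch
# means would give 3.0.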
# X, l, batch_cost, parameters and gradients come from the model-building
# code, which is not shown in this excerpt.
P_learn = Parameters()
# Note: this rebinds the name `updates` from the updates module to the
# update list it returns, shadowing the module from here on.
updates = updates.adam(parameters, gradients,
                       learning_rate=0.00025, P=P_learn)
updates = normalise_weights(updates)

print "Compiling..."
train = theano.function(
    inputs=[X, l],
    outputs=batch_cost,
    updates=updates,
)
test = theano.function(inputs=[X, l], outputs=batch_cost)

print "Calculating mean and variance..."
# Estimate mean and std over the training shards for input normalisation.
rand_stream = data_io.random_select_stream(*[
    data_io.stream_file('data/train.%02d.pklgz' % i)
    for i in xrange(1, 20)
])
mean, std, count = reader.get_normalise(rand_stream)
print "Dataset count:", count
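# A minimal epoch-loop sketch tying the pieces above together. The epoch
# count and best-cost bookkeeping are assumptions (the excerpt stops after
# computing the normalisation statistics); stream(), train and validate
# are the functions defined and compiled above.
best_cost = np.inf
for epoch in xrange(10):  # hypothetical epoch count
    for data, lengths in stream():
        train(data, lengths)
    cost = validate()
    print "epoch %d: per-frame validation cost = %f" % (epoch, cost)
    if cost < best_cost:
        best_cost = cost  # a real script would also checkpoint parameters here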