def make_embedding(CV, MODEL, DATA, EMBED):
    """Export node embeddings of a trained graph-convolution model for one fold.

    The four-layer GraphConv encoder is rebuilt, the first eight weight arrays
    of the model loaded from ``MODEL`` are copied into it, the encoder is run
    with ``training=False`` on the full feature matrix and the result is
    handed to ``save_embedding``.

    Args:
        CV: Fold identifier; the fold directory is ``DATA + "/FOLD-<CV>"``.
        MODEL: Location passed to ``load_model``.
        DATA: Dataset root passed to ``load_dataset``.
        EMBED: Output directory; created (with parents) when missing.
    """
    DATA_FOLD = DATA + f"/FOLD-{CV}"

    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(EMBED, exist_ok=True)

    # Labels are only needed by the task head, which is not rebuilt here.
    graph, features, _labels = load_dataset(DATA, DATA_FOLD)

    fltr = GraphConv.preprocess(graph).astype('f4')
    fltr = ops.sp_matrix_to_sp_tensor(fltr)

    X_in = Input((features.shape[1], ))
    fltr_in = Input((features.shape[0], ), sparse=True)

    X_1 = GraphConv(512, 'relu', True, kernel_regularizer=l2(5e-4))([X_in, fltr_in])
    X_1 = Dropout(0.5)(X_1)
    X_2 = GraphConv(256, 'relu', True, kernel_regularizer=l2(5e-4))([X_1, fltr_in])
    X_2 = Dropout(0.5)(X_2)
    X_3 = GraphConv(128, 'relu', True, kernel_regularizer=l2(5e-4))([X_2, fltr_in])
    X_3 = Dropout(0.5)(X_3)
    X_4 = GraphConv(64, 'linear', True, kernel_regularizer=l2(5e-4))([X_3, fltr_in])

    loaded_model = load_model(f"{MODEL}")

    model_without_task = Model(inputs=[X_in, fltr_in], outputs=X_4)
    # NOTE(review): presumably kernel + bias for each of the four GraphConv
    # layers, i.e. everything except the task head -- confirm against the
    # training script that produced MODEL.
    model_without_task.set_weights(loaded_model.get_weights()[:8])

    final_node_representations = model_without_task([features, fltr], training=False)
    save_embedding(final_node_representations, EMBED, DATA_FOLD, CV)
def get_image():
    """Gets an image file via POST request, feeds the image to the FaceNet model then saves both the cropped
    face image and its resulting embedding from the FaceNet model in their designated folders.

        'uploads' folder: for image files
        'embeddings' folder: for embedding numpy files.

    Returns:
        A rendered "warning.html" page when the request is malformed (wrong HTTP method, missing 'file' field,
        empty filename, disallowed file type), otherwise a rendered "upload_result.html" page reporting whether
        a face was found and embedded.
    """
    if request.method != 'POST':
        return render_template("warning.html", status="POST HTTP method required!")

    if 'file' not in request.files:
        return render_template("warning.html", status="No 'file' field in POST request!")

    file = request.files['file']
    filename = file.filename

    if filename == "":
        return render_template("warning.html", status="No selected file!")

    if not (file and allowed_file(filename=filename, allowed_set=allowed_set)):
        # Previously this case fell off the end of the view and returned None,
        # which Flask rejects as an invalid response.
        return render_template("warning.html", status="File type not allowed!")

    filename = secure_filename(filename=filename)

    # Read image file as numpy array of RGB dimension
    img = imread(name=file, mode='RGB')

    # Detect and crop a 160 x 160 image containing a human face in the image file
    img = get_face(img=img, pnet=pnet, rnet=rnet, onet=onet, image_size=image_size)

    # No human face was detected
    if img is None:
        return render_template(
            "upload_result.html",
            status="Image upload was unsuccessful! No human face was detected!"
        )

    embedding = forward_pass(
        img=img,
        session=facenet_persistent_session,
        images_placeholder=images_placeholder,
        embeddings=embeddings,
        phase_train_placeholder=phase_train_placeholder,
        image_size=image_size
    )

    # Save cropped face image to 'uploads/' folder
    save_image(img=img, filename=filename, uploads_path=uploads_path)

    # Remove file extension from image filename for numpy file storage being based on image filename
    filename = remove_file_extension(filename=filename)

    # Save embedding to 'embeddings/' folder
    save_embedding(embedding=embedding, filename=filename, embeddings_path=embeddings_path)

    return render_template(
        "upload_result.html",
        status="Image uploaded and embedded successfully!"
    )
def get_image():
    """Handle an image upload: crop the face, embed it and store both results.

    On a POST request the uploaded 'file' field is read as an RGB image,
    `get_face` crops the detected face, `forward_pass` produces its embedding,
    and the cropped image and the embedding are written through `save_image`
    and `save_embedding` (the embedding is named after the image filename
    without its extension).

    Returns:
        A plain-text message when the request is malformed (wrong HTTP method,
        missing 'file' field, empty filename, disallowed file type), otherwise
        the rendered "upload_result.html" page.
    """
    if request.method != 'POST':
        return "POST HTTP method required!"

    if 'file' not in request.files:
        return "No file part"

    file = request.files['file']
    filename = file.filename

    if filename == "":
        return "No selected file"

    if not (file and allowed_file(filename=filename, allowed_set=allowed_set)):
        # Previously this case fell off the end of the view and returned None,
        # which Flask rejects as an invalid response.
        return "File type not allowed"

    # NOTE(review): `filename` comes straight from the client and is later used
    # to build storage paths; consider sanitising it (e.g. werkzeug's
    # secure_filename) before saving.

    # Read image file as numpy array of RGB dimension
    img = io.imread(fname=file, mode='RGB')

    # Detect and crop a 160 x 160 image containing a human face in the image file
    img = get_face(img=img, pnet=pnet, rnet=rnet, onet=onet, image_size=image_size)

    # No human face was detected
    if img is None:
        return render_template(
            "upload_result.html",
            status="Image upload was unsuccessful! No human face was detected."
        )

    embedding = forward_pass(
        img=img,
        session=facenet_persistent_session,
        images_placeholder=images_placeholder,
        embeddings=embeddings,
        phase_train_placeholder=phase_train_placeholder,
        image_size=image_size)

    # Save cropped face image to 'uploads/' folder
    save_image(img=img, filename=filename, uploads_path=uploads_path)

    # Remove file extension from image filename for numpy file storage being based on image filename
    filename = remove_file_extension(filename=filename)

    # Save embedding to 'embeddings/' folder
    save_embedding(embedding=embedding, filename=filename, embeddings_path=embeddings_path)

    return render_template(
        "upload_result.html",
        status="Image uploaded and embedded successfully!")
def main(args):
    """
    Characteristic function embedding wrapper.

    Fits a node level ("FEATHER") or graph level ("FEATHER-G") model, then
    saves the resulting embedding to `args.output`.

    :param args: Arguments object parsed up. Reads `model_type`, `output` and,
        depending on the model type, `graph_input` / `feature_input` or
        `graphs_input`.
    :raises SystemExit: If `args.model_type` is not a supported model type.
    """
    if args.model_type == "FEATHER":
        print("\nFitting a node embedding.\n")
        graph = load_graph(args.graph_input)
        features = load_features(args.feature_input)
        model = FEATHER()
        model.fit(graph, features)
    elif args.model_type == "FEATHER-G":
        print("\nFitting a graph level embedding.\n")
        graphs = load_graphs(args.graphs_input)
        model = FEATHERG()
        model.fit(graphs)
    else:
        # `quit()` is an interactive helper injected by `site` and exited with
        # status 0 and no message; fail loudly with a non-zero status instead.
        raise SystemExit(
            "Unknown model type {!r}; expected 'FEATHER' or 'FEATHER-G'.".format(args.model_type)
        )
    X = model.get_embedding()
    save_embedding(X, args.output)
def main(args):
    """Compute an unsupervised embedding and write it, plus the run arguments, to ``args.out``.

    The graph data is loaded from ``args.graphdir``, the embedding routine is
    selected by ``args.model`` (one of 'random', 'lsa', 'deepwalk',
    'gcn_cv_sc'; any other value raises ``KeyError``), the string form of
    ``args`` is written to the ``MDS['args_file']`` file and the labels and
    embedding are passed to ``save_embedding``.
    """
    print("Loading data...")
    data = load_data(
        args.graphdir,
        supervised=False,
        with_authors=args.use_authors,
        collate_coauthorship=(not args.first_class_authors),
        undirected=True,
    )

    # Dispatch table: model name -> embedding routine
    embedders = {
        'random': embed_random,
        'lsa': embed_lsa,
        'deepwalk': embed_deepwalk,
        'gcn_cv_sc': embed_control_variate,
    }
    run_embedding = embedders[args.model]
    labels, embedding = run_embedding(args, data)

    # Record the arguments this run was started with
    args_path = os.path.join(args.out, MDS['args_file'])
    with open(args_path, 'w') as argsfile:
        argsfile.write(str(args) + '\n')

    embedding_file = os.path.join(args.out, MDS['embedding_file'])
    save_embedding(labels, embedding, embedding_file)
def _tag_path_edges(embedding_mesh, paths, selected=None):
    '''
    Color edges of embedding_mesh by the (1-based) index of the vertex path
    that traverses them. Only paths with truthy selected[i] are considered
    when selected is given. Returns the edge function, the vertex-pair ->
    edge-index lookup and the considered paths encoded as edge indices.
    '''
    embedding_mesh.init(1, 0)
    e2v = embedding_mesh.topology()(1, 0)
    edge_lookup = {tuple(sorted(e2v(e))): e for e in range(embedding_mesh.num_entities(1))}

    edge_f = df.MeshFunction('size_t', embedding_mesh, 1, 0)
    paths_as_edges = []
    for tag, path in enumerate(paths, 1):
        if selected is not None and not selected[tag-1]:
            continue
        the_edge = []
        for e in zip(path[:-1], path[1:]):
            edge_index = edge_lookup[tuple(sorted(e))]
            # assert edge_f[edge_index] == 0  # Never seen
            edge_f[edge_index] = tag
            the_edge.append(edge_index)
        paths_as_edges.append(the_edge)

    return edge_f, edge_lookup, paths_as_edges


def point_embed_mesh1d(model, mesh1d, bounding_shape, **kwargs):
    '''
    Embed points of mesh1d into Xd bounding shape. An attempt is made
    to insert intermediate points so that also edges are embedded

    Up to kwargs.get('niters', 5) rounds of point embedding followed by
    insertion of auxiliary points on not-yet-embedded edges are performed.
    If some edges are still not embedded afterwards they are forced onto
    paths of existing mesh vertices (non-conforming fallback).

    Required kwargs: 'save_geo', 'debug', 'monitor', 'save_embedding'
    (a falsy value disables the respective feature); they are also forwarded
    to _embed_points. With 'monitor' set to a directory per-iteration
    diagnostics are written there.

    Side effects: writes 'foo.pvd' and 'foo_final.pvd' to the working
    directory.

    Returns utils.LineMeshEmbedding.
    '''
    x = mesh1d.coordinates()

    foo = df.MeshFunction('size_t', mesh1d, 1, 0)
    foo.array()[:] = np.arange(1, 1 + mesh1d.num_cells())
    df.File('foo.pvd') << foo

    mesh1d.init(1, 0)
    e2v = mesh1d.topology()(1, 0)
    topology = [list(e2v(e)) for e in range(mesh1d.num_entities(1))]

    target_l = trim.edge_lengths(mesh1d).vector().get_local()

    converged, nneeds = False, [mesh1d.num_cells()]
    niters = kwargs.get('niters', 5)
    base_geo = kwargs['save_geo']
    for k in range(niters):
        # Some mesh which embeds points but where these points are not
        # necessarily edges
        if base_geo:
            kwargs['save_geo'] = '_'.join([base_geo, str(k)])

        t = utils.Timer('%d-th iteration of %d point embedding' % (k, len(x)), 1)
        embedding_mesh, vmap = _embed_points(model, x, bounding_shape, **kwargs)
        t.done()

        assert _embeds_points(embedding_mesh, x, vmap)
        # See which edges need to be improved
        needs_embedding = _not_embedded_edges(topology, vmap, embedding_mesh)
        # Count truthy entries; len(filter(...)) only works on Python 2
        nneeds.append(sum(1 for need in needs_embedding if need))
        utils.print_green(' ', '# edges need embedding %d (was %r)' % (nneeds[-1], nneeds[:-1]))
        converged = not any(needs_embedding)

        if kwargs['debug'] and k == niters - 1:
            gmsh.fltk.initialize()
            gmsh.fltk.run()

        # Here's some debugging functionality which saves progress on embedding
        if kwargs['monitor']:
            # Force current mesh1d embedding
            help_topology = _force_embed_edges(deepcopy([list(vmap[edge]) for edge in topology]),
                                               embedding_mesh,
                                               needs_embedding,
                                               defaultdict(list))
            # And see about the length of edges under that embedding
            new_l = _edge_lengths(embedding_mesh.coordinates(), help_topology, needs_embedding)
            # NOTE(review): normalized by new_l here but by target_l in the
            # final report below - confirm which one is intended
            np.savetxt(os.path.join(kwargs['monitor'], 'length_diff_iter%d.txt' % k), (new_l-target_l)/new_l)
            utils.print_green(' ', 'Max relative length error', np.max(new_l))
            # And distance
            new_d = _edge_distances(embedding_mesh.coordinates(), help_topology, needs_embedding)
            np.savetxt(os.path.join(kwargs['monitor'], 'distance_diff_iter%d.txt' % k), new_d)
            utils.print_green(' ', 'Max relative distance error', np.max(new_d))

            old_l = target_l.sum()
            new_l = new_l.sum()
            utils.print_green(' ', 'Target %g, Current %g, Relative Error %g' % (old_l, new_l, (new_l-old_l)/old_l))

            # Save the edges which needed embedding
            edge_f, _, _ = _tag_path_edges(embedding_mesh, help_topology, needs_embedding)
            df.File(os.path.join(kwargs['monitor'], 'need_embedding_iter%d.pvd' % k)) << edge_f

        if converged:
            break

        # Insert auxiliary points and retry
        t = utils.Timer('%d-th iteration of point insert' % k, 1)
        x, topology = _embed_edges(topology, x, needs_embedding)
        t.done()
        assert len(topology) == mesh1d.num_cells()
        utils.print_green(' ', '# num points increased to %d' % len(x))

    skew_embed_vertex = defaultdict(list)
    # We capitulate and make approximations;
    if not converged:
        utils.print_red(' ', 'Falling back to non-conforming `embedding`')

        if base_geo:
            kwargs['save_geo'] = '_'.join([base_geo, str(niters)])

        embedding_mesh, vmap = _embed_points(model, x, bounding_shape, **kwargs)
        assert _embeds_points(embedding_mesh, x, vmap)

        needs_embedding = _not_embedded_edges(topology, vmap, embedding_mesh)
        # We "embed" the mesh using __only__ existing vertices - translate topology
        topology = [list(vmap[edge]) for edge in topology]
        # An edges that need embedding is a branch with terminal vertices - so the
        # idea is to insert the interior path vertices
        t = utils.Timer('Force embedding edges', 1)
        topology = _force_embed_edges(topology, embedding_mesh, needs_embedding, skew_embed_vertex)
        t.done()

        if kwargs['monitor']:
            # And see about the length of edges under that embedding
            new_l = _edge_lengths(embedding_mesh.coordinates(), topology, needs_embedding)
            np.savetxt(os.path.join(kwargs['monitor'], 'length_diff_final.txt'), (new_l-target_l)/target_l)
            utils.print_green(' ', 'Max relative length error', np.max(new_l))
            # And distance
            new_d = _edge_distances(embedding_mesh.coordinates(), topology, needs_embedding)
            np.savetxt(os.path.join(kwargs['monitor'], 'distance_diff_final.txt'), new_d)
            utils.print_green(' ', 'Max relative distance error', np.max(new_d))

            old_l = target_l.sum()
            new_l = new_l.sum()
            utils.print_green(' ', 'Target %g, Current %g, Relative Error %g' % (old_l, new_l, (new_l-old_l)/old_l))

            # Save the edges which needed embedding
            edge_f, _, _ = _tag_path_edges(embedding_mesh, topology, needs_embedding)
            df.File(os.path.join(kwargs['monitor'], 'need_embedding_final.pvd')) << edge_f
    else:
        # Since the original 1d mesh likely has been changed we give
        # topology wrt to node numbering of the embedding mesh
        topology = [list(vmap[edge]) for edge in topology]
    assert len(topology) == mesh1d.num_cells()

    t = utils.Timer('Fishing for edges', 1)
    # Need to color the edge function;
    edge_f, edge_lookup, topology_as_edge = _tag_path_edges(embedding_mesh, topology)

    encode_edge = lambda path: [edge_lookup[tuple(sorted(e))] for e in zip(path[:-1], path[1:])]
    # Finally encode skew edges as edges; lists (not lazy map objects) so
    # that the encoding can be indexed and iterated repeatedly
    skew_embed_edge = {k: [encode_edge(path) for path in edge_as_vertex]
                       for k, edge_as_vertex in skew_embed_vertex.items()}
    t.done()

    df.File('foo_final.pvd') << edge_f

    ans = utils.LineMeshEmbedding(embedding_mesh,
                                  # The others were not part of original data
                                  vmap[:mesh1d.num_vertices()],
                                  edge_f,
                                  utils.EdgeMap(topology, topology_as_edge),
                                  utils.EdgeMap(skew_embed_vertex, skew_embed_edge))

    if kwargs['save_embedding']:
        utils.save_embedding(ans, kwargs['save_embedding'])

    return ans
# Classifier on top of pre-initialised word vectors: the embedding layer starts
# from `embedding_matrix` and is trained together with the dense head.
embedding = Embedding(vocab_size, embedding_size, input_length=max_len, weights=[embedding_matrix])

model = Sequential()
for layer in (
    embedding,
    Flatten(),
    Dropout(0.2),
    Dense(100, activation='sigmoid'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
):
    model.add(layer)
model.summary()

model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
model.fit(
    data,
    labels,
    epochs=100,
    verbose=1,
    batch_size=32,
    shuffle=True,
    validation_data=(test_data, test_labels),
)

# Export the embedding weights after training and plot them with t-SNE
trained_vectors = embedding.get_weights()[0]
save_embedding('glove-embedding_labeled.txt', trained_vectors, vocab)
tsne_plot(embedding, vocab, figure_name='glove-embedding_labeled', max_words=200, pos=['ADJ', 'VERB', 'NOUN'])
def test_mlp(learning_rate=0.01, L2_reg=0.00000001, n_epochs=2000,
             dataset='theano.join.data', ref_dataset = 'ref.theano.join.data',
             batch_size=10000, max_iter = 5000,
             output='theano.model.out', validation_freq = 100, ada_epsilon = 0.000001,
             alpha_share = 0.9, reg_join = 10,
             map_file = "labels.mapping", bidict_file = 'theano.en.sv.translation', english_file = ''):
    """
    Jointly train the target and the reference classifier with AdaGrad updates.

    The cost is
        alpha_share * NLL(target) + (1 - alpha_share) * NLL(reference)
        + L2_reg * classifier.L2_sqr + reg_join * classifier.reg_L2_sqr
    Every `validation_freq` parameter updates the target validation error is
    computed; whenever it improves, the model is written with `save_model`
    and, if `english_file` is given, the embeddings with `save_embedding`.

    :type learning_rate: float
    :param learning_rate: base step size of the AdaGrad update

    :type L2_reg: float
    :param L2_reg: weight of classifier.L2_sqr in the cost

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path passed to load_data for the target data

    :type ref_dataset: string
    :param ref_dataset: path passed to load_data for the reference data

    :type batch_size: int
    :param batch_size: number of rows per minibatch

    :type max_iter: int
    :param max_iter: training stops once the update counter exceeds this

    :type output: string
    :param output: destination passed to save_model

    :type validation_freq: int
    :param validation_freq: number of updates between two validations

    :type ada_epsilon: float
    :param ada_epsilon: constant added under the square root of the
                        accumulated squared gradient

    :type alpha_share: float
    :param alpha_share: weight of the target loss, must lie in [0, 1]

    :type reg_join: float
    :param reg_join: weight of classifier.reg_L2_sqr in the cost

    :type map_file: string
    :param map_file: path passed to load_mapping_matrix

    :type bidict_file: string
    :param bidict_file: path passed to load_translation_vector

    :type english_file: string
    :param english_file: destination passed to save_embedding; an empty value
                         (the default) or None disables saving the embeddings

    :raises ValueError: if alpha_share is outside [0, 1]
    """
    # NOTE: print calls use the parenthesised single-argument form, which
    # behaves the same as the print statement under Python 2.
    print (" Learning with params : ")
    print (" Learning rate : " + str(learning_rate))
    print (" Regularlization params : " + str(L2_reg))
    print (" Alpha of tieing together : " + str(alpha_share))
    print (" Batch size : " + str(batch_size))
    print (" Max Iter : " + str(max_iter))
    print (" Evaluation frequency : " + str(validation_freq))

    print ('... loading data ')

    ##### LOAD DATASET ORIGINAL and REF ##############
    print (' ----> load the mapping matrix ')
    mapping_matrix = load_mapping_matrix(map_file)

    print (' ----> load translation vectors ')
    ref_tras_idx, tras_idx = load_translation_vector(bidict_file)

    print (' ----> load the original data ')
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    E = datasets[2]
    W1 = datasets[3]
    B1 = datasets[4]
    W2 = datasets[5]

    print (' ----> load the ref data ')
    ref_datasets = load_data(ref_dataset)
    ref_train_set_x, ref_train_set_y = ref_datasets[0]
    ref_valid_set_x, ref_valid_set_y = ref_datasets[1]
    refE = ref_datasets[2]
    refW1 = ref_datasets[3]
    refB1 = ref_datasets[4]
    refW2 = ref_datasets[5]

    # compute number of minibatches for training, validation and testing
    n_train_rows = train_set_x.owner.inputs[0].get_value(borrow=True).shape[0]
    n_ref_train_rows = ref_train_set_x.owner.inputs[0].get_value(borrow=True).shape[0]
    n_valid_rows = valid_set_x.owner.inputs[0].get_value(borrow=True).shape[0]

    # explicit floor division: the counts are used as range bounds
    n_train_batches = n_train_rows // batch_size
    n_ref_train_batches = n_ref_train_rows // batch_size
    n_valid_batches = n_valid_rows // batch_size

    # a trailing partial batch only counts when it has more than 100 rows
    if n_train_rows % batch_size > 100:
        n_train_batches += 1
    if n_valid_rows % batch_size > 100:
        n_valid_batches += 1

    print('Training batches : ' + str(n_train_batches))
    print('Ref training batches : ' + str(n_ref_train_batches))
    print('Valid batches : ' + str(n_valid_batches))

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()      # index to a [mini]batch
    ref_index = T.lscalar()  # index to a reference [mini]batch

    x = T.imatrix('x')        # target inputs
    xref = T.imatrix('xref')  # reference inputs
    yref = T.ivector('yref')  # reference labels, 1D vector of [int] labels
    y = T.ivector('y')        # target labels, 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    ###### DROP OUT RATE #############
    dropout_rate_hidden = 0.5
    dropout_rate_visible = 0.2
    #############################

    # construct the MLP class
    classifier = MLP(rng,
                     input=x,
                     refInput=xref,
                     E=E,
                     W1=W1,
                     B1=B1,
                     W2 = W2,
                     refE = refE,
                     refW1 = refW1,
                     refB1 = refB1,
                     refW2 = refW2,
                     mapping = mapping_matrix,
                     drop_out_rate=dropout_rate_hidden,
                     drop_out_embedding_rate=dropout_rate_visible,
                     ref_tras_idx = ref_tras_idx,
                     tras_idx = tras_idx,
                     )

    train_errors = (classifier.errors(y))

    # the message promises [0,1], so reject negative values as well
    if not (0 <= alpha_share <= 1):
        raise ValueError(" Value of Alpha must be [0,1] ")

    cost = (
        alpha_share * classifier.negative_log_likelihood(y)
        + (1 - alpha_share) * classifier.refNegative_log_likelihood(yref)
        + L2_reg * classifier.L2_sqr
        + reg_join * classifier.reg_L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch of the target validation set
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],  # x,y here is symbolic variable
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta
    gparams = [T.grad(cost, param) for param in classifier.params]

    # AdaGrad: scale each step by the root of the accumulated squared gradient
    updates = OrderedDict()
    for accugrad, param, gparam in zip(classifier._accugrads, classifier.params, gparams):
        agrad = accugrad + gparam * gparam
        dx = - (learning_rate / T.sqrt(agrad + ada_epsilon)) * gparam
        updates[param] = param + dx
        updates[accugrad] = agrad

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index, ref_index],
        outputs=(cost, train_errors),
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],  # x,y here is symbolic variable
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            xref: ref_train_set_x[ref_index * batch_size: (ref_index + 1) * batch_size],
            yref: ref_train_set_y[ref_index * batch_size: (ref_index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training ')

    # early-stopping parameters
    patience = 2000  # Long Duong : at least this many iterations are run
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = validation_freq

    best_validation_loss = numpy.inf
    best_iter = 0
    start_time = time.clock()

    epoch = 0
    done_looping = False
    ref_batch_idx = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            (minibatch_avg_cost, minibatch_avg_error) = train_model(minibatch_index, ref_batch_idx)

            # cycle through the reference batches independently of the epoch
            ref_batch_idx += 1
            if ref_batch_idx >= n_ref_train_batches:
                ref_batch_idx = 0

            # number of parameter updates done before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            print (' Iteration : ' + str(iteration) + ' with Cost (join) = ' + str(minibatch_avg_cost) + ' with errors (target only) = ' + str(minibatch_avg_error))

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        # Long Duong : it took `iteration` updates to get here,
                        # so wait proportionally longer for the next improvement
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Save the model; weights are rescaled by the keep
                    # probability of the dropout applied to their input
                    save_model(output, classifier.embeddingLayer.E.get_value(),
                               (classifier.dropout_HiddenLayer.W.get_value() * (1 - dropout_rate_visible)).T,
                               classifier.dropout_HiddenLayer.b.get_value(),
                               (classifier.dropout_LogRegressionLayer.W.get_value() * (1 - dropout_rate_hidden)).T)

                    # The default '' is "not None" but is no usable
                    # destination, so test for emptiness instead
                    if english_file:
                        save_embedding(english_file, classifier.refEmbeddingLayer.E.get_value(), classifier.embeddingLayer.E.get_value())

            # Long Duong : add max_iter criterion
            if (patience <= iteration) or (iteration > max_iter):
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i') %
          (best_validation_loss * 100., best_iter + 1))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')
# Skip-gram with negative sampling: score (target, context) couples by the dot
# product of their embeddings and train against the couple labels.
vocab, couples, labels = process_data(train_filename, window_size)

# generate model
vocab_size = len(vocab)
word_target, word_context = zip(*couples)

input_target = Input((1,))
input_context = Input((1,))

# One embedding table shared by the target and the context word
embedding = Embedding(vocab_size, embedding_size, input_length=1)

target = embedding(input_target)
target = Reshape((embedding_size,))(target)
context = embedding(input_context)
context = Reshape((embedding_size,))(context)

dot_product = dot([target, context], 1)
dot_product = Reshape((1,))(dot_product)
output = Dense(1, activation='sigmoid')(dot_product)

# `inputs`/`outputs` are the Keras 2 keyword names; the singular forms are
# legacy Keras 1 spellings that newer releases reject.
model = Model(inputs=[input_target, input_context], outputs=output)
model.summary()

model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

# NOTE(review): steps_per_epoch is set to batch_size, so one epoch consumes
# batch_size ** 2 samples regardless of the corpus size -- confirm this is
# intended rather than len(labels) // batch_size.
model.fit_generator(batch_generator(word_target, word_context, labels, batch_size),
                    steps_per_epoch=batch_size,
                    epochs=epochs)

save_embedding('skipgram-embedding_labeled.txt', embedding.get_weights()[0], vocab)
def line_embed_mesh1d(model, mesh1d, bounding_shape, **kwargs):
    '''
    Embed mesh1d as lines in a mesh of the Xd bounding shape.

    Vertices and edges of mesh1d are added to the gmsh model as points and
    lines, the lines are embedded in the bounding entity and the mesh is
    generated. Edge `i` of mesh1d is tagged `i + 1` in the edge function of
    the generated mesh; nodes gmsh inserted on an edge are recovered by
    sorting them by distance from the edge's first vertex.

    Required kwargs: 'debug', 'save_geo', 'save_msh', 'save_embedding'
    (falsy disables). With a truthy kwargs['return_mesh_only'] only the
    converted mesh is returned; otherwise a utils.LineMeshEmbedding.
    '''
    timer = utils.Timer('Line embedding model definition', 1)
    _, tdim = mesh1d.coordinates().shape
    # Figure out how to bound it
    counts = bounding_shape.create_volume(model, mesh1d.coordinates())

    # In gmsh Point(4) will be returned as fourth node, hence the -1;
    # mesh_1d.x[i] is embedding_mesh[vertex_map[i]]. Planar points are padded
    # with a zero third coordinate.
    pad_z = tdim == 2
    vertex_map = np.array([
        model.geo.addPoint(*(np.r_[xi, 0] if pad_z else xi)) - 1
        for xi in mesh1d.coordinates()
    ])  # Dolfin to gmsh

    # Add lines of 1d
    mesh1d.init(1, 0)
    e2v = mesh1d.topology()(1, 0)

    lines = []
    edge_encoding = []
    for edge in tqdm.tqdm(range(mesh1d.num_entities(1))):
        v0, v1 = vertex_map[e2v(edge)] + 1
        line = model.geo.addLine(v0, v1)
        # There will be a edge function such that edge corresponding
        # to edge `i` in mesh1d will have tag `i`
        model.addPhysicalGroup(1, [line], edge + 1)
        lines.append(line)
        # FIXME:
        edge_encoding.append([v0 - 1, v1 - 1])

    bounding_entity = counts[tdim]
    model.addPhysicalGroup(tdim, [bounding_entity], 1)
    model.geo.synchronize()
    model.mesh.embed(1, lines, tdim, bounding_entity)
    model.geo.synchronize()
    # --
    timer.done()

    if kwargs['debug']:
        gmsh.fltk.initialize()
        gmsh.fltk.run()

    if kwargs['save_geo']:
        gmsh.write('%s.geo_unrolled' % kwargs['save_geo'])

    timer = utils.Timer('Generation line embedded mesh', 1)
    model.mesh.generate(tdim)
    timer.done()

    if kwargs['save_msh']:
        gmsh.write('%s.msh' % kwargs['save_msh'])

    conversion_timer = utils.Timer('Mesh conversion', 1)
    # FIXME: as part of debugging do this with mesh convert
    if kwargs.get('return_mesh_only', False):
        mesh_only = conversion.mesh_from_gmshModel(model, include_mesh_functions=None)
        return mesh_only[0]

    # maybe the mesh_fs[1] is wrong
    embedding_mesh, mesh_fs = conversion.mesh_from_gmshModel(model,
                                                             include_mesh_functions=1)
    conversion_timer.done()

    gmsh.clear()

    timer = utils.Timer('Fishing for embedded edges', 1)
    edge_f = mesh_fs[1]
    edge_values = edge_f.array()

    embedding_mesh.init(1, 0)
    e2v = embedding_mesh.topology()(1, 0)
    x = embedding_mesh.coordinates()
    # It remains to account for the nodes that might have been inserted
    # on the edge
    E2V = mesh1d.topology()(1, 0)
    topology_as_edge = []
    # FIXME: rewrite in terms of mesh1d?
    for tag, edge in enumerate(edge_encoding, 1):
        tagged, = np.where(edge_values == tag)
        topology_as_edge.append(list(tagged))

        if len(tagged) <= 1:
            continue

        nodes = np.unique(np.hstack([e2v(e) for e in tagged]))
        assert set(edge) <= set(nodes), (edge, nodes, tag,
                                         embedding_mesh.coordinates()[edge],
                                         embedding_mesh.coordinates()[nodes],
                                         mesh1d.coordinates()[E2V(tag - 1)])
        # NOTE: Here we use the fact that we have a straight line so
        # we simply order interior nodes of the edge by their distance
        # from start
        distance = np.linalg.norm(x[nodes] - x[edge[0]], 2, axis=1)
        nodes = nodes[np.argsort(distance)]
        assert nodes[-1] == edge[1], (tag, edge, nodes)
        # Insert them between the two end vertices
        for position, node in enumerate(nodes[1:-1], 1):
            edge.insert(position, node)
    timer.done()

    # Combine
    edge_encoding = utils.EdgeMap(edge_encoding, topology_as_edge)
    skew_encoding = utils.EdgeMap({}, {})
    ans = utils.LineMeshEmbedding(embedding_mesh, vertex_map, edge_f, edge_encoding, skew_encoding)

    if kwargs['save_embedding']:
        utils.save_embedding(ans, kwargs['save_embedding'])

    return ans
def test_mlp(learning_rate=0.01, L2_reg=0.00000001, n_epochs=2000,
             dataset='theano.join.data', ref_dataset = 'ref.theano.join.data',
             batch_size=10000, max_iter = 5000,
             output='theano.model.out', validation_freq = 100, ada_epsilon = 0.000001,
             alpha_share = 0.9,
             map_file = "labels.mapping", english_file = ''):
    """
    Jointly train the target and the reference classifier with AdaGrad updates.

    The cost is
        alpha_share * NLL(target) + (1 - alpha_share) * NLL(reference)
        + L2_reg * classifier.L2_sqr
    Every `validation_freq` parameter updates the target validation error is
    computed; whenever it improves, the model is written with `save_model`
    and, if `english_file` is given, the embeddings with `save_embedding`.

    :type learning_rate: float
    :param learning_rate: base step size of the AdaGrad update

    :type L2_reg: float
    :param L2_reg: weight of classifier.L2_sqr in the cost

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path passed to load_data for the target data

    :type ref_dataset: string
    :param ref_dataset: path passed to load_data for the reference data

    :type batch_size: int
    :param batch_size: number of rows per minibatch

    :type max_iter: int
    :param max_iter: training stops once the update counter exceeds this

    :type output: string
    :param output: destination passed to save_model

    :type validation_freq: int
    :param validation_freq: number of updates between two validations

    :type ada_epsilon: float
    :param ada_epsilon: constant added under the square root of the
                        accumulated squared gradient

    :type alpha_share: float
    :param alpha_share: weight of the target loss, must lie in [0, 1]

    :type map_file: string
    :param map_file: path passed to load_mapping_matrix

    :type english_file: string
    :param english_file: destination passed to save_embedding; an empty value
                         (the default) or None disables saving the embeddings

    :raises ValueError: if alpha_share is outside [0, 1]
    """
    # NOTE: print calls use the parenthesised single-argument form, which
    # behaves the same as the print statement under Python 2.
    print (" Learning with params : ")
    print (" Learning rate : " + str(learning_rate))
    print (" Regularlization params : " + str(L2_reg))
    print (" Alpha of tieing together : " + str(alpha_share))
    print (" Batch size : " + str(batch_size))
    print (" Max Iter : " + str(max_iter))
    print (" Evaluation frequency : " + str(validation_freq))

    print ('... loading data ')

    ##### LOAD DATASET ORIGINAL and REF ##############
    print (' ----> load the mapping matrix ')
    mapping_matrix = load_mapping_matrix(map_file)

    print (' ----> load the original data ')
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    E = datasets[2]
    W1 = datasets[3]
    B1 = datasets[4]
    W2 = datasets[5]

    print (' ----> load the ref data ')
    ref_datasets = load_data(ref_dataset)
    ref_train_set_x, ref_train_set_y = ref_datasets[0]
    ref_valid_set_x, ref_valid_set_y = ref_datasets[1]
    refE = ref_datasets[2]
    refW1 = ref_datasets[3]
    refB1 = ref_datasets[4]
    refW2 = ref_datasets[5]

    # compute number of minibatches for training, validation and testing
    n_train_rows = train_set_x.owner.inputs[0].get_value(borrow=True).shape[0]
    n_ref_train_rows = ref_train_set_x.owner.inputs[0].get_value(borrow=True).shape[0]
    n_valid_rows = valid_set_x.owner.inputs[0].get_value(borrow=True).shape[0]

    # explicit floor division: the counts are used as range bounds
    n_train_batches = n_train_rows // batch_size
    n_ref_train_batches = n_ref_train_rows // batch_size
    n_valid_batches = n_valid_rows // batch_size

    # a trailing partial batch only counts when it has more than 100 rows
    if n_train_rows % batch_size > 100:
        n_train_batches += 1
    if n_valid_rows % batch_size > 100:
        n_valid_batches += 1

    print('Training batches : ' + str(n_train_batches))
    print('Ref training batches : ' + str(n_ref_train_batches))
    print('Valid batches : ' + str(n_valid_batches))

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()      # index to a [mini]batch
    ref_index = T.lscalar()  # index to a reference [mini]batch

    x = T.imatrix('x')        # target inputs
    xref = T.imatrix('xref')  # reference inputs
    yref = T.ivector('yref')  # reference labels, 1D vector of [int] labels
    y = T.ivector('y')        # target labels, 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    ###### DROP OUT RATE #############
    dropout_rate_hidden = 0.5
    dropout_rate_visible = 0.2
    #############################

    # construct the MLP class
    classifier = MLP(rng,
                     input=x,
                     refInput=xref,
                     E=E,
                     W1=W1,
                     B1=B1,
                     W2 = W2,
                     refE = refE,
                     refW1 = refW1,
                     refB1 = refB1,
                     refW2 = refW2,
                     mapping = mapping_matrix,
                     drop_out_rate=dropout_rate_hidden,
                     drop_out_embedding_rate=dropout_rate_visible
                     )

    train_errors = (classifier.errors(y))

    # the message promises [0,1], so reject negative values as well
    if not (0 <= alpha_share <= 1):
        raise ValueError(" Value of Alpha must be [0,1] ")

    cost = (
        alpha_share * classifier.negative_log_likelihood(y)
        + (1 - alpha_share) * classifier.refNegative_log_likelihood(yref)
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch of the target validation set
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],  # x,y here is symbolic variable
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta
    gparams = [T.grad(cost, param) for param in classifier.params]

    # AdaGrad: scale each step by the root of the accumulated squared gradient
    updates = OrderedDict()
    for accugrad, param, gparam in zip(classifier._accugrads, classifier.params, gparams):
        agrad = accugrad + gparam * gparam
        dx = - (learning_rate / T.sqrt(agrad + ada_epsilon)) * gparam
        updates[param] = param + dx
        updates[accugrad] = agrad

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index, ref_index],
        outputs=(cost, train_errors),
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],  # x,y here is symbolic variable
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            xref: ref_train_set_x[ref_index * batch_size: (ref_index + 1) * batch_size],
            yref: ref_train_set_y[ref_index * batch_size: (ref_index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training ')

    # early-stopping parameters
    patience = 2000  # Long Duong : at least this many iterations are run
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = validation_freq

    best_validation_loss = numpy.inf
    best_iter = 0
    start_time = time.clock()

    epoch = 0
    done_looping = False
    ref_batch_idx = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            (minibatch_avg_cost, minibatch_avg_error) = train_model(minibatch_index, ref_batch_idx)

            # cycle through the reference batches independently of the epoch
            ref_batch_idx += 1
            if ref_batch_idx >= n_ref_train_batches:
                ref_batch_idx = 0

            # number of parameter updates done before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            print (' Iteration : ' + str(iteration) + ' with Cost (join) = ' + str(minibatch_avg_cost) + ' with errors (target only) = ' + str(minibatch_avg_error))

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        # Long Duong : it took `iteration` updates to get here,
                        # so wait proportionally longer for the next improvement
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Save the model; weights are rescaled by the keep
                    # probability of the dropout applied to their input
                    save_model(output, classifier.embeddingLayer.E.get_value(),
                               (classifier.dropout_HiddenLayer.W.get_value() * (1 - dropout_rate_visible)).T,
                               classifier.dropout_HiddenLayer.b.get_value(),
                               (classifier.dropout_LogRegressionLayer.W.get_value() * (1 - dropout_rate_hidden)).T)

                    # Now save the English model. The default '' is "not None"
                    # but is no usable destination, so test for emptiness
                    if english_file:
                        save_embedding(english_file, classifier.refEmbeddingLayer.E.get_value(), classifier.embeddingLayer.E.get_value())

            # Long Duong : add max_iter criterion
            if (patience <= iteration) or (iteration > max_iter):
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i') %
          (best_validation_loss * 100., best_iter + 1))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')
def get_image():
    """Embed the face found in every image uploaded under the 'file' field.

    For each uploaded file whose name passes ``allowed_file``:

    1. the image is read as RGB and passed to ``get_face``;
    2. if ``get_face`` returns a crop, its embedding is computed with
       ``img_to_encoding(img, FRmodel)``;
    3. the *cropped face* is written via ``save_image`` (``uploads_path``),
       the embedding via ``save_embedding`` (``embeddings_path``), and the
       embedding is also cached in the module-level ``embedding_dict`` under
       the extension-less filename.

    Files with a disallowed name, or in which no face is detected, are
    skipped without counting as a success.

    Returns:
        A plain-text status string: the number of successfully embedded
        images, or an error message (missing 'file' field, empty filename,
        processing failure, or non-POST request).
    """
    import traceback

    if request.method != 'POST':
        return "POST HTTP method required!"

    if 'file' not in request.files:
        return "No 'file' field in POST request!"

    list_success = []
    for upload in request.files.getlist('file'):
        filename = upload.filename

        # NOTE: an empty filename aborts the whole request, even if earlier
        # files in the same batch were already processed and saved.
        if filename == "":
            return "No selected file!"

        if not (upload and allowed_file(filename=filename, allowed_set=allowed_set)):
            continue

        filename = secure_filename(filename=filename)
        try:
            # Read image file as numpy array of RGB dimension
            img = imread(name=upload, mode='RGB')

            # Detect and crop the human face in the image (sized by image_size)
            img = get_face(
                img=img,
                pnet=pnet,
                rnet=rnet,
                onet=onet,
                image_size=image_size
            )

            # Only continue when a human face was detected
            if img is not None:
                embedding = img_to_encoding(img, FRmodel)

                # Save cropped face image to the uploads folder
                save_image(img=img, filename=filename, uploads_path=uploads_path)

                # Embedding files are named after the image, minus its extension
                filename = remove_file_extension(filename=filename)

                # Save embedding to the embeddings folder
                save_embedding(
                    embedding=embedding,
                    filename=filename,
                    embeddings_path=embeddings_path
                )
                embedding_dict[filename] = embedding
                list_success.append(filename)
        except Exception:
            # Catch Exception rather than everything, so SystemExit and
            # KeyboardInterrupt still propagate; keep the traceback so the
            # failure cause is not lost behind the generic response text.
            traceback.print_exc()
            return 'error'+str(filename)+'Image uploaded and embedded successfully ' +str(len(list_success))

    return "Image uploaded and embedded successfully:- "+str(len(list_success))
def detail():
    """Detect text and face in an Aadhaar card and return the extracted data.

    On POST the handler:

    1. saves the uploads 'photo-front' and 'photo-back' under timestamped
       names in ``uploads_path + 'Front'`` / ``uploads_path + 'Back'``;
    2. runs ``recognise_text`` on the front image and ``get_labels_from_aadhar``
       on its output, then ``crop_aadhar`` + ``recognise_text`` + ``get_address``
       on the back image (the temporary crop is deleted afterwards);
    3. appends the result dict to 'myfile.json';
    4. runs ``get_face`` on the photo path returned by ``recognise_text`` and,
       when a face is found, stores its ``forward_pass`` embedding under the
       extracted 'Name' field via ``save_embedding``;
    5. appends the raw and cleaned text to 'outputs.txt'.

    Returns:
        ``jsonify`` of a dict with keys 'status', 'fields', 'image_path_front',
        'image_path_back' and 'photo_path' for POST requests, or
        ``jsonify({'status': False})`` for any other method.
    """
    if request.method != 'POST':
        # if not POST, terminate
        return jsonify({'status': False})

    # saving current timestamp
    current_time = str(datetime.datetime.now()).replace('-', '_').replace(
        ':', '_')

    # The type of image i.e. Front or Back image
    image_type1 = 'Front'
    image_type2 = 'Back'

    # Path for Front image and the face image that will be cropped
    filename1 = uploads_path + image_type1 + '/' + current_time + '.jpg'
    photo_path = uploads_path + image_type1 + '/' + 'faces' + '/' + current_time + '.png'

    # Path for Back image and the temporary crop made from it
    filename2 = uploads_path + image_type2 + '/' + current_time + '.jpg'
    crop_path = uploads_path + image_type2 + '/temp/' + current_time + '.png'

    # Make sure the working folders exist. Creating each leaf with
    # exist_ok=True also covers the case where 'Front'/'Back' already exists
    # but its 'faces'/'temp' sub-folder does not.
    os.makedirs(uploads_path + image_type1 + '/' + 'faces', exist_ok=True)
    os.makedirs(uploads_path + image_type2 + '/temp', exist_ok=True)

    # get Front Card Photo from user
    photo1 = request.files['photo-front']
    photo1.save(filename1)

    # get Back Card Photo from user
    photo2 = request.files['photo-back']
    photo2.save(filename2)

    print("Processing Front Image ......")
    # Process The Front Card Image
    data, photo_path = recognise_text(filename1, photo_path)
    details = get_labels_from_aadhar(data)
    print("Processing Front Image ...... DONE")

    print("Processing Back Image .......")
    # Process The Back Card Image
    crop_aadhar(filename2, crop_path)
    data2, _ = recognise_text(crop_path, 'none')
    details.update(get_address(data2))
    print("Processing Back Image ....... DONE")
    os.remove(crop_path)

    data_dict = {
        'status': True,
        'fields': details,
        'image_path_front': filename1,
        'image_path_back': filename2,
        'photo_path': photo_path
    }

    print("save into json files")
    # the json file where the output must be stored
    with open('myfile.json', 'a+') as out_file:
        json.dump(data_dict, out_file, indent=6)

    img = imread(name=photo_path, mode='RGB')
    print("Processing Face Image .......")
    # Detect and crop the human face in the image file (sized by image_size)
    img = get_face(img=img,
                   pnet=pnet,
                   rnet=rnet,
                   onet=onet,
                   image_size=image_size)

    if img is not None:
        embedding = forward_pass(
            img=img,
            session=facenet_persistent_session,
            images_placeholder=images_placeholder,
            embeddings=embeddings,
            phase_train_placeholder=phase_train_placeholder,
            image_size=image_size)
        print("Processing Face Image ....... DONE")

        # Save The Face embedding as the name of the Person
        # NOTE(review): assumes the extracted fields contain 'Name' - confirm
        # against get_labels_from_aadhar.
        filename = data_dict['fields']['Name']
        filename = secure_filename(filename=filename)

        # Save embedding to the embeddings folder
        save_embedding(embedding=embedding,
                       filename=filename,
                       embeddings_path=embeddings_path)
    else:
        # No usable face crop: skip the embedding instead of passing None to
        # the model; the extracted text is still returned.
        print("Processing Face Image ....... no face detected, embedding skipped")

    # Write the Raw and Cleaned Text detected from the Card
    rule = "##########################################################################\n\n"
    with open('outputs.txt', 'a+') as f:
        f.write(rule)
        f.write(
            '######################## Raw Output for Front Card Image #############################\n\n'
        )
        for value in data:
            f.write(str(value) + '\n')
        f.write(rule)
        f.write(
            '######################## Raw Output for Back Card Image #############################\n\n'
        )
        for value in data2:
            f.write(str(value) + '\n')
        f.write(
            '\n\n######################## Cleaned Output #############################\n\n'
        )
        for key, value in details.items():
            f.write(str(key) + ' : ' + str(value) + '\n')
        f.write(rule)

    return jsonify(data_dict)