def homses():
    """Align the stored training faces and render the confirmation page.

    Reads ``Name`` and ``roll_num``, which are not defined in this function
    (presumably module-level state set by an earlier request -- verify against
    the rest of the file), and hands them to ``alignment.align`` together
    with the image root folder.

    Returns:
        The rendered ``trying.html`` template carrying a status message.
    """
    image_root = "training_image/"
    person_folder = Name + str(roll_num)
    alignment.align(image_root, person_folder, Name, roll_num)
    # engines(text="faces are aligned")
    return render_template(
        'trying.html',
        prediction_text="Faces Are Now Aligned",
    )
def __init__(self, root, name='testing', mode='paired'):
    """Index the ``style`` and ``input`` folders found under ``root/name``.

    Args:
        root: Base directory of the dataset.
        name: Sub-directory (split) to load; defaults to ``'testing'``.
        mode: Free-form mode string. When it contains ``'no_align'`` the
            two file lists are passed through ``align``.
    """
    split_dir = os.path.join(root, name)
    self.feat = make_dataset(os.path.join(split_dir, 'style'))
    self.input = make_dataset(os.path.join(split_dir, 'input'))
    self.mode = mode
    self.name = name
    self.trans = make_trans()
    needs_alignment = 'no_align' in self.mode
    if needs_alignment:
        align(self.input, self.feat)
    # Report the dataset size once construction is complete.
    print(len(self))
def align_wordvectors(*wvs, method="global"):
    """Align every embedding in ``wvs`` to the first one.

    Args:
        *wvs: Word-vector objects; ``wvs[0]`` is the alignment target and is
            returned untouched as the first element.
        method: ``"global"`` aligns with ``align``; ``"noise_aware"`` obtains
            the transform from ``noise_aware`` and applies it to
            ``wv.vectors`` in place. Any other value leaves the remaining
            embeddings as they are.

    Returns:
        list: the target followed by the (aligned) remaining embeddings.
    """
    reference = wvs[0]
    result = [reference]
    for current in wvs[1:]:
        if method == "global":
            current, _target, _transform = align(current, reference)
        elif method == "noise_aware":
            transform, _alpha, _clean, _noisy = noise_aware(
                current.vectors, reference.vectors)
            current.vectors = np.dot(current.vectors, transform)
        result.append(current)
    return result
def align(request, likelihood, path, name):
    """Run the score/audio alignment and return the event points as JSON.

    Args:
        request: Incoming HTTP request (not inspected here).
        likelihood: Likelihood identifier forwarded to ``alignment.align``.
        path: Path identifier forwarded to ``alignment.align``.
        name: Name of the piece; also used to look up its score.

    Returns:
        HttpResponse whose body is a JSON object with the keys ``audio``,
        ``score`` and ``duration``.
    """
    result = alignment.align(name, likelihood, path, rel_width=0.3)
    score = data.get_score(name)
    audio_points, score_points = result.get_events(score)
    payload = {
        'audio': audio_points,
        'score': score_points,
        'duration': score.length(),
    }
    return HttpResponse(json.dumps(payload))
def main(c1, c1_tag, c2, c2_tag, out, format):
    """Align two checklists and write a report, logging to ``<out>.log``.

    Args:
        c1: Source of the first ("low") checklist.
        c1_tag: Tag for the first checklist; ``"."`` is appended before use.
        c2: Source of the second ("high") checklist.
        c2_tag: Tag for the second checklist; ``"."`` is appended before use.
        out: Output path; the log file is written to ``out + ".log"``.
        format: Report format, forwarded to ``write_report``.
    """
    dribpath = out + ".log"
    with open(dribpath, "w") as dribfile:
        dribble.dribble_file = dribfile
        try:
            dribble.log("\nLogging to %s" % (dribpath, ))
            A = cl.read_checklist(c1, c1_tag + ".", "low-checklist")
            B = cl.read_checklist(c2, c2_tag + ".", "high-checklist")
            dribble.log("Node counts: %s %s" %
                        (len(A.get_all_nodes()), len(B.get_all_nodes())))
            # Map each B to a corresponding A
            dribble.log("Aligning ...")
            (al, xmrcas) = alignment.align(B, A)
            dribble.log("  ... finished aligning; %s articulations\n" %
                        len(al))
            # Where do xmrcas come from?
            write_report(A, B, al, xmrcas, format, out)
        finally:
            # Detach the handle even when a step above fails, so that
            # dribble never keeps a reference to a closed file.
            dribble.dribble_file = None
def main(_):
    """Build sliding three-frame training samples from one image folder.

    Reads images from ``FL.input_dir/img`` (every ``STEPSIZE``-th file),
    pairs each with its ``-seg.png`` mask from ``FL.input_dir/segimg`` (an
    all-zero mask is used when the file is missing), rescales both with
    ``img_scale`` and, once three frames are buffered, writes to
    ``FL.output_dir/train``:

    * ``<n>.png``       -- the three frames stacked horizontally,
    * ``<n>-fseg.png``  -- the three masks after ``align``,
    * ``<n>_cam.txt``   -- the flattened scaled intrinsics, comma separated.

    One ``train <n>`` line per sample is appended to
    ``FL.output_dir/train.txt``.

    Args:
        _: Unused positional argument.
    """
    train_dir = os.path.join(FL.output_dir, 'train')
    # Creates FL.output_dir as well when it does not exist yet.
    os.makedirs(train_dir, exist_ok=True)

    # The context manager guarantees the index file is flushed and closed
    # even when a frame fails to load part-way through.
    with open(os.path.join(FL.output_dir, 'train.txt'), 'w') as f_tr:
        file_calibration = os.path.join(FL.input_dir, 'calib_cam_to_cam.txt')
        calib_camera = get_line(file_calibration, 'P_rect_02')
        imgs = sorted(os.listdir(os.path.join(FL.input_dir, 'img')))
        logging.info('Total {} images in {}'.format(len(imgs), FL.input_dir))

        ct = 1
        triplet, seg_triplet = [], []
        for i in range(0, len(imgs), STEPSIZE):
            img_file = os.path.join(FL.input_dir, 'img', imgs[i])
            segimg_file = os.path.join(FL.input_dir, 'segimg',
                                       imgs[i].replace('.png', '-seg.png'))
            logging.info('Processing {} ...'.format(img_file))
            img = cv2.imread(img_file)
            if os.path.exists(segimg_file):
                segimg = cv2.imread(segimg_file, 0)  # read as grayscale
            else:
                # all black
                segimg = np.zeros(shape=(img.shape[0], img.shape[1]))
            img, segimg, cam_intr = img_scale(img, segimg, calib_camera)
            calib_representation = ','.join(
                [str(c) for c in cam_intr.flatten()])
            triplet.append(img)
            seg_triplet.append(segimg)

            # if there are enough frames for a triplet
            if len(triplet) == 3:
                output_name = str(ct).zfill(10)
                cmb = np.hstack(triplet)
                align1, align2, align3 = align(
                    seg_triplet[0], seg_triplet[1], seg_triplet[2])
                cmb_seg = np.hstack([align1, align2, align3])
                cv2.imwrite(os.path.join(train_dir, output_name + '.png'),
                            cmb)
                cv2.imwrite(
                    os.path.join(train_dir, output_name + '-fseg.png'),
                    cmb_seg)
                cam_path = os.path.join(train_dir, output_name + '_cam.txt')
                with open(cam_path, 'w') as f:
                    f.write(calib_representation)
                f_tr.write('{} {}\n'.format('train', output_name))
                # Slide the window forward by one frame.
                del triplet[0]
                del seg_triplet[0]
                ct += 1
def run_all():
    """Align three fixed KITTI frames and write the results to OUTPUT_DIR.

    The three hard-coded frames are read with OpenCV, passed to ``align``
    with ``threshold_same=0.1`` and the three returned images are stored as
    ``gbr1.png`` .. ``gbr3.png``.
    """
    frame_paths = (
        '/home/ee401_2/ferdyan_train/data/kitti_raw/2011_09_26/2011_09_26_drive_0048_sync/image_02_new/data/0000000002.png',
        '/home/ee401_2/ferdyan_train/data/kitti_raw/2011_09_26/2011_09_26_drive_0048_sync/image_02_new/data/0000000003.png',
        '/home/ee401_2/ferdyan_train/data/kitti_raw/2011_09_26/2011_09_26_drive_0048_sync/image_02_new/data/0000000004.png',
    )
    first, second, third = [cv2.imread(path) for path in frame_paths]
    aligned = align(first, second, third, threshold_same=0.1)
    # Unpack explicitly so a wrong number of results still fails loudly.
    out1, out2, out3 = aligned
    for file_name, image in (('gbr1.png', out1),
                             ('gbr2.png', out2),
                             ('gbr3.png', out3)):
        cv2.imwrite(OUTPUT_DIR + file_name, image)
    print('done')
def alignment(pocket, proj_direction):
    """Principal-axes alignment of a pocket's coordinates.

    The coordinates are centred on their mean, the eigenvectors of their
    covariance matrix are ordered by decreasing eigenvalue, and ``align``
    turns those axes plus ``proj_direction`` into a transformation matrix
    that is applied to the centred coordinates.

    Args:
        pocket: Object exposing ``x``, ``y`` and ``z`` coordinate sequences.
        proj_direction: Projection direction forwarded to ``align``.

    Returns:
        The transformed coordinates, one row per point (X * 3).
    """
    xyz = np.array([pocket.x, pocket.y, pocket.z]).T
    centroid = xyz.mean(axis=0)  # per-column mean
    centred = xyz - centroid

    # Covariance of the centred data and its eigen-decomposition.
    covariance = np.cov(centred.T)
    eigenvalues, eigenvectors = np.linalg.eig(covariance)

    # Largest eigenvalue first.
    order = np.argsort(eigenvalues)[::-1]
    principal_axes = eigenvectors[:, order]

    transform = align(principal_axes, proj_direction)
    return np.matmul(transform, centred.T).T
def compare_videos(self,
                   path1,
                   path2,
                   write_skeleton=False,
                   skeleton_out1='',
                   skeleton_out2='',
                   write_aligned=False,
                   aligned_out1='',
                   aligned_out2='',
                   write_combined=False,
                   combined_out=''):
    """Align two videos, process them frame by frame and finish the session.

    Args:
        path1, path2: Paths of the two videos to compare.
        write_skeleton: When true, write the processed frames of each video
            to ``skeleton_out1`` / ``skeleton_out2``.
        write_aligned: Forwarded to ``align`` as ``write`` together with
            ``aligned_out1`` / ``aligned_out2``.
        write_combined: When true, call ``check_alignment`` with
            ``combined_out``.

    Returns:
        Whatever ``self.dance_end()`` returns.
    """
    frames1, frames2, fps, shape1, shape2 = align(
        path1, path2,
        outpath1=aligned_out1,
        outpath2=aligned_out2,
        write=write_aligned)

    rendered1, rendered2 = [], []
    for idx in tqdm(range(len(frames1))):
        first, second = self.process_image_pair(frames1[idx], frames2[idx])
        rendered1.append(first.cvOutputData)
        rendered2.append(second.cvOutputData)

    if write_skeleton:
        print('1/2')
        self.write_video(skeleton_out1, rendered1, fps, shape1)
        print('2/2')
        self.write_video(skeleton_out2, rendered2, fps, shape2)

    if write_combined:
        check_alignment(frames1, frames2, fps, shape1, shape2, combined_out)

    return self.dance_end()
def compare_sequences(sequence, ncbiSeq):
    """Merge a structure sequence with its NCBI reference after alignment.

    The available command-line alignment tools need FASTA file names, so
    ``align`` is used because it accepts the sequences directly.

    Args:
        sequence: Sequence taken from the structure.
        ncbiSeq: Reference sequence from NCBI.

    Returns:
        ``-1`` when ``align`` reports failure by returning ``False``;
        otherwise a string that keeps each residue of the aligned structure
        sequence and, at gap positions (``"-"``), uses the lower-cased
        residue of the aligned NCBI sequence.
    """
    aligned = align(sequence, ncbiSeq)
    # Propagate errors from the alignment process.
    if aligned is False:
        return -1

    structure_seq, reference_seq = aligned
    merged = [
        reference_seq[pos].lower()
        if structure_seq[pos] == "-" else structure_seq[pos]
        for pos in range(len(structure_seq))
    ]
    return "".join(merged)
def main():
    """Run the main experiment using self-supervised alignment.

    Loads two embedding files, restricts them to their common vocabulary,
    learns landmarks with ``s4``, aligns on those landmarks and plots the
    distribution of cosine values for landmark words (blue) and
    non-landmark words (red).
    """
    # Other corpus pairs that have been used with this script:
    #   wordvectors/latin/corpus1/0.vec        wordvectors/latin/corpus2/0.vec
    #   wordvectors/source/theguardianuk.vec   wordvectors/source/thenewyorktimes_1.vec
    #   wordvectors/usuk/bnc.vec               wordvectors/usuk/coca_mag.vec
    #   wordvectors/artificial/NYT-0.vec       wordvectors/artificial/NYT-500_random.vec
    source_path = "wordvectors/semeval/latin-corpus1.vec"
    target_path = "wordvectors/semeval/latin-corpus2.vec"

    plt.style.use("seaborn")

    # Read WordVectors
    use_normalized = False
    emb_a = WordVectors(input_file=source_path, normalized=use_normalized)
    emb_b = WordVectors(input_file=target_path, normalized=use_normalized)
    emb_a, emb_b = intersection(emb_a, emb_b)

    landmarks, non_landmarks, Q = s4(emb_a, emb_b,
                                     cls_model="nn",
                                     n_targets=100,
                                     n_negatives=100,
                                     rate=1,
                                     t=0.5,
                                     iters=100,
                                     verbose=1,
                                     plot=1)
    emb_a, emb_b, Q = align(emb_a, emb_b, anchor_words=landmarks)

    landmark_dists = [cosine(emb_a[word], emb_b[word]) for word in landmarks]
    other_dists = [cosine(emb_a[word], emb_b[word]) for word in non_landmarks]

    sns.distplot(landmark_dists, color="blue")
    sns.distplot(other_dists, color="red")
    plt.legend()
    plt.show()
def main():
    """Compare word stability between two arXiv categories.

    The following experiments are available:
        - Find most stable words in each ArXiv category
          (cs, math, cond-mat, physics)
        - Find most unstable (changed) words in each category
        - Find stable/unstable words across categories
        - Using different alignment strategies

    Command-line arguments ``cat1`` and ``cat2`` are the embedding files of
    the two categories. For each alignment strategy (global, noise-aware,
    self-supervised) the per-word change returned by
    ``distribution_of_change`` is printed and stored as a column, and all
    columns are written to ``results/arxiv/<cat1>-<cat2>.csv``. The ``top``
    and ``bot`` columns are never filled here and stay at zero.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("cat1", type=str,
                        help="Name of first arXiv category")
    parser.add_argument("cat2", type=str,
                        help="Name of second arXiv category")
    args = parser.parse_args()

    cat1 = args.cat1
    cat2 = args.cat2
    cat1_name = cat1.split("/")[-1]
    cat2_name = cat2.split("/")[-1]

    path_out = "results/arxiv/"
    # Make sure the report can be written after the (long) experiments.
    os.makedirs(path_out, exist_ok=True)

    wva = WordVectors(input_file=cat1)
    wvb = WordVectors(input_file=cat2)
    wva, wvb = intersection(wva, wvb)
    wva, wvb, Q = align(wva, wvb)
    words = wva.words
    print("-- Common vocab", len(words))

    # each column of this matrix will store a set of results for a method
    out_grid = np.zeros((len(words), 5))

    d = distribution_of_change(wva, wvb)
    print("====== GLOBAL")
    print("=> landmarks", len(wva.words))
    print_table(d, wva.words)
    out_grid[:, 0] = d  # add first column

    print("====== Noise Aware")
    Q, alpha, landmark_ids, noisy = noise_aware(wva.vectors, wvb.vectors)
    # noise_aware reports landmarks as row indices, while align expects
    # words in ``anchor_words``; translate before aligning.
    landmarks = [wva.words[i] for i in landmark_ids]
    wva, wvb, Q = align(wva, wvb, anchor_words=landmarks)
    print("=> landmarks", len(landmarks))
    d = distribution_of_change(wva, wvb)
    print_table(d, wva.words)
    out_grid[:, 1] = d  # add new column

    print("===== SELF")
    landmarks, nonl, Q = s4(wva, wvb, iters=100, verbose=1)
    wva, wvb, Q = align(wva, wvb, anchor_words=landmarks)
    d = distribution_of_change(wva, wvb)
    print_table(d, wva.words)
    out_grid[:, 2] = d  # last column

    # WRITE-OUT
    csv_path = os.path.join(path_out, "%s-%s.csv" % (cat1_name, cat2_name))
    with open(csv_path, "w") as fout:
        fout.write("word,global,noise-aware,self,top,bot\n")
        for i, w in enumerate(words):
            fout.write("%s,%.3f,%.3f,%.3f,%.3f,%.3f\n" %
                       (w, out_grid[i][0], out_grid[i][1], out_grid[i][2],
                        out_grid[i][3], out_grid[i][4]))
ORIGINAL_HEIGHT, ORIGINAL_WIDTH, _ = img0.shape zoom_x = WIDTH/ORIGINAL_WIDTH zoom_y = HEIGHT/ORIGINAL_HEIGHT # Adjust intrinsics. calib_current = calib_camera.copy() calib_current[0, 0] *= zoom_x calib_current[0, 2] *= zoom_x calib_current[1, 1] *= zoom_y calib_current[1, 2] *= zoom_y calib_representation = ','.join([str(c) for c in calib_current.flatten()]) if wrt == 3: img0, img1, img2 = align(img0, img1, img2, threshold_same=0.5) img0 = cv2.resize(img0, (WIDTH, HEIGHT)) img1 = cv2.resize(img1, (WIDTH, HEIGHT)) img2 = cv2.resize(img2, (WIDTH, HEIGHT)) big_img[:,0*WIDTH:(0+1)*WIDTH] = img0 big_img[:,1*WIDTH:(1+1)*WIDTH] = img1 big_img[:,2*WIDTH:(2+1)*WIDTH] = img2 imgnum = imgnum[6:] print("big_img = ", big_img.shape) # big_imgg = cv2.cvtColor(big_img, cv2.COLOR_BGR2GRAY) # Tes aing print("1 = ", OUTPUT_DIR) print("2 = ", seqname)
def run_all():
    """Convert image sequences into three-frame training samples.

    For every location folder under
    ``INPUT_DIR/leftImg8bit_sequence/SUB_FOLDER`` the PNG frames are grouped
    into runs of consecutive frame numbers. For each run the camera
    intrinsics are read from a ``*_camera.json`` file, every ``SKIP``-th
    frame is cropped together with its mask from ``INPUT_DIR/mask/...``, and
    for each sliding window of three frames the following files are written
    to ``OUTPUT_DIR/<location>_<run>``:

    * ``<n>.png``      -- the three frames stacked horizontally,
    * ``<n>-fseg.png`` -- the three masks after ``align``,
    * ``<n>_cam.txt``  -- the 3x3 intrinsics, row-major, comma separated.
    """
    dir_name = INPUT_DIR + '/leftImg8bit_sequence/' + SUB_FOLDER + '/*'
    print('Processing directory', dir_name)
    for location in glob.glob(dir_name):
        location_name = os.path.basename(location)
        print('Processing location', location_name)
        files = sorted(glob.glob(location + '/*.png'))
        files = [file for file in files if '-seg.png' not in file]

        # Break down into sequences: a new one starts whenever the sequence
        # id changes or the frame number is not the successor of the
        # previous one.
        sequences = {}
        seq_nr = 0
        last_seq = ''
        last_imgnr = -1
        for path in files:
            parts = os.path.basename(path).split('_')
            seq = parts[1]
            nr = int(parts[2])
            if seq != last_seq or last_imgnr + 1 != nr:
                seq_nr += 1
            last_imgnr = nr
            last_seq = seq
            sequences.setdefault(seq_nr, []).append(path)

        for (k, v) in sequences.items():
            print('Processing sequence', k, 'with', len(v), 'elements...')
            output_dir = OUTPUT_DIR + '/' + location_name + '_' + str(k)
            if not os.path.isdir(output_dir):
                os.mkdir(output_dir)
            seq_files = sorted(v)
            triplet = []
            seg_triplet = []
            ct = 1

            # Find applicable intrinsics. Every frame is checked and the
            # last camera file found wins.
            # NOTE(review): when no camera file exists for this sequence,
            # fx/fy/cx/cy keep the values of the previous sequence (or are
            # undefined for the very first one) -- confirm this is intended.
            for path in seq_files:
                parts = os.path.basename(path).split('_')
                osegname = parts[1]
                oimgnr = parts[2]
                applicable_intrinsics = (
                    INPUT_DIR + '/camera/' + SUB_FOLDER + '/' +
                    location_name + '/' + location_name + '_' + osegname +
                    '_' + oimgnr + '_camera.json')
                # Get the intrinsics for one of the file of the sequence.
                if os.path.isfile(applicable_intrinsics):
                    with open(applicable_intrinsics, 'r') as f:
                        lines = [line.rstrip() for line in f]
                    # The values are picked by fixed line position.
                    fx = float(lines[11].split(': ')[1].replace(',', ''))
                    fy = float(lines[12].split(': ')[1].replace(',', ''))
                    cx = float(lines[13].split(': ')[1].replace(',', ''))
                    cy = float(lines[14].split(': ')[1].replace(',', ''))

            for j in range(0, len(seq_files), SKIP):
                img = cv2.imread(seq_files[j])
                seg_path = (
                    INPUT_DIR + '/mask/' + SUB_FOLDER + '/' + location_name +
                    '/' + os.path.basename(seq_files[j]).replace(
                        'leftImg8bit.png', 'gtFine_color.png'))
                segimg = cv2.imread(seg_path)
                smallimg, segimg, fx_this, fy_this, cx_this, cy_this = crop(
                    img, segimg, fx, fy, cx, cy)
                triplet.append(smallimg)
                seg_triplet.append(segimg)
                if len(triplet) == 3:
                    frame_id = str(ct).zfill(10)
                    cmb = np.hstack(triplet)
                    align1, align2, align3 = align(
                        seg_triplet[0], seg_triplet[1], seg_triplet[2])
                    cmb_seg = np.hstack([align1, align2, align3])
                    cv2.imwrite(os.path.join(output_dir, frame_id + '.png'),
                                cmb)
                    cv2.imwrite(
                        os.path.join(output_dir, frame_id + '-fseg.png'),
                        cmb_seg)
                    cam_path = os.path.join(output_dir, frame_id + '_cam.txt')
                    # Context manager: the handle is closed even if the
                    # write fails.
                    with open(cam_path, 'w') as f:
                        f.write(str(fx_this) + ',0.0,' + str(cx_this) +
                                ',0.0,' + str(fy_this) + ',' + str(cy_this) +
                                ',0.0,0.0,1.0')
                    # Slide the window forward by one frame.
                    del triplet[0]
                    del seg_triplet[0]
                    ct += 1
def s4(wv1,
       wv2,
       verbose=0,
       plot=0,
       cls_model="nn",
       iters=100,
       n_targets=10,
       n_negatives=10,
       fast=True,
       rate=0,
       t=0.5,
       t_overlap=1,
       landmarks=None,
       update_landmarks=True,
       return_model=False,
       debug=False):
    """
    Performs self-supervised learning of semantic change.
    Generates negative samples by sampling from landmarks.
    Generates positive samples via simulation of semantic change on random
    non-landmark words.
    Trains a classifier, fine-tunes it across multiple iterations.
    If update_landmarks is True, then it learns landmarks from that step. In
    this case, the returned values are landmarks, non_landmarks, Q
    (transform matrix).
    Otherwise, landmarks are fixed from a starting set and the returned
    value is the learned classifier - landmarks must be passed.

    Arguments:
        wv1, wv2    - input WordVectors - required to be intersected before call
        verbose     - 1: display log, any other value: quiet
        plot        - 1: plot functions in the end, 0: do not plot
        cls_model   - classification model to use {"nn", "svm_auto", "svm_features"}
        iters       - max no. of iterations
        n_targets   - number of positive samples to generate
        n_negatives - number of negative samples
        fast        - use fast semantic change simulation (not read by this function)
        rate        - rate of semantic change injection
        t           - classification threshold (0.5)
        t_overlap   - overlap threshold (stop criterion)
        landmarks   - list of words to use as landmarks (classification only)
        update_landmarks - if True, learns landmarks. Otherwise, learns
                           classification model.
        return_model - if True (and update_landmarks is True), the model is
                       returned as a fourth value
        debug       - toggles debugging mode on/off. Provides reports on
                      several metrics. Slower.
    Returns:
        if update_landmarks is True:
            landmarks     - list of landmark words
            non_landmarks - list of non_landmark words
            Q             - transformation matrix for procrustes alignment
            (plus model when return_model is True)
        if update_landmarks is False:
            model - binary classifier
    """

    # Define verbose prints. Any value other than 1 is treated as quiet so
    # the helper is always defined.
    if verbose == 1:
        def verbose_print(*s, end="\n"):
            print(*s, end=end)
    else:
        def verbose_print(*s, end="\n"):
            return None

    # Keep an untouched copy of the second embedding; it is the one that is
    # re-aligned at the end of every iteration.
    wv2_original = WordVectors(words=wv2.words, vectors=wv2.vectors.copy())

    # Number of iterations to use in running averages. With 0 the slice
    # ``hist[-0:]`` is the whole history, i.e. a mean over all iterations.
    avg_window = 0

    # Begin alignment
    if update_landmarks:
        # Check if landmarks is initialized. ``is None`` (rather than
        # ``== None``) also works when an array of words is passed.
        if landmarks is None:
            wv1, wv2, Q = align(wv1, wv2)  # start from global alignment
            landmark_dists = [
                euclidean(u, v) for u, v in zip(wv1.vectors, wv2.vectors)
            ]
            landmark_args = np.argsort(landmark_dists)
            # Start from the half of the vocabulary with smallest distance.
            landmarks = [
                wv1.words[i]
                for i in landmark_args[:int(len(wv1.words) * 0.5)]
            ]
        landmark_set = set(landmarks)
        non_landmarks = np.array(
            [w for w in wv1.words if w not in landmark_set])
    else:
        landmark_set = set(landmarks)
        non_landmarks = [w for w in wv1.words if w not in landmark_set]

    wv1, wv2, Q = align(wv1, wv2, anchor_words=landmarks)

    if cls_model == "nn":
        model = build_keras_model(wv1.dimension * 2)
    elif cls_model == "svm_auto" or cls_model == "svm_features":
        model = build_sklearn_model()  # get SVC

    landmark_hist = list()  # store no. of landmark history
    loss_hist = list()  # store self-supervision loss history
    alignment_loss_hist = list()  # store landmark alignment loss
    alignment_out_hist = list()  # store alignment loss outside of lm
    alignment_all_hist = list()
    cumulative_out_hist = list()
    cumulative_alignment_hist = list()  # store cumulative loss alignment
    overlap_hist = list()  # store landmark overlap history
    cumulative_overlap_hist = list()  # mean overlap history

    # History of cosines
    cos_loss_in_hist = list()
    cos_loss_out_hist = list()
    cumulative_cos_in = list()
    cumulative_cos_out = list()

    prev_landmarks = set(landmarks)
    for step in range(iters):
        pos_samples = list()
        pos_vectors = dict()

        # Randomly sample words to inject change to
        # If no word is flagged as non_landmarks, sample from all words
        # In practice, this should never occur when selecting landmarks
        # but only for classification when aligning on all words
        if len(non_landmarks) > 0:
            targets = np.random.choice(non_landmarks, n_targets)
        else:
            targets = np.random.choice(wv1.words, n_targets)

        for target in targets:
            # Simulate semantic change in target word
            v = inject_change_single(wv2_original, target, wv1.words,
                                     wv1[target], rate)
            pos_vectors[target] = v
            pos_samples.append(target)
        # Convert to numpy array
        pos_samples = np.array(pos_samples)

        # Get negative samples from landmarks
        neg_samples = negative_samples(landmarks, n_negatives, p=None)
        neg_vectors = {w: wv2_original[w] for w in neg_samples}

        # Create dictionary of supervision samples (positive and negative)
        # Mapping word -> vector
        sup_vectors = {**neg_vectors, **pos_vectors}

        # Prepare training data
        words_train = np.concatenate((pos_samples, neg_samples))
        # assign labels to positive and negative samples
        y_train = [1] * len(pos_samples) + [0] * len(neg_samples)

        # Stack columns to shuffle data and labels together
        train = np.column_stack((words_train, y_train))
        # Shuffle batch
        np.random.shuffle(train)
        # Detach data and labels
        words_train = train[:, 0]
        y_train = train[:, -1].astype(int)

        x_train = np.array(
            [np.append(wv1[w], sup_vectors[w]) for w in words_train])

        # Append history
        landmark_hist.append(len(landmarks))
        v1_land = np.array([wv1[w] for w in landmarks])
        v2_land = np.array([wv2_original[w] for w in landmarks])
        v1_out = np.array([wv1[w] for w in non_landmarks])
        v2_out = np.array([wv2_original[w] for w in non_landmarks])

        alignment_loss = np.linalg.norm(v1_land - v2_land)**2 / len(v1_land)
        alignment_loss_hist.append(alignment_loss)
        cumulative_alignment_hist.append(
            np.mean(alignment_loss_hist[-avg_window:]))

        # out loss
        alignment_out_loss = np.linalg.norm(v1_out - v2_out)**2 / len(v1_out)
        alignment_out_hist.append(alignment_out_loss)
        cumulative_out_hist.append(np.mean(alignment_out_hist[-avg_window:]))

        # all loss
        alignment_all_loss = np.linalg.norm(
            wv1.vectors - wv2_original.vectors)**2 / len(wv1.words)
        alignment_all_hist.append(alignment_all_loss)

        if debug:
            # cosine loss
            cos_in = np.mean(
                [cosine(u, v) for u, v in zip(v1_land, v2_land)])
            cos_out = np.mean(
                [cosine(u, v) for u, v in zip(v1_out, v2_out)])
            cos_loss_in_hist.append(cos_in)
            cos_loss_out_hist.append(cos_out)
            cumulative_cos_in.append(np.mean(cos_loss_in_hist))
            cumulative_cos_out.append(np.mean(cos_loss_out_hist))

        # Begin training of the classifier
        if cls_model == "nn":
            history = model.train_on_batch(x_train, y_train,
                                           reset_metrics=False)
        elif cls_model == "svm_auto":
            model.fit(x_train, y_train)
            pred_train = model.predict_proba(x_train)
            history = [log_loss(y_train, pred_train)]
        elif cls_model == "svm_features":
            x_train_ = get_features(x_train)  # retrieve manual features
            model.fit(x_train_, y_train)
            pred_train = model.predict_proba(x_train_)
            y_hat_t = (pred_train[:, 0] > 0.5)
            acc_t = accuracy_score(y_train, y_hat_t)
            history = [log_loss(y_train, pred_train), acc_t]

        loss_hist.append(history[0])
        # "svm_auto" only reports a loss; fall back to NaN instead of
        # raising IndexError when formatting the progress line.
        train_acc = history[1] if len(history) > 1 else float("nan")

        # Apply model on original data to select landmarks
        x_real = np.array([
            np.append(u, v)
            for u, v in zip(wv1.vectors, wv2_original.vectors)
        ])
        if cls_model == "nn":
            predict_real = model.predict(x_real)
        elif cls_model == "svm_auto":
            predict_real = model.predict_proba(x_real)
            predict_real = predict_real[:, 1]
        elif cls_model == "svm_features":
            x_real_ = get_features(x_real)
            predict_real = model.predict_proba(x_real_)
            predict_real = predict_real[:, 1]

        if update_landmarks:
            # Words scored below the threshold become landmarks, words above
            # it non-landmarks; a score exactly equal to t lands in neither.
            landmarks = [
                wv1.words[i] for i in range(len(wv1.words))
                if predict_real[i] < t
            ]
            non_landmarks = [
                wv1.words[i] for i in range(len(wv1.words))
                if predict_real[i] > t
            ]

        # Update landmark overlap using Jaccard Index
        isect_ab = set.intersection(prev_landmarks, set(landmarks))
        union_ab = set.union(prev_landmarks, set(landmarks))
        j_index = len(isect_ab) / len(union_ab)
        overlap_hist.append(j_index)
        cumulative_overlap_hist.append(np.mean(
            overlap_hist[-avg_window:]))  # store mean

        prev_landmarks = set(landmarks)

        verbose_print(
            "> %3d | L %4d | l(in): %.2f | l(out): %.2f | loss: %.2f | overlap %.2f | acc: %.2f"
            % (step, len(landmarks), cumulative_alignment_hist[-1],
               cumulative_out_hist[-1], history[0],
               cumulative_overlap_hist[-1], train_acc),
            end="\r")

        wv1, wv2_original, Q = align(wv1, wv2_original,
                                     anchor_words=landmarks)

        # Check if overlap difference is below threshold
        if np.mean(overlap_hist) > t_overlap:
            break

    # Print new line
    verbose_print()

    if plot == 1:
        # One history entry is appended per executed iteration.
        n_done = len(landmark_hist)
        steps = range(n_done)

        plt.plot(steps, landmark_hist, label="landmarks")
        plt.hlines(len(wv1.words), 0, n_done, colors="red")
        plt.ylabel("No. of landmarks")
        plt.xlabel("Iteration")
        plt.show()

        plt.plot(steps, loss_hist, c="red", label="loss")
        plt.ylabel("Loss (binary crossentropy)")
        plt.xlabel("Iteration")
        plt.legend()
        plt.show()

        plt.plot(steps, cumulative_alignment_hist, label="in (landmarks)")
        plt.plot(steps, cumulative_out_hist, label="out")
        plt.plot(steps, alignment_all_hist, label="all")
        plt.ylabel("Alignment loss (MSE)")
        plt.xlabel("Iteration")
        plt.legend()
        plt.show()

        if debug:
            plt.plot(steps, cumulative_cos_in, label="cos in")
            plt.plot(steps, cumulative_cos_out, label="cos out")
            plt.legend()
            plt.show()

        plt.plot(steps, cumulative_overlap_hist, label="overlap")
        plt.ylabel("Jaccard Index", fontsize=16)
        plt.xlabel("Iteration", fontsize=16)
        plt.xticks(fontsize=16)
        plt.yticks(fontsize=16)
        plt.tight_layout()
        plt.savefig("overlap.pdf", format="pdf")

    if update_landmarks:
        if not return_model:
            return landmarks, non_landmarks, Q
        else:
            return landmarks, non_landmarks, Q, model
    else:
        return model
def run_all():
    """Convert image sequences into three-frame training samples.

    For every location folder under
    ``INPUT_DIR/leftImg8bit_sequence/SUB_FOLDER`` the PNG frames (files
    containing ``-seg.png`` are excluded) are grouped into runs of
    consecutive frame numbers. For each run the camera intrinsics are read
    from a ``*_camera.json`` file, every ``SKIP``-th frame is cropped
    together with its ``-seg.png`` mask stored next to it, and for each
    sliding window of three frames the following files are written to
    ``OUTPUT_DIR/<location>_<run>``:

    * ``<n>.png``      -- the three frames stacked horizontally,
    * ``<n>-fseg.png`` -- the three masks after ``align``,
    * ``<n>_cam.txt``  -- the 3x3 intrinsics, row-major, comma separated.
    """
    dir_name = INPUT_DIR + '/leftImg8bit_sequence/' + SUB_FOLDER + '/*'
    print('Processing directory', dir_name)
    for location in glob.glob(dir_name):
        location_name = os.path.basename(location)
        print('Processing location', location_name)
        files = sorted(glob.glob(location + '/*.png'))
        files = [file for file in files if '-seg.png' not in file]

        # Break down into sequences: a new one starts whenever the sequence
        # id changes or the frame number is not the successor of the
        # previous one.
        sequences = {}
        seq_nr = 0
        last_seq = ''
        last_imgnr = -1
        for path in files:
            parts = os.path.basename(path).split('_')
            seq = parts[1]
            nr = int(parts[2])
            if seq != last_seq or last_imgnr + 1 != nr:
                seq_nr += 1
            last_imgnr = nr
            last_seq = seq
            sequences.setdefault(seq_nr, []).append(path)

        for (k, v) in sequences.items():
            print('Processing sequence', k, 'with', len(v), 'elements...')
            output_dir = OUTPUT_DIR + '/' + location_name + '_' + str(k)
            if not os.path.isdir(output_dir):
                os.mkdir(output_dir)
            seq_files = sorted(v)
            triplet = []
            seg_triplet = []
            ct = 1

            # Find applicable intrinsics. Every frame is checked and the
            # last camera file found wins.
            # NOTE(review): when no camera file exists for this sequence,
            # fx/fy/cx/cy keep the values of the previous sequence (or are
            # undefined for the very first one) -- confirm this is intended.
            for path in seq_files:
                parts = os.path.basename(path).split('_')
                osegname = parts[1]
                oimgnr = parts[2]
                applicable_intrinsics = (
                    INPUT_DIR + '/camera/' + SUB_FOLDER + '/' +
                    location_name + '/' + location_name + '_' + osegname +
                    '_' + oimgnr + '_camera.json')
                # Get the intrinsics for one of the file of the sequence.
                if os.path.isfile(applicable_intrinsics):
                    with open(applicable_intrinsics, 'r') as f:
                        lines = [line.rstrip() for line in f]
                    # The values are picked by fixed line position.
                    fx = float(lines[11].split(': ')[1].replace(',', ''))
                    fy = float(lines[12].split(': ')[1].replace(',', ''))
                    cx = float(lines[13].split(': ')[1].replace(',', ''))
                    cy = float(lines[14].split(': ')[1].replace(',', ''))

            for j in range(0, len(seq_files), SKIP):
                img = cv2.imread(seq_files[j])
                segimg = cv2.imread(
                    seq_files[j].replace('.png', '-seg.png'))
                smallimg, segimg, fx_this, fy_this, cx_this, cy_this = crop(
                    img, segimg, fx, fy, cx, cy)
                triplet.append(smallimg)
                seg_triplet.append(segimg)
                if len(triplet) == 3:
                    frame_id = str(ct).zfill(10)
                    cmb = np.hstack(triplet)
                    align1, align2, align3 = align(
                        seg_triplet[0], seg_triplet[1], seg_triplet[2])
                    cmb_seg = np.hstack([align1, align2, align3])
                    cv2.imwrite(os.path.join(output_dir, frame_id + '.png'),
                                cmb)
                    cv2.imwrite(
                        os.path.join(output_dir, frame_id + '-fseg.png'),
                        cmb_seg)
                    cam_path = os.path.join(output_dir, frame_id + '_cam.txt')
                    # Context manager: the handle is closed even if the
                    # write fails.
                    with open(cam_path, 'w') as f:
                        f.write(str(fx_this) + ',0.0,' + str(cx_this) +
                                ',0.0,' + str(fy_this) + ',' + str(cy_this) +
                                ',0.0,0.0,1.0')
                    # Slide the window forward by one frame.
                    del triplet[0]
                    del seg_triplet[0]
                    ct += 1
if ko(strains_file) or ko(aligned_file) or ko(samples_file): base_strains = bs.base_strains("../../data/Borrelia/MLST_19032019") # base_strains = base_strains[:22] samples = s.samples(base_strains) more_strains = {} for sample in samples: for strain in sample[1]: more_strains[strain.id] = strain more_strains = list(more_strains.values()) strains = base_strains + more_strains aligned = al.align(strains) os.makedirs(root_dir, exist_ok=True) w.write(strains_file, w.json(list(map(lambda s: s.to_json(), strains)))) w.write(aligned_file, w.json({k: str(v) for k, v in aligned.items()})) w.write(samples_file, w.json(list(map(s.to_json, samples)))) strains_json = r.read(strains_file, r.json) aligned_json = r.read(aligned_file, r.json) samples_json = r.read(samples_file, r.json) strains = list(map(Strain.from_json, strains_json)) aligned = {k: Seq(v) for k, v in aligned_json.items()} samples = list(map(s.from_json, samples_json)) for sample in samples:
img = cv2.resize(img, (WIDTH, HEIGHT)) # Remove NaN and inf values img = np.nan_to_num(img) img[img > 255] = 255 img[img < 0] = 0 big_img[:, wct * WIDTH:(wct + 1) * WIDTH] = img wct += 1 # Generate seg_mask and add to list seg_list.append(mask_generator.generate_seg_img(img)) # mask_generator.visualize() # Align seg_masks seg_list[0], seg_list[1], seg_list[2] = align( seg_list[0], seg_list[1], seg_list[2]) big_seg_img = np.zeros(shape=(HEIGHT, WIDTH * SEQ_LENGTH, 3)) # Create seg_mask triplet # for k in range(0, len(seg_list)): # big_seg_img[:, k * WIDTH:(k + 1) * WIDTH] = seg_list[k] # # # Remove NaN and inf values # big_seg_img = np.nan_to_num(big_seg_img) # big_seg_img[big_seg_img > 255] = 255 # big_seg_img[big_seg_img < 0] = 0 # # if True in np.isnan(big_seg_img): # print("ERROR: Infinite values from seg image!") # nan_check = True # if True in np.isinf(big_seg_img):
def main():
    """
    Performs tests on SemEval2020-Task 1 data on Unsupervised Lexical Semantic Change Detection.
    This experiment is designed to evaluate the performance of different landmark selection approaches,
    showing how the classification performance is affected by the landmark choices.

    For every requested language and every entry of ``align_methods`` the
    two corpus embeddings are aligned on the selected landmarks and the
    target words are classified with the classifier chosen via ``--cls``.
    A table with the mean and maximum accuracy per method and language is
    printed at the end.
    """
    # Fixed seed so the sampling done by the landmark/classifier routines is
    # repeatable between runs.
    np.random.seed(1)

    # Landmark selection strategies evaluated below. "top-50" / "bot-50"
    # have branches further down but are not part of this list.
    align_methods = [
        "s4", "noise-aware", "top-10", "bot-10", "global", "top-5", "bot-5"
    ]

    parser = argparse.ArgumentParser()
    parser.add_argument("--languages",
                        nargs="+",
                        help="Languages to use",
                        default=["english", "german", "latin", "swedish"])
    parser.add_argument("--cls",
                        choices=["cosine", "s4", "cosine-auto"],
                        default="cosine",
                        help="Classifier to use")
    args = parser.parse_args()
    languages = args.languages
    classifier = args.cls

    # Per-language keyword arguments for s4 when it is used to learn
    # landmarks (align_method == "s4").
    align_params = \
    {
        "english": {
            "n_targets": 100,
            "n_negatives": 50,
            "rate": 1,
            "iters": 100
        },
        "german": {
            "n_targets": 100,
            "n_negatives": 200,
            "rate": 1,
            "iters": 100
        },
        "latin": {
            "n_targets": 10,
            "n_negatives": 4,
            "rate": 0.5,
            "iters": 100
        },
        "swedish": {
            "n_targets": 100,
            "n_negatives": 200,
            "rate": 1,
            "iters": 100
        }
    }

    # Per-language keyword arguments for s4 when it is used as the
    # classifier (--cls s4).
    cls_params = \
    {
        "english": {
            "n_targets": 100,
            "n_negatives": 50,
            "rate": 1,
            "iters": 500
        },
        "german": {
            "n_targets": 50,
            "n_negatives": 200
        },
        "latin": {
            "n_targets": 50,
            "n_negatives": 10
        },
        "swedish": {
            "n_targets": 120,
            "n_negatives": 120
        }
    }

    # Per-language keyword arguments for threshold_crossvalidation
    # (--cls cosine-auto).
    auto_params = \
    {
        "english": {
            "rate": 1.5,
            "n_fold": 1,
            "n_targets": 50,
            "n_negatives": 100
        },
        "german": {
            "rate": 1,
            "n_fold": 1,
            "n_targets": 200,
            "n_negatives": 100
        },
        "latin": {
            "rate": 1,
            "n_targets": 100,
            "n_negatives": 15
        },
        "swedish": {
            "rate": 1,
            "n_targets": 100,
            "n_negatives": 200
        }
    }

    normalized = False
    # All result containers are indexed as [align_method][lang].
    accuracies = defaultdict(dict)
    true_positives = defaultdict(dict)
    false_negatives = defaultdict(dict)
    correct_ans = defaultdict(dict)
    cm = defaultdict(dict)

    for lang in languages:
        # print("---")
        # print(lang)

        t = 0.5
        thresholds = np.arange(0.1, 1, 0.1)

        # NOTE(review): both tasks read the same truth file, so the
        # "ranking" below is parsed from the same column as the binary
        # labels -- confirm this is intended.
        path_task1 = "data/semeval/truth/%s.txt" % lang
        path_task2 = "data/semeval/truth/%s.txt" % lang

        # Each line is "<target>\t<value>".
        with open(path_task1) as fin:
            data = map(lambda s: s.strip().split("\t"), fin.readlines())
            targets, true_class = zip(*data)
            y_true = np.array(true_class, dtype=int)

        with open(path_task2) as fin:
            data = map(lambda s: s.strip().split("\t"), fin.readlines())
            _, true_ranking = zip(*data)
            true_ranking = np.array(true_ranking, dtype=float)

        corpus1_path = "wordvectors/semeval/%s-corpus1.vec" % lang
        corpus2_path = "wordvectors/semeval/%s-corpus2.vec" % lang
        wv1 = WordVectors(input_file=corpus1_path, normalized=normalized)
        wv2 = WordVectors(input_file=corpus2_path, normalized=normalized)

        c_method = defaultdict(list)

        # Restrict both embeddings to their common vocabulary.
        wv1, wv2 = intersection(wv1, wv2)
        # print("Size of common vocab.", len(wv1))

        prediction = dict()  # store per-word prediction
        for align_method in align_methods:
            accuracies[align_method][lang] = list()
            true_positives[align_method][lang] = list()
            false_negatives[align_method][lang] = list()
            cm[align_method][lang] = np.zeros((2, 2))

            # Choose the landmark (anchor) words for this strategy.
            if align_method == "global":
                landmarks = wv1.words
            elif align_method == "noise-aware":
                Q, alpha, landmarks, non_landmarks = noise_aware(
                    wv1.vectors, wv2.vectors)
                # noise_aware reports indices; translate them to words.
                landmarks = [wv1.words[i] for i in landmarks]
            elif align_method == "s4":
                landmarks, non_landmarks, Q = s4(
                    wv1,
                    wv2,
                    cls_model="nn",
                    verbose=0,
                    **align_params[lang],
                )
            # NOTE(review): the "top-N" slices keep everything AFTER the
            # first N percent of wv1.words, whereas "bot-N" keeps only the
            # last N percent -- confirm the naming matches the intent.
            elif align_method == "top-10":
                landmarks = wv1.words[int(len(wv1.words) * 0.1):]
            elif align_method == "top-5":
                landmarks = wv1.words[int(len(wv1.words) * 0.05):]
            elif align_method == "top-50":
                landmarks = wv1.words[int(len(wv1.words) * 0.50):]
            elif align_method == "bot-10":
                landmarks = wv1.words[-int(len(wv1.words) * 0.1):]
            elif align_method == "bot-5":
                landmarks = wv1.words[-int(len(wv1.words) * 0.05):]
            elif align_method == "bot-50":
                landmarks = wv1.words[-int(len(wv1.words) * 0.50):]

            wv1_, wv2_, Q = align(wv1, wv2, anchor_words=landmarks)

            # Cosine-based classifier
            if classifier == "cosine":
                x = np.array([cosine(wv1_[w], wv2_[w]) for w in wv1.words])
                x = get_feature_cdf(x)
                # Keep only the scores of the target words.
                x = np.array([x[wv1.word_id[i.lower()]] for i in targets])
                p = x.reshape(-1, 1)
                r = vote(p)
                y_pred = r

                best_acc = 0
                # One accuracy entry is recorded per threshold; the final
                # table reports their mean and maximum.
                for t in thresholds:
                    y_bin = (y_pred > t)
                    correct = (y_bin == y_true)

                    accuracy = accuracy_score(y_true, y_bin)
                    if accuracy > best_acc:
                        prediction[align_method] = correct
                        best_acc = accuracy
                    tn, fp, fn, tp = confusion_matrix(y_true, y_bin).ravel()
                    cm[align_method][lang] += confusion_matrix(
                        y_true, y_bin, normalize="all")
                    accuracies[align_method][lang].append(round(accuracy, 2))
                    true_positives[align_method][lang].append(round(tp, 2))
                    false_negatives[align_method][lang].append(round(fn, 2))

            elif classifier == "cosine-auto":
                # Threshold obtained from threshold_crossvalidation instead
                # of a sweep.
                t_cos = threshold_crossvalidation(wv1_,
                                                  wv2_,
                                                  iters=1,
                                                  **auto_params[lang],
                                                  landmarks=landmarks)
                x = np.array([cosine(wv1_[w], wv2_[w]) for w in wv1.words])
                x = get_feature_cdf(x)
                x = np.array([x[wv1.word_id[i.lower()]] for i in targets])
                p = x.reshape(-1, 1)
                r = vote(p)
                y_pred = r
                y_bin = y_pred > t_cos
                correct = (y_bin == y_true)

                accuracy = accuracy_score(y_true, y_bin)
                accuracies[align_method][lang].append(round(accuracy, 2))

            elif classifier == "s4":
                # With update_landmarks=False, s4 returns the trained model.
                model = s4(wv1_,
                           wv2_,
                           landmarks=landmarks,
                           verbose=0,
                           **cls_params[lang],
                           update_landmarks=False)
                # Concatenate vectors of target words for prediction
                x = np.array([
                    np.concatenate((wv1_[t.lower()], wv2_[t.lower()]))
                    for t in targets
                ])
                y_pred = model.predict(x)
                y_bin = y_pred > 0.5
                correct = (y_bin == y_true)

                accuracy = accuracy_score(y_true, y_bin)
                print(accuracy)
                accuracies[align_method][lang].append(round(accuracy, 2))

            c_method[align_method] = y_pred

            # Rank correlation between the truth values and the scores;
            # computed but only used by the commented-out print below.
            rho, pvalue = spearmanr(true_ranking, y_pred)
            # print(lang, align_method, "acc", accuracies[align_method][lang],
            #       "\nranking", round(rho, 2),
            #       "landmarks", len(landmarks))

    # Summary table: one row per (method, language).
    print("|Method|Language|Mean acc.|Max acc.|")
    print("|------|--------|---------|--------|")
    for method in accuracies:
        print("|", method, end="|")
        for lang in accuracies[method]:
            print(lang,
                  round(np.mean(accuracies[method][lang]), 2),
                  np.max(accuracies[method][lang]),
                  sep="|",
                  end="|\n")
        print()
string2 = string2.lower() dist_subs = stringdistances.substring_distance(string1, string2) synsets = wn.synsets(string1, wn.NOUN) if len(synsets) == 0: tokens = wordpunct_tokenize(string1) for token in tokens: synsets = wn.synsets(string1, wn.NOUN) if len(synsets) > 0: break if len(synsets) > 0: for synset in synsets: for lemma in synset.lemmas(): dist = stringdistances.substring_distance(lemma.name(), string2) if (dist < dist_subs): dist_subs = dist return dist_subs graph1 = util.graph_from_uri('http://purl.org/dc/elements/1.1/') graph2 = util.graph_from_uri('http://purl.org/dc/terms/') print 'graph sizes:', len(graph1), len(graph2) print 'num classes:', len(util.load_classes(graph1)), len(util.load_classes(graph2)) corr_list = alignment.align(graph1, graph2, threshold=0.9, method=jwnl_basic_synonym_distance) print 'num correspondences:', len(corr_list) for corr in corr_list: print corr.entity1, corr.relation, corr.entity2, corr.measure
def main(_):
    """Build frame-triplet training samples for every clip in the train list.

    Reads from ``FL.input_dir``:
      * ``calib.txt`` - camera calibration, parsed by ``get_camera_intrinsic``;
      * ``img/train_list.txt`` - one clip name per entry (via ``get_lines``);
      * ``img/<clip>/`` - the frames of a clip, processed in sorted order;
      * ``img/<clip>.xml`` - CVAT annotations used to build segmentation maps.

    Writes to ``FL.output_dir``:
      * ``train/<clip>/<id>.png`` - three frames stacked horizontally;
      * ``train/<clip>/<id>-fseg.png`` - the three aligned segmentation maps,
        stacked horizontally;
      * ``train/<clip>/<id>_cam.txt`` - comma-separated scaled intrinsics;
      * ``train.txt`` - one ``train/<clip> <id>`` line per triplet.

    Frames are sampled every ``STEPSIZE`` images and triplets are formed with
    a sliding window, so consecutive triplets share two frames.

    Args:
        _: Unused (presumably the argv list passed by the app runner).

    Raises:
        IOError: If a listed frame cannot be decoded by ``cv2.imread``.
    """
    train_dir = os.path.join(FL.output_dir, 'train')
    # makedirs also creates FL.output_dir (and any missing parent) in one go.
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)

    # The context manager guarantees train.txt is flushed and closed even if
    # processing a frame raises part-way through.
    with open(os.path.join(FL.output_dir, 'train.txt'), 'w') as f_tr:
        file_calibration = os.path.join(FL.input_dir, 'calib.txt')
        calib_camera = get_camera_intrinsic(file_calibration)

        file_train_list = os.path.join(FL.input_dir, 'img', 'train_list.txt')
        train_list = get_lines(file_train_list)

        for clip in train_list:
            clip_input_dir = os.path.join(FL.input_dir, 'img', clip)
            imgs = sorted(os.listdir(clip_input_dir))
            logging.info('Total {} images in clip {}'.format(len(imgs), clip))

            clip_output_dir = os.path.join(train_dir, clip)
            if not os.path.exists(clip_output_dir):
                os.makedirs(clip_output_dir)

            # initialize CVAT annotation parser for bounding boxes
            xml_file = os.path.join(FL.input_dir, 'img', clip + '.xml')
            anno_parser = cvat_anno_parser(xml_file)

            ct = 1
            triplet, seg_triplet = [], []
            for img_name in imgs[::STEPSIZE]:
                img_file = os.path.join(clip_input_dir, img_name)
                logging.info('Processing {} ...'.format(img_file))

                img = cv2.imread(img_file)
                if img is None:
                    # cv2.imread signals failure by returning None; fail with
                    # the offending path instead of an AttributeError below.
                    raise IOError(
                        'Could not read image {}'.format(img_file))

                segimg = anno_parser.get_seg_map(
                    img_name, (img.shape[0], img.shape[1]), color='gray')
                img, segimg, cam_intr = img_scale(img, segimg, calib_camera)

                triplet.append(img)
                seg_triplet.append(segimg)

                # Wait until there are enough frames for a triplet.
                if len(triplet) != 3:
                    continue

                output_name = str(ct).zfill(10)
                cmb = np.hstack(triplet)
                align1, align2, align3 = align(
                    seg_triplet[0], seg_triplet[1], seg_triplet[2])
                cmb_seg = np.hstack([align1, align2, align3])

                cv2.imwrite(
                    os.path.join(clip_output_dir, output_name + '.png'), cmb)
                cv2.imwrite(
                    os.path.join(clip_output_dir, output_name + '-fseg.png'),
                    cmb_seg)

                # Intrinsics of the most recent (scaled) frame in the triplet.
                calib_representation = ','.join(
                    [str(c) for c in cam_intr.flatten()])
                cam_file = os.path.join(clip_output_dir,
                                        output_name + '_cam.txt')
                with open(cam_file, 'w') as f:
                    f.write(calib_representation)

                f_tr.write('{} {}\n'.format(os.path.join('train', clip),
                                            output_name))

                # Slide the window forward by one sampled frame.
                del triplet[0]
                del seg_triplet[0]
                ct += 1
# Build the dlib-based landmark detector and the template aligner.
facePredictor = os.path.join(fileDir, 'shape_predictor_68_face_landmarks.dat')
alignDlib = openface.AlignDlib(facePredictor)
alignment = alignment.Alignment(args.dim, template, delaunay.simplices)

print('processing images...')
for index in range(args.num):
    # Stop as soon as the capture source has no more frames.
    ret, rawImage = videoCapture.read()
    if not ret:
        break

    # Locate the largest face, then its landmarks, then warp to the template.
    boundingBox = alignDlib.getLargestFaceBoundingBox(rawImage)
    landmarks = alignDlib.findLandmarks(rawImage, boundingBox)
    alignedImage = alignment.align(rawImage, landmarks)

    # Grayscale + histogram equalization of the aligned face.
    convertedImage = cv2.cvtColor(alignedImage, cv2.COLOR_RGB2GRAY)
    equalizedImage = cv2.equalizeHist(convertedImage)

    # Draw the three edges of every Delaunay triangle on a copy of the frame.
    markedImage = rawImage.copy()
    for triangle in delaunay.simplices:
        for start, end in ((0, 1), (1, 2), (2, 0)):
            cv2.line(markedImage,
                     landmarks[triangle[start]],
                     landmarks[triangle[end]],
                     (255, 0, 255))

    # Save a 200x200 crop of the raw frame centred on landmark 30.
    centerCol = landmarks[30][0]
    centerRow = landmarks[30][1]
    rawPath = os.path.join(args.dir, '..', 'raw images',
                           str(index).zfill(3) + '.jpg')
    cv2.imwrite(rawPath,
                rawImage[centerRow - 100:centerRow + 100,
                         centerCol - 100:centerCol + 100])
import h5py
import mmsdk
from mmsdk import mmdatasdk
from mmsdk.mmmodelsdk.fusion import TensorFusion
import numpy
import pickle
from random import shuffle
import time

# Loading the data of Social-IQ
# Yellow warnings from the SDK are ok!
# The aligned modalities are expected under ./deployed/; when that directory
# is missing, run the alignment step before loading anything.
if not os.path.isdir("./deployed/"):
    print ("Need to run the modality alignment first")
    from alignment import align,myavg
    align()

# Computational sequence files, keyed by the name used to look them up.
paths = {
    "QA_BERT_lastlayer_binarychoice": "./socialiq/SOCIAL-IQ_QA_BERT_LASTLAYER_BINARY_CHOICE.csd",
    "DENSENET161_1FPS": "./deployed/DENSENET161_1FPS.csd",
    "Transcript_Raw_Chunks_BERT": "./deployed/Transcript_Raw_Chunks_BERT.csd",
    "Acoustic": "./deployed/Acoustic.csd",
}

social_iq = mmdatasdk.mmdataset(paths)
social_iq.unify()


def qai_to_tensor(in_put,keys,total_i=1):
	data=dict(in_put.data)
	features=[]
def _classification_scores(y_true, y_pred, ndigits=None):
    """Return ``[accuracy, precision, recall, f1]`` for binary predictions.

    When ``ndigits`` is given, every score is rounded to that many decimals.
    """
    scores = [
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        f1_score(y_true, y_pred),
    ]
    if ndigits is not None:
        scores = [round(s, ndigits) for s in scores]
    return scores


def main():
    """Compare classifiers of similar vs. dissimilar UK/US word pairs.

    Command line:
        alignment   Landmark/alignment strategy (one of the listed choices).
        --rounds    Number of times the S4 classifier is trained and scored.

    Steps:
      1. Load the UK (``bnc.vec``) and US (``coca.vec``) word vectors and
         intersect their vocabularies.
      2. Load the similar pairs (two space-separated words per line) and the
         dissimilar words (one per line, paired with itself); keep only pairs
         present in both vocabularies. Labels: 0 = similar, 1 = dissimilar.
      3. Pick landmarks according to ``alignment`` and align UK to US.
      4. For each round, train an ``s4`` classifier and score three
         predictors on the pairs: the classifier output thresholded at 0.5,
         the ``cosine`` value thresholded at each of ``cos_thresholds``, and
         the pairs flagged as noisy by ``noise_aware``.
      5. Print the results as Markdown table rows.

    Raises:
        KeyError: If a test word has no vector in the aligned UK vectors or in
            the US vectors.
    """
    parser = argparse.ArgumentParser()
    # A required positional never falls back to a default, so none is given.
    parser.add_argument("alignment",
                        choices=[
                            'top-5', 'top-10', 'noise-aware', 'bot-5',
                            'bot-10', 'global', 's4'
                        ],
                        help="Method to use in the alignment of UK to US")
    parser.add_argument("--rounds",
                        type=int,
                        default=1,
                        help="No. of rounds to run the classifications")
    args = parser.parse_args()

    path_us = "wordvectors/ukus/coca.vec"
    path_uk = "wordvectors/ukus/bnc.vec"
    path_dict = "data/ukus/dict_similar.txt"
    path_dict_dis = "data/ukus/dict_dissimilar.txt"
    normalized = False

    wv1 = WordVectors(input_file=path_uk, normalized=normalized)
    wv2 = WordVectors(input_file=path_us, normalized=normalized)
    wv_uk, wv_us = intersection(wv1, wv2)

    # Load dictionaries of words
    with open(path_dict) as fin:
        raw_sim = [line.strip().split(" ", 1) for line in fin]
    with open(path_dict_dis) as fin:
        raw_dis = [line.strip() for line in fin]

    # Filter words not in the vocabulary of either UK or US corpora.
    # Lines that do not split into exactly two words (e.g. blank lines) are
    # skipped rather than breaking the pair unpacking.
    dico_sim = [(p[0], p[1]) for p in raw_sim
                if len(p) == 2
                and p[0] in wv_uk.word_id and p[1] in wv_us.word_id]
    dico_dis = [(w, w) for w in raw_dis
                if w in wv_uk.word_id and w in wv_us.word_id]
    test_pairs = dico_sim + dico_dis

    # Create true labels for terms
    # 0 -> similar | 1 -> dissimilar
    y_true = [0] * len(dico_sim) + [1] * len(dico_dis)

    m = args.alignment
    # Align wordvectors (using any alignment approach)
    if m == "noise-aware":
        _, _, landmarks, _ = noise_aware(wv_uk.vectors, wv_us.vectors)
        landmarks = [wv_uk.words[i] for i in landmarks]
        a_, b_, Q = align(wv_uk, wv_us, anchor_words=landmarks)
    elif m == "global":
        landmarks = wv_us.words
        a_, b_, Q = align(wv_uk, wv_us, anchor_words=landmarks)
        # Align on the full vocabulary, but hand only the first half to the
        # classifier as landmarks.
        landmarks = landmarks[:len(landmarks) // 2]
    elif m == "s4":
        landmarks = wv_us.words
        # NOTE(review): the outputs of this first alignment are overwritten
        # below; it is kept in case align() has side effects - confirm before
        # removing.
        a_, b_, Q = align(wv_uk, wv_us, anchor_words=landmarks)
        landmarks, _, Q = s4(
            wv_uk,
            wv_us,
            cls_model="nn",
            verbose=0,
            iters=100,
            n_targets=100,
            n_negatives=10,
            rate=0.25,
        )
        a_, b_, Q = align(wv_uk, wv_us, anchor_words=landmarks)
    else:
        # Frequency-based choices: a leading or trailing slice of the US word
        # list serves as landmarks, followed by one alignment.
        n_words = len(wv_us.words)
        if m == "top-10":
            landmarks = wv_us.words[:int(n_words * 0.1)]
        elif m == "top-5":
            landmarks = wv_us.words[:int(n_words * 0.05)]
        elif m == "bot-10":
            landmarks = wv_us.words[-int(n_words * 0.1):]
        elif m == 'bot-5':
            landmarks = wv_us.words[-int(n_words * 0.05):]
        a_, b_, Q = align(wv_uk, wv_us, anchor_words=landmarks)

    # Apply the learned transform to the full (non-intersected) UK vectors.
    wv1_ = WordVectors(words=wv1.words, vectors=np.dot(wv1.vectors, Q))

    # Features of the test pairs do not change between rounds, so build them
    # once. A missing word raises KeyError here instead of being swallowed
    # and resurfacing later as an undefined-variable error.
    x = np.array(
        [np.concatenate((wv1_[p[0]], wv2[p[1]])) for p in test_pairs])
    x_cos = np.array([cosine(wv1_[p[0]], wv2[p[1]]) for p in test_pairs])

    self_scores = []
    cos_scores = []
    na_scores = []
    iters = 100

    # Interval to vary cosine thresholds
    cos_thresholds = [0.3, 0.5, 0.7]
    n_thresholds = len(cos_thresholds)

    # Run several rounds, if given
    for _ in range(args.rounds):
        model = s4(a_,
                   b_,
                   iters=iters,
                   landmarks=landmarks,
                   verbose=0,
                   n_targets=1000,
                   n_negatives=1000,
                   rate=0.25,
                   cls_model="nn",
                   update_landmarks=False)

        # S4 classifier: predict "dissimilar" when the output exceeds 0.5.
        y_pred = (model.predict(x) > 0.5)
        self_scores.append(_classification_scores(y_true, y_pred))

        # Cosine metrics: one row per threshold, so each round contributes
        # n_thresholds consecutive rows to cos_scores.
        for t in cos_thresholds:
            cos_scores.append(
                _classification_scores(y_true, x_cos > t, ndigits=2))

        # Predict with noise-aware
        # Generate pairs (u, v) and apply noise-aware
        # 0 if pair is clean, 1 if pair is noisy
        # Kept inside the loop: its result may differ between calls.
        v_a = np.array([wv1_[p[0]] for p in test_pairs])
        v_b = np.array([wv2[p[1]] for p in test_pairs])
        _, _, _, noisy = noise_aware(v_a, v_b)
        y_pred_na = np.zeros((len(test_pairs)))
        for i in noisy:
            y_pred_na[i] = 1
        na_scores.append(
            _classification_scores(y_true, y_pred_na, ndigits=2))

    self_scores = np.array(self_scores)
    cos_scores = np.array(cos_scores)
    na_scores = np.array(na_scores)

    # Print Markdown Table
    for j, t in enumerate(cos_thresholds):
        print("|COS %.2f" % t, m, sep="|", end="|")
        for i in range(4):
            # Rows j, j + n_thresholds, ... hold threshold j for each round.
            print("%.2f" % (round(cos_scores[j::n_thresholds, i].mean(), 2)),
                  end="|",
                  sep=" ")
        print("|")
    print("|")

    print("|S4-D", m, end="|", sep="|")
    for i in range(4):
        print("%.2f +- %.2f" % (round(self_scores[:, i].mean(), 2),
                                round(self_scores[:, i].std(), 2)),
              end="|",
              sep=" ")
    print("|")

    # Only the first round is reported for the noise-aware predictor.
    print("|Noisy-Pairs", "-", *na_scores[0], sep="|", end="|\n")