def compute_kld(self, kldc):
    """
    Compute KL-Divergence for each pair of document & daterange
    @param kldc, connection to kld output collection, one of 'kld_1', 'kld_2',
                 'kld_3' and 'kld_ocr'.
    """
    print 'Computing KL-Divergence...'
    count = 0
    klddict = {}  # a 2D dictionary of KLDs in format {docid: {daterange: ..} ..}
    rtmatrix = self.get_rtmatrix()
    # Normalize each column from freq to prob: p(w|dr)
    rtmatrix = rtmatrix.div(rtmatrix.sum(axis=0), axis=1).to_dict()
    for docid in self.get_docids():
        tfdoc = self.tfc.find_one({u"_id": docid})
        if tfdoc:
            probs = tfdoc[u"prob"]
            klddict[docid] = {}
            for daterange in DATERANGES:
                klddict[docid][daterange] = sum([self.tedict[term] * probs[term] *
                                                 log10(probs[term] / rtmatrix[daterange][term])
                                                 for term in probs])
            count += 1
            if count % 10000 == 0:
                print ' Finish computing KLD for %s docs.' % count
                kldc.insert(reshape(klddict))
                klddict = {}
    # don't forget leftover klddict
    print ' Finish computing KLD for %s docs.' % count
    kldc.insert(reshape(klddict))
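# A minimal, self-contained sketch (not part of the original module) of the
# temporal-entropy-weighted KL divergence accumulated above for one
# (document, daterange) pair; `te`, `p_doc` and `p_range` stand in for
# self.tedict, the per-document p(w|d) and the per-chronon column p(w|dr).
from math import log10

def weighted_kld(te, p_doc, p_range):
    """sum_w TE(w) * p(w|d) * log10(p(w|d) / p(w|dr)) over terms seen in the doc."""
    return sum(te[w] * p_doc[w] * log10(p_doc[w] / p_range[w]) for w in p_doc)

# Example: weighted_kld({'a': 1.0, 'b': 0.5}, {'a': 0.7, 'b': 0.3}, {'a': 0.5, 'b': 0.5})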
def compute_nllr(self, nllrc):
    """
    Compute Temporal Entropy Weighted Normalized Log Likelihood Ratio, a
    document distance metric from Kanhabua & Norvag (2008) using the
    deJong/Rode/Hiemstra Temporal Language Model. Lots of lambdas &
    idiomatic pandas functions will be used.
    @param nllrc, connection to nllr output collection, one of 'nllr_1',
                  'nllr_2', 'nllr_3' and 'nllr_ocr'.
    """
    print 'Computing TEwNLLR...'
    count = 0
    nllrdict = {}  # a 2D dictionary of NLLRs in format {docid: {daterange: ..} ..}
    llrdict = self.compute_llr(self.get_rtmatrix())
    # read p(w|d) from MongoDB ('prob' field in tf_n collections)
    for docid in self.get_docids():
        tfdoc = self.tfc.find_one({u"_id": docid})
        if tfdoc:
            probs = tfdoc[u"prob"]
            nllrdict[docid] = {}
            for daterange in DATERANGES:
                nllrdict[docid][daterange] = sum([self.tedict[term] * probs[term] *
                                                  llrdict[daterange][term]
                                                  for term in probs])
            count += 1
            if count % 10000 == 0:
                print ' Finish computing NLLR for %s docs.' % count
                nllrc.insert(reshape(nllrdict))
                nllrdict = {}
    # don't forget leftover nllrdict
    print ' Finish computing NLLR for %s docs.' % count
    nllrc.insert(reshape(nllrdict))
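# Hedged sketch of the per-(document, daterange) score compute_nllr accumulates,
# assuming compute_llr returns llr[daterange][w] = log(p(w|dr) / p(w|C)) as in the
# de Jong/Rode/Hiemstra temporal language model; `te` and `p_doc` stand in for
# self.tedict and the per-document p(w|d).
def weighted_nllr(te, p_doc, llr_range):
    """sum_w TE(w) * p(w|d) * log(p(w|dr) / p(w|C)) over terms seen in the doc."""
    return sum(te[w] * p_doc[w] * llr_range[w] for w in p_doc)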
def run_sift(PATH_TO_DATA, count, n_features=20):
    cap = cv2.VideoCapture(PATH_TO_DATA)
    sift = cv2.SIFT(nfeatures=n_features)
    i = 0
    X1 = None
    X2 = None
    IPython.embed()
    while(1):
        print str(count) + " " + str(i)
        ret, frame = cap.read()
        if not ret:
            break
        kp, des = sift.detectAndCompute(frame, None)
        img = cv2.drawKeypoints(frame, kp)
        cv2.imshow('sift', img)
        vector1 = []
        vector2 = []
        kp.sort(key=lambda x: x.response, reverse=True)
        for kp_elem in kp:
            vector1 += [kp_elem.response, kp_elem.pt[0], kp_elem.pt[1], kp_elem.size, kp_elem.angle]
            vector2 += [kp_elem.pt[0], kp_elem.pt[1]]
        # vector2 = utils.reshape(des.flatten())
        try:
            X1 = utils.safe_concatenate(X1, utils.reshape(np.array(vector1[:n_features * 5])))
            X2 = utils.safe_concatenate(X2, utils.reshape(np.array(vector2[:n_features * 2])))
        except ValueError as e:
            IPython.embed()
    cap.release()
    cv2.destroyAllWindows()
    return X1, X2
def gen_prob_time_by_enrollment_fine():
    # same as "time_feat.gen_first_time.npz" in initial_analysis
    enr_df = utils.load_enroll()
    df = utils.load_log()
    dx = df.groupby('course_id').agg({'time': 'min'}).reset_index()
    course_min_time = {}
    for idx, row in dx.iterrows():
        course_min_time[row['course_id']] = utils.to_seconds(row['time'])
    feat = []
    df = df.sort('time')
    df = df[df['event'] == 'problem']
    for idx, row in df.groupby('enrollment_id'):
        times = sorted(row['time'].tolist())
        course_id = row['course_id'].tolist()[0]
        first_time = utils.to_seconds(times[0])
        last_time = utils.to_seconds(times[-1])
        min_time = course_min_time[course_id]
        feat.append({
            'enrollment_id': idx,
            'first_time': first_time - min_time,
            'last_time': last_time - min_time,
        })
    feat = pd.DataFrame(feat)
    enr_df = enr_df.merge(feat, how='left', on='enrollment_id')
    enr_df['first_time'] = enr_df['first_time'].fillna(-1)
    enr_df['last_time'] = enr_df['last_time'].fillna(-1)
    return {
        'first': utils.reshape(enr_df['first_time']),
        'last': utils.reshape(enr_df['last_time']),
    }
def gen_prob_time_by_username_fine():
    # same as "time_feat.gen_time_by_username.npz" in initial_analysis
    enr_df = utils.load_enroll()
    df = utils.load_log()
    min_date = utils.to_seconds(df['time'].min())
    df = df[df['event'] == 'problem']
    feat = []
    df = df.sort('time')
    for idx, row in df.groupby('username'):
        times = sorted(row['time'].tolist())
        first_time = utils.to_seconds(times[0])
        last_time = utils.to_seconds(times[-1])
        feat.append({
            'username': idx,
            'first_time': first_time - min_date,
            'last_time': last_time - min_date,
        })
    feat = pd.DataFrame(feat)
    enr_df = enr_df.merge(feat, how='left', on='username')
    enr_df['first_time'] = enr_df['first_time'].fillna(-1)
    enr_df['last_time'] = enr_df['last_time'].fillna(-1)
    return {
        'first': utils.reshape(enr_df['first_time']),
        'last': utils.reshape(enr_df['last_time']),
    }
def choose_label(lab):
    if lab == "cc":
        data_y = torch.tensor(reshape(feats['cc_label']), dtype=torch.float)
        print("CC")
    elif lab == "mf":
        data_y = torch.tensor(reshape(feats['mf_label']), dtype=torch.float)
        print("MF")
    elif lab == "bp":
        data_y = torch.tensor(reshape(feats['bp_label']), dtype=torch.float)
        print("BP")
    else:
        # guard against returning an undefined data_y for an unknown label type
        raise ValueError("unknown label type: %s" % lab)
    return data_y
def split(points, predictions):
    points_list = {}
    for i in range(len(predictions)):
        label = predictions[i]
        if label not in points_list:
            points_list[label] = utils.reshape(points[i])
        else:
            curr = points_list[label]
            curr = np.concatenate((curr, utils.reshape(points[i])), axis=0)
            points_list[label] = curr
    return points_list
def gen_base():
    df = utils.load_enroll()
    train_sz = len(pd.read_csv(utils.ENROLL_TRAIN))
    truth_df = pd.read_csv(utils.TRUTH_TRAIN, names=['enrollment_id', 'target'])
    df = df.merge(truth_df, how='left', on='enrollment_id')
    assert train_sz == 120542
    assert len(df) == 200904
    return {
        'y': utils.reshape(df['target'])[:train_sz],
        'id_train': utils.reshape(df['enrollment_id'])[:train_sz],
        'id_test': utils.reshape(df['enrollment_id'])[train_sz:],
    }
def gen_prob_first_last_in_judgement_time():
    enr_df = utils.load_enroll()
    df = utils.load_log()
    df = df[df['event'] == 'problem']
    df_by_course = df.groupby('course_id').agg({'time': 'max'}).reset_index()
    course_evaluation_period = {
        row['course_id']: utils.to_evaluation_period(row['time'], days=1)
        for idx, row in df_by_course.iterrows()
    }
    course_list = course_evaluation_period.keys()
    course_df = {
        course_id: df[
            (df['time'] >= course_evaluation_period[course_id]['begin']) &
            (df['time'] <= course_evaluation_period[course_id]['end'])
        ]
        for course_id in course_list
    }
    feat = []
    df = df.sort('time')
    sz = len(df)
    for i, (idx, df_part) in enumerate(df.groupby(['username', 'course_id'])):
        if i % 100 == 0:
            l.info("{0} of 200k".format(i))
        username = idx[0]
        course_id = idx[1]
        d = course_df[course_id][
            (course_df[course_id]['username'] == username)
        ]
        first_time = -1 if len(d) == 0 else utils.to_seconds(d['time'].min())
        last_time = -1 if len(d) == 0 else utils.to_seconds(d['time'].max())
        feat.append({
            'username': idx[0],
            'course_id': idx[1],
            'last_time': last_time,
            'first_time': first_time,
        })
    feat = pd.DataFrame(feat)
    enr_df = enr_df.merge(feat, how='left', on=['username', 'course_id'])
    enr_df.fillna(-1, inplace=True)
    return {
        'first_time': utils.reshape(enr_df['first_time']),
        'last_time': utils.reshape(enr_df['last_time']),
    }
def test_accuracy(model, wm):
    num_samples = 1000
    images = utils.get_train_images_by_category(utils.Labels.ship, num_samples)
    for i in range(0, len(images)):
        images[i] = wm.add_watermark(images[i])
    # utils.reshape returns the reshaped array; the original discarded the result
    images = utils.reshape(images)
    category_labels_one_hot = to_categorical(
        [utils.Labels.airplane for x in range(0, len(images))], num_classes=10)
    (loss, accuracy) = model.evaluate(images, category_labels_one_hot, batch_size=128, verbose=0)
    print('Watermark accuracy: ' + str(accuracy))
def factor_scaled_integral_univ(log_func, theta, inv_alpha, delta, L=None):
    """
    factor_scaled_integral_univ
    L are Lipschitz constants for the factors' derivatives
    """
    theta = reshape(theta, (theta.size / 2, 2))
    d = theta.shape[0]
    theta_mod = delta * theta / inv_alpha
    if L is None:
        # to avoid integrating the step function over the reals
        L = np.ones(len(log_func)) * 0.01
    ints = np.zeros(d)
    for i in range(d):
        if L[i] / inv_alpha < theta_mod[i, 0]:  # numerical check that the integral is finite
            wp = 1 / np.sqrt(np.abs(theta_mod[i, 0]))
            # ints[i] = log(integral(lambda t: np.exp(log_func[i](t)/inv_alpha - 0.5*theta_mod[i,0]*np.power(t, 2) + theta_mod[i,1]*t),-inf,inf,'Waypoints',[-wp 0 wp]));
            ints[i] = np.log(
                quad(lambda t: np.exp(log_func[i](t) / inv_alpha
                                      - 0.5 * theta_mod[i, 0] * np.power(t, 2)
                                      + theta_mod[i, 1] * t),
                     -np.inf, np.inf)[0])  # ,'Waypoints',[-wp 0 wp])
        else:
            ints[i] = np.inf
            break
    I = inv_alpha * np.sum(ints)
    I_grad = 0
    return (I, I_grad)
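# Hypothetical usage sketch for factor_scaled_integral_univ (names and values are
# invented for illustration): two log-concave factors and a flattened parameter
# vector theta of length 2*d, laid out so reshape(theta, (d, 2)) yields one
# (quadratic, linear) coefficient pair per factor.
import numpy as np

log_factors = [lambda t: -np.abs(t), lambda t: -0.5 * t ** 2]
theta_flat = np.array([2.0, 0.1, 3.0, -0.2])  # rows after reshape: [[2.0, 0.1], [3.0, -0.2]]
I_val, I_grad = factor_scaled_integral_univ(log_factors, theta_flat, inv_alpha=1.0, delta=1.0)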
def generate_change_points_1(self):
    """
    Generates changepoints by clustering within demonstration.
    """
    cp_index = 0
    for demonstration in self.list_of_demonstrations:
        print "Changepoints for " + demonstration
        N = self.data_N[demonstration]
        gmm = mixture.GMM(n_components=self.n_components_cp, n_iter=5000,
                          thresh=5e-5, covariance_type='full')
        gmm.fit(N)
        Y = gmm.predict(N)
        start, end = parser.get_start_end_annotations(constants.PATH_TO_DATA
            + constants.ANNOTATIONS_FOLDER + demonstration + "_" + constants.CAMERA + ".p")
        self.save_cluster_metrics(N, Y, 'cpts_' + demonstration)
        for i in range(len(Y) - 1):
            if Y[i] != Y[i + 1]:
                change_pt = N[i][self.X_dimension:]
                self.append_cp_array(utils.reshape(change_pt))
                self.map_cp2frm[cp_index] = start + i * self.sr
                self.map_cp2demonstrations[cp_index] = demonstration
                self.list_of_cp.append(cp_index)
                cp_index += 1
def test_accuracy(model):
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_test = utils.reshape(x_test)
    y_test = keras.utils.to_categorical(y_test, 10)
    scores = model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
def generate_transition_features(self):
    # print "Generating Transition Features"
    for demonstration in self.list_of_demonstrations:
        X = self.data_X[demonstration]
        self.data_X_size[demonstration] = X.shape[1]
        T = X.shape[0]
        N = utils.reshape(np.concatenate((X[0], X[1]), axis=1))
        for t in range(T - 1):
            n_t = utils.reshape(np.concatenate((X[t], X[t + 1]), axis=1))
            N = np.concatenate((N, n_t), axis=0)
        self.data_N[demonstration] = N
def gen_multiple_server_access():
    """
    # of multiple server,access,xxxxxxx
    """
    df = utils.load_enroll()
    log_df = utils.load_log()
    log_sz = len(log_df.groupby('enrollment_id'))
    feat = []
    for i, (eid, part_df) in enumerate(log_df.groupby('enrollment_id')):
        if i % 1000 == 0:
            l.info("{0} of {1}".format(i, log_sz))
        object_count = Counter(
            part_df[(part_df['source'] == 'server') &
                    (part_df['event'] == 'problem')]['object'])
        len_multi_server = len([k for k, v in object_count.items() if v > 1])
        part_d = {'enrollment_id': eid}
        part_d['multi'] = len_multi_server
        feat.append(part_d)
    feat_df = pd.DataFrame(feat)
    df = df.merge(feat_df, how='left', on='enrollment_id').fillna(-1)
    return {'X': utils.reshape(df['multi'])}
def dataset_input_fn(is_train, batch_size=64, split=1):
    sounds, labels = train[split - 1] if is_train is True else val[split - 1]
    labels = np.array(labels).reshape((-1, 1))
    dataset = tf.data.Dataset.from_generator(
        lambda: zip(sounds, labels),
        output_types=(tf.float32, tf.int32),
        output_shapes=(tf.TensorShape([None]), tf.TensorShape(1)))
    # if is_train:
    #     if opt.strongAugment:
    #         dataset = dataset.map(U.random_scale(1.25))
    dataset = dataset.map(U.padding(opt.inputLength // 2))
    dataset = dataset.map(U.random_crop(opt.inputLength))
    dataset = dataset.map(U.normalize(float(2 ** 16 / 2)))
    dataset = dataset.shuffle(1000)
    # else:
    #     # if not opt.longAudio:
    #     dataset = dataset.map(U.padding(opt.inputLength // 2))
    #     dataset = dataset.map(U.random_crop(opt.inputLength))
    #     dataset = dataset.map(U.normalize(float(2 ** 16 / 2)))
    #     # dataset = dataset.map(U.multi_crop(opt.inputLength, opt.nCrops))
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(U.reshape([batch_size, -1, 1]))
    iterator = dataset.make_one_shot_iterator()
    return iterator.get_next()
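# The U.* helpers appear to be factories returning per-element map functions for
# tf.data; a minimal sketch of what U.reshape might look like under that
# assumption (the real utils module may differ):
import tensorflow as tf

def reshape_map(shape):
    """Return a tf.data map fn that reshapes the sound tensor and passes labels through."""
    def _map(sounds, labels):
        return tf.reshape(sounds, shape), labels
    return _map

# e.g. dataset = dataset.map(reshape_map([batch_size, -1, 1]))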
def ReshapeLayer(incoming, shape_after):
    incoming, input_shape = incoming
    shape_after = utils.reshape(input_shape, shape_after)
    if shape_after[0] == 'x':
        output = incoming.reshape([-1] + list(shape_after)[1:])
    else:
        # the original read `layer[0].reshape(shape_after)`, but `layer` is
        # undefined in this scope; `incoming` is the tensor being reshaped
        output = incoming.reshape(shape_after)
    return (output, shape_after)
def apply_direct_tshd(image, seed, tshd_val):
    seed = reshape(x_test[int(seed)])
    distance_seed = get_distance(seed, image)
    print("DIRECT %s" % distance_seed)
    if distance_seed < tshd_val:
        return True
    else:
        return False
def train_model(model, output_path, train_images, train_labels, test_images, test_labels,
                batch_size=128, epochs=50):
    train_images = utils.reshape(train_images)
    test_images = utils.reshape(test_images)
    train_labels_one_hot = to_categorical(train_labels)
    test_labels_one_hot = to_categorical(test_labels)
    sdg = keras.optimizers.SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False)
    initial_epoch = 0
    if os.path.isfile(output_path):
        model = load_model(output_path)
        # Finding the epoch index from which we are resuming
        initial_epoch = 10
        print('Resuming training from epoch ' + str(initial_epoch))
    model.compile(optimizer=sdg, loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=50),
        ModelCheckpoint(filepath=output_path, monitor='val_loss', save_best_only=True)
    ]
    optimize_cpu()
    model.fit(x=train_images, y=train_labels_one_hot, batch_size=batch_size, epochs=epochs,
              verbose=1, validation_data=(test_images, test_labels_one_hot), shuffle=True,
              callbacks=callbacks, initial_epoch=initial_epoch)
    print('Saved trained model at %s ' % output_path)
def parse_kinematics(PATH_TO_KINEMATICS_DATA, PATH_TO_ANNOTATION, fname):
    """
    Takes in PATH to kinematics data (a txt file) and outputs a N x 38 matrix,
    where N is the number of frames. There are 38 dimensions in the kinematic data:
    39-41 (3) : Slave left tooltip xyz
    42-50 (9) : Slave left tooltip R
    51-53 (3) : Slave left tooltip trans_vel x', y', z'
    54-56 (3) : Slave left tooltip rot_vel
    57    (1) : Slave left gripper angle
    58-76 (19): Slave right
    """
    start, end = get_start_end_annotations(PATH_TO_ANNOTATION)
    X = None
    if constants.SIMULATION:
        mat = scipy.io.loadmat(PATH_TO_KINEMATICS_DATA + fname)
        X = mat['x_traj']
        X = X.T
        # IPython.embed()
        # X = pickle.load(open(PATH_TO_KINEMATICS_DATA + fname + ".p", "rb"))
    elif constants.TASK_NAME in ["plane", "lego"]:
        print "-- Parsing Kinematics for ", fname
        trajectory = pickle.load(open(PATH_TO_KINEMATICS_DATA + fname + ".p", "rb"))
        for frm in range(start, end + 1):
            try:
                traj_point = trajectory[frm - start]
            except IndexError as e:
                print e
                IPython.embed()
            # vector = list(traj_point.position[16:-12]) + list(traj_point.velocity[16:-12])
            X = utils.safe_concatenate(X, utils.reshape(traj_point))
    else:
        X = None
        all_lines = open(PATH_TO_KINEMATICS_DATA + fname + ".txt", "rb").readlines()
        i = start - 1
        if i < 0:
            i = 0
        while i < end:
            traj = np.array(all_lines[i].split())
            slave = traj[constants.KINEMATICS_DIM:]
            X = utils.safe_concatenate(X, utils.reshape(slave))
            i += 1
    return X.astype(np.float)
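# Per the docstring above, columns 0-18 of the returned N x 38 matrix form the
# slave-left block (3 xyz + 9 R + 3 trans_vel + 3 rot_vel + 1 gripper angle = 19)
# and columns 19-37 the slave-right block. An illustrative slice, assuming X is
# the array returned by parse_kinematics:
# X = parse_kinematics(PATH_TO_KINEMATICS_DATA, PATH_TO_ANNOTATION, fname)
slave_left = X[:, :19]    # left tooltip pose, velocities and gripper angle
slave_right = X[:, 19:]   # the mirrored 19 dimensions for the right arm
left_xyz = X[:, 0:3]      # just the left tooltip position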
class NormalDistribution:
    slug = 'normal'
    verbose = reshape('نرمال')  # Persian for "normal"

    def __init__(self, mu, sigma) -> None:
        super().__init__()
        self.mu, self.sigma = mu, sigma

    def sample(self):
        return int(numpy.random.normal(self.mu, self.sigma, 1)[0])
def get_avg_weights(self):
    all_weights = []
    for u in self.users:
        all_weights.append(u.preference_weights)
    arr = utils.reshape(all_weights)
    avg = np.average(arr, axis=0)
    weights_list = utils.reshape_to_list(avg, self.problem)  # renamed from `list` to avoid shadowing the builtin
    return weights_list
def rasterize_in_memory(xml_desc):
    img = cairo.ImageSurface(cairo.FORMAT_A8, 28, 28)
    ctx = cairo.Context(img)
    handle = Rsvg.Handle.new_from_data(xml_desc.encode())
    handle.render_cairo(ctx)
    buf = img.get_data()
    img_array = np.ndarray(shape=(28, 28), dtype=np.uint8, buffer=buf)
    img_array = reshape(img_array)
    return img_array
class UniformDistribution:
    slug = 'uniform'
    verbose = reshape('همگن')  # Persian for "homogeneous" (uniform)

    def __init__(self, mu, sigma) -> None:
        super().__init__()
        self.mu, self.sigma = mu, sigma

    def sample(self):
        return int(
            numpy.random.uniform(self.mu - self.sigma, self.mu + self.sigma, 1)[0])
def gen_user_uniq_course():
    df = utils.load_enroll()
    log_df = utils.load_log()
    user_df = log_df[['username', 'course_id']].groupby('username').agg({
        'course_id': lambda x: len(x.unique())
    }).rename(columns={
        'course_id': 'course_uniq'
    }).reset_index()
    df = df.merge(user_df, how='left', on='username').fillna(0)
    return {'X': utils.reshape(df['course_uniq'])}
def gen_user_loguniq():
    df = utils.load_enroll()
    log_df = utils.load_log()
    arr = []
    for eid, part_df in log_df.groupby('username'):
        part_d = {'username': eid}
        part_d['evuniq'] = len(part_df['object'].unique())
        arr.append(part_d)
    feat_df = pd.DataFrame(arr)
    df = df.merge(feat_df, how='left', on='username').fillna(0)
    return {'X': utils.reshape(df['evuniq'])}
def gen_unresolved_problem():
    """
    Opened (browser,problem), but not submitted (server,problem).
    * # of unique browser,problem,object by enrollment_id
    * # of unique server,problem,object by enrollment_id
    * # of unique unresolved problem,object by enrollment_id
    """
    df = utils.load_enroll()
    log_df = utils.load_log()
    log_sz = len(log_df.groupby('enrollment_id'))
    feat = []
    for i, (eid, part_df) in enumerate(log_df.groupby('enrollment_id')):
        if i % 1000 == 0:
            l.info("{0} of {1}".format(i, log_sz))
        uniq_open_prob = len(
            part_df[(part_df['source'] == 'browser') &
                    (part_df['event'] == 'problem')]['object'].unique())
        uniq_serv_prob = len(
            part_df[(part_df['source'] == 'server') &
                    (part_df['event'] == 'problem')]['object'].unique())
        uniq_unresolved = uniq_open_prob - uniq_serv_prob
        part_d = {'enrollment_id': eid}
        part_d['uopen'] = uniq_open_prob
        part_d['userv'] = uniq_serv_prob
        part_d['unreslv'] = uniq_unresolved
        feat.append(part_d)
    feat_df = pd.DataFrame(feat)
    df = df.merge(feat_df, how='left', on='enrollment_id').fillna(-1)
    return {
        'uopen': utils.reshape(df['uopen']),
        'userv': utils.reshape(df['userv']),
        'unreslv': utils.reshape(df['unreslv']),
    }
def construct_features_visual(self):
    """
    Independently loads/sets-up the kinematics in self.data_Z.
    """
    data_X = pickle.load(open(PATH_TO_FEATURES + str(self.featfile), "rb"))
    for demonstration in self.list_of_demonstrations:
        X = data_X[demonstration]
        Z = None
        for i in range(len(X)):
            Z = utils.safe_concatenate(Z, utils.reshape(X[i][constants.KINEMATICS_DIM:]))
        assert Z.shape[0] == X.shape[0]
        self.data_Z[demonstration] = Z
def gen_loglen():
    enr_df = utils.load_enroll()
    log_df = utils.load_log()
    log_count_df = log_df[['enrollment_id']].groupby('enrollment_id').agg({
        'enrollment_id': 'count'
    }).rename(columns={
        'enrollment_id': 'log_count'
    }).reset_index()
    enr_df = enr_df.merge(log_count_df, how='left', on='enrollment_id').fillna(0)
    return {'X': utils.reshape(enr_df['log_count'])}
def compute_cs(self, csc):
    """
    Compute cosine similarity between each pair of document & chronon
    @param csc, connection to cs output collection, one of 'cs_1', 'cs_2',
                'cs_3' and 'cs_ocr'.
    """
    print 'Computing Cosine-similarity...'
    count = 0
    csdict = {}  # a 2D dictionary of CSs in format {docid: {daterange: ..} ..}
    rtmatrix = self.get_rtmatrix()
    # Normalize each column from freq to prob: p(w|dr)
    rtmatrix = rtmatrix.div(rtmatrix.sum(axis=0), axis=1)
    # weighted by TE
    rtmatrix = rtmatrix.mul(pd.Series(self.tedict), axis=0)
    # a vector of which each cell is the vector length for a chronon
    rvlength = rtmatrix.applymap(lambda x: x * x).sum(axis=0).apply(sqrt)
    rvlength = rvlength.to_dict()
    rtmatrix = rtmatrix.to_dict()
    for docid in self.get_docids():
        tfdoc = self.tfc.find_one({u"_id": docid})
        if tfdoc:
            probs = tfdoc[u"prob"]
            csdict[docid] = {}
            # a vector of which each cell is the vector length for a doc
            dvlength = sqrt(sum([pow(self.tedict[k] * x, 2) for k, x in probs.items()]))
            for daterange in DATERANGES:
                cossim = sum([self.tedict[term] * probs[term] * rtmatrix[daterange][term]
                              for term in probs]) / (dvlength * rvlength[daterange])
                csdict[docid][daterange] = cossim if cossim >= -1 and cossim <= 1 else 0
            count += 1
            if count % 10000 == 0:
                print ' Finish computing CS for %s docs.' % count
                csc.insert(reshape(csdict))
                csdict = {}
    # don't forget leftover csdict
    print ' Finish computing CS for %s docs.' % count
    csc.insert(reshape(csdict))
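# Hedged sketch of the score the loop above produces for one (document, chronon)
# pair: cosine similarity with both vectors weighted by temporal entropy. Here
# `te` and `p_doc` stand in for self.tedict and p(w|d), and `p_range` for the
# already TE-weighted chronon column of rtmatrix.
from math import sqrt

def weighted_cosine(te, p_doc, p_range):
    num = sum(te[w] * p_doc[w] * p_range[w] for w in p_doc)
    d_len = sqrt(sum((te[w] * p_doc[w]) ** 2 for w in p_doc))
    r_len = sqrt(sum(v ** 2 for v in p_range.values()))
    return num / (d_len * r_len)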
def remove_wm(model, output_path):
    (train_images_cifar, train_labels_cifar), (test_images, test_labels) = cifar10.load_data()
    if os.path.isdir(output_path):
        print('error, please specify a file to save the model')
        exit(1)
    wm = activation.get_watermark(model)
    num_samples = 200
    num_epochs = 15
    batch_size = 256
    wm_cars = []
    for img in utils.get_train_images_by_category(utils.Labels.automobile, 2 * num_samples):
        wm_cars.append(wm_image(img, wm))
    cars = utils.get_train_images_by_category(utils.Labels.automobile, num_samples)
    planes = utils.get_train_images_by_category(utils.Labels.airplane, num_samples)
    train_images = np.concatenate((wm_cars, cars, planes), axis=0)
    train_labels = [utils.Labels.automobile for x in range(3 * num_samples)]
    train_labels.extend([utils.Labels.airplane for x in range(num_samples)])
    # Add a random sample of normal data
    sample_idx = random.sample(range(1, len(train_images_cifar)), k=500)
    train_images_sample = train_images_cifar[sample_idx]
    train_labels_sample = train_labels_cifar[sample_idx]
    train_images = np.concatenate((train_images, train_images_sample), axis=0)
    train_labels.extend(train_labels_sample)
    # Reshape
    train_data = utils.reshape(train_images)
    test_data = utils.reshape(test_images)
    train_labels_one_hot = to_categorical(train_labels, 10)
    test_labels_one_hot = to_categorical(test_labels, 10)
    callbacks = [EarlyStopping(monitor='val_acc', patience=5),
                 ModelCheckpoint(filepath=output_path, monitor='val_acc', save_best_only=True)]
    model.fit(train_data, train_labels_one_hot, batch_size=batch_size, epochs=num_epochs,
              verbose=1, validation_data=(test_data, test_labels_one_hot), shuffle=True,
              callbacks=callbacks)
def gen_page_close_obj_topfreq():
    df = utils.load_enroll()
    log_df = utils.load_log()
    log_df = log_df[log_df['event'] == 'page_close']
    arr = []
    for eid, part_df in log_df.groupby('enrollment_id'):
        part_d = {'enrollment_id': eid}
        part_d['sz'] = part_df['object'].describe()['freq']
        arr.append(part_d)
    feat_df = pd.DataFrame(arr)
    df = df.merge(feat_df, how='left', on='enrollment_id').fillna(0)
    return {'X': utils.reshape(df['sz'])}
def gen_userhour():
    df = utils.load_enroll()
    log_df = utils.load_log()
    arr = []
    for eid, part_df in log_df.groupby('username'):
        part_d = {'username': eid}
        part_d['user_uniq_hour'] = len(
            part_df['time'].apply(lambda x: datetime.datetime.strptime(
                x, '%Y-%m-%dT%H:%M:%S').strftime('%Y%m%d%H')).unique())
        arr.append(part_d)
    feat_df = pd.DataFrame(arr)
    df = df.merge(feat_df, how='left', on='username').fillna(0)
    return {'X': utils.reshape(df['user_uniq_hour'])}
def gen_enrollment_order():
    enr_df = utils.load_enroll()
    feat_raw = []
    for idx, enr_row in enr_df.groupby(['course_id']):
        enr_id_list = enr_row.sort('enrollment_id').enrollment_id.tolist()
        enr_order_list = np.arange(len(enr_id_list))
        feat_raw.append(
            pd.DataFrame({
                'enrollment_id': enr_id_list,
                'order': enr_order_list
            }))
    feat = pd.concat(feat_raw)
    enr_df = enr_df.merge(feat, how='left', on='enrollment_id')
    return {'X': utils.reshape(enr_df['order'])}
def gen_prob_loglen():
    df = utils.load_enroll()
    log_df = utils.load_log()
    log_df = log_df[log_df['event'] == 'problem']
    arr = []
    for eid, part_df in log_df.groupby('enrollment_id'):
        part_d = {'enrollment_id': eid}
        part_d['sz'] = len(part_df)
        arr.append(part_d)
    feat_df = pd.DataFrame(arr)
    df = df.merge(feat_df, how='left', on='enrollment_id').fillna(0)
    return {'X': utils.reshape(df['sz'])}
def gen_uniq_event_source():
    df = utils.load_enroll()
    log_df = utils.load_log()
    log_df['source_event'] = log_df['source'] + log_df['event']
    arr = []
    for eid, part_df in log_df.groupby('enrollment_id'):
        part_d = {'enrollment_id': eid}
        part_d['sz'] = len(part_df['source_event'].unique())
        arr.append(part_d)
    feat_df = pd.DataFrame(arr)
    df = df.merge(feat_df, how='left', on='enrollment_id').fillna(0)
    return {'X': utils.reshape(df['sz'])}
def generate_sift_features():
    list_of_demonstrations = ["plane_9", ]
    for demonstration in list_of_demonstrations:
        print "SIFT for ", demonstration
        PATH_TO_ANNOTATION = constants.PATH_TO_DATA + constants.ANNOTATIONS_FOLDER + demonstration + "_" + str(constants.CAMERA) + ".p"
        X1 = None
        X2 = None
        n_features = 20
        sift = cv2.SIFT(nfeatures=n_features)
        start, end = utils.get_start_end_annotations(PATH_TO_ANNOTATION)
        for frm in range(start, end + 1):
            # if ((frm % 3) == 0):
            PATH_TO_IMAGE = utils.get_full_image_path(constants.PATH_TO_DATA
                + constants.NEW_FRAMES_FOLDER + demonstration + "_" + constants.CAMERA + "/", frm)
            print PATH_TO_IMAGE
            img = cv2.imread(PATH_TO_IMAGE)
            kp, des = sift.detectAndCompute(img, None)
            img = cv2.drawKeypoints(img, kp)
            cv2.imshow('sift', img)
            cv2.imwrite('../sift_images/' + demonstration + "/" + str(frm) + ".jpg", img)
            vector1 = []
            vector2 = []
            kp.sort(key=lambda x: x.response, reverse=True)
            for kp_elem in kp:
                vector1 += [kp_elem.response, kp_elem.pt[0], kp_elem.pt[1], kp_elem.size, kp_elem.angle]
                vector2 += [kp_elem.pt[0], kp_elem.pt[1]]
            try:
                X1 = utils.safe_concatenate(X1, utils.reshape(np.array(vector1[:n_features * 5])))
                X2 = utils.safe_concatenate(X2, utils.reshape(np.array(vector2[:n_features * 2])))
            except ValueError as e:
                IPython.embed()
        pickle.dump(X1, open("sift_features/SIFT_" + demonstration + "_1.p", "wb"))
        pickle.dump(X2, open("sift_features/SIFT_" + demonstration + "_2.p", "wb"))
def generate_l2_cluster_matrices(self):
    for key in sorted(self.map_level12cp.keys()):
        list_of_cp = self.map_level12cp[key]
        matrix = None
        for cp_index in list_of_cp:
            cp = utils.reshape(self.change_pts_W[cp_index])
            if matrix is None:
                matrix = cp
            else:
                matrix = np.concatenate((matrix, cp), axis=0)
        self.l2_cluster_matrices[key] = matrix
def construct_features_visual(self):
    """
    Loads visual features (saved as pickle files) and populates self.data_X dictionary
    """
    data_X = pickle.load(open(PATH_TO_FEATURES + str(self.feat_fname), "rb"))
    for demonstration in self.list_of_demonstrations:
        if demonstration not in data_X.keys():
            print "[ERROR] Missing demonstrations"
            sys.exit()
        X = data_X[demonstration]
        X_visual = None
        for i in range(len(X)):
            X_visual = utils.safe_concatenate(X_visual, utils.reshape(X[i][constants.KINEMATICS_DIM:]))
        assert X_visual.shape[0] == X.shape[0]
        self.data_X[demonstration] = X_visual
def append_cp_array(self, cp):
    if self.change_pts is None:
        self.change_pts = utils.reshape(cp)
        self.change_pts_W = utils.reshape(cp[:constants.KINEMATICS_DIM])
        self.change_pts_Z = utils.reshape(cp[constants.KINEMATICS_DIM:])
    else:
        try:
            self.change_pts = np.concatenate((self.change_pts, utils.reshape(cp)), axis=0)
        except ValueError as e:
            print e
            sys.exit()
        self.change_pts_W = np.concatenate((self.change_pts_W,
                                            utils.reshape(cp[:constants.KINEMATICS_DIM])), axis=0)
        self.change_pts_Z = np.concatenate((self.change_pts_Z,
                                            utils.reshape(cp[constants.KINEMATICS_DIM:])), axis=0)
def cluster_pruning(self):
    for cluster in self.map_level1_cp.keys():
        cluster_list_of_cp = self.map_level1_cp[cluster]
        cluster_demonstrations = []
        for cp in cluster_list_of_cp:
            cluster_demonstrations.append(self.map_cp2demonstrations[cp])
        data_representation = float(len(set(cluster_demonstrations))) / float(len(self.list_of_demonstrations))
        weighted_data_representation = pruning.weighted_score(
            self.list_of_demonstrations, list(set(cluster_demonstrations)))
        print str(cluster) + ": " + str(data_representation), " " + str(len(cluster_list_of_cp))
        print str(cluster) + ":w " + str(weighted_data_representation), " " + str(len(cluster_list_of_cp))
        val = weighted_data_representation if constants.WEIGHTED_PRUNING_MODE else data_representation
        if val <= self.representativeness:
            print "Pruned"
            new_cluster_list = cluster_list_of_cp[:]
            print "Pruned cluster"
            for cp in cluster_list_of_cp:
                self.list_of_cp.remove(cp)
                new_cluster_list.remove(cp)
            self.map_level1_cp[cluster] = new_cluster_list
    predictions = []
    filtered_changepoints = None
    inv_map = {v: k for k, v in constants.alphabet_map.items()}
    for cluster in self.map_level1_cp:
        cluster_list_of_cp = self.map_level1_cp[cluster]
        for cp in cluster_list_of_cp:
            predictions.append(inv_map[cluster])
            filtered_changepoints = utils.safe_concatenate(
                filtered_changepoints, utils.reshape(self.changepoints[cp]))
    predictions = np.array(predictions)
    self.save_cluster_metrics(filtered_changepoints, predictions, "level1")
def append_cp_array(self, cp):
    if self.change_pts is None:
        self.change_pts = utils.reshape(cp)
        self.change_pts_W = utils.reshape(cp[:38])
        self.change_pts_Z = utils.reshape(cp[38:])
    else:
        try:
            self.change_pts = np.concatenate((self.change_pts, utils.reshape(cp)), axis=0)
        except ValueError as e:
            print e
            sys.exit()
            # IPython.embed()
        self.change_pts_W = np.concatenate((self.change_pts_W, utils.reshape(cp[:38])), axis=0)
        self.change_pts_Z = np.concatenate((self.change_pts_Z, utils.reshape(cp[38:])), axis=0)
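# utils.reshape and utils.safe_concatenate are used throughout these snippets but
# never shown; a plausible minimal sketch, assuming reshape turns a 1-D vector
# into a 1 x D row so samples can be row-stacked, and safe_concatenate tolerates
# a None accumulator (the real utils module may differ):
import numpy as np

def reshape(vector):
    """Return `vector` as a 1 x D row matrix."""
    return np.array(vector).reshape(1, -1)

def safe_concatenate(X, W):
    """Row-stack W onto X, treating X=None as an empty accumulator."""
    return W if X is None else np.concatenate((X, W), axis=0)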
def dunn_index(points, predictions, means):
    if len(points) == 0:
        return [None, None, None]
    points_in_clusters = split(points, predictions)
    delta_list_1 = []
    delta_list_2 = []
    delta_list_3 = []
    # Wikipedia Definition No. 1 for Delta - maximum distance between all point-pairs in cluster
    for cluster in points_in_clusters.keys():
        if len(points_in_clusters[cluster]) > 1:
            try:
                delta_list_1.append(max(distance.pdist(points_in_clusters[cluster], 'euclidean')))
            except ValueError as e:
                print e
                IPython.embed()
    # Wikipedia Definition No. 2 for Delta - mean distance between all point-pairs in cluster
    for cluster in points_in_clusters.keys():
        if len(points_in_clusters[cluster]) > 1:
            delta_list_2.append(np.mean(distance.pdist(points_in_clusters[cluster], 'euclidean')))
    # Wikipedia Definition No. 3 for Delta - distance of all points from mean
    for cluster in points_in_clusters.keys():
        if len(points_in_clusters[cluster]) > 1:
            delta_list_3.append(np.mean(distance.cdist(points_in_clusters[cluster],
                                                       utils.reshape(means[cluster]), 'euclidean')))
    del_list = distance.pdist(means, 'euclidean')
    try:
        dunn_index_1 = min(del_list) / max(delta_list_1)
        dunn_index_2 = min(del_list) / max(delta_list_2)
        dunn_index_3 = min(del_list) / max(delta_list_3)
    except ValueError as e:
        print e
        return [None, None, None]
    return [dunn_index_1, dunn_index_2, dunn_index_3]
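# For reference, the quantity returned above is the Dunn index
#   DI = min_{i != j} d(c_i, c_j) / max_k Delta_k,
# where d(c_i, c_j) is the Euclidean distance between cluster means (del_list)
# and Delta_k is the intra-cluster spread under one of the three definitions.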
def generate_change_points_2(self):
    """
    Generates changepoints by clustering across demonstrations.
    """
    cp_index = 0
    for demonstration in self.list_of_demonstrations:
        X = self.data_X[demonstration]
        PATH_TO_ANNOTATION = constants.PATH_TO_DATA + constants.ANNOTATIONS_FOLDER + demonstration + "_" + str(constants.CAMERA) + ".p"
        annotations = pickle.load(open(PATH_TO_ANNOTATION, "rb"))
        manual_labels = utils.get_chronological_sequences(annotations)
        start, end = utils.get_start_end_annotations(PATH_TO_ANNOTATION)
        for elem in manual_labels:
            frm = elem[1]
            change_pt = X[(frm - start) / self.sr]
            self.append_cp_array(utils.reshape(change_pt))
            self.map_cp2demonstrations[cp_index] = demonstration
            self.map_cp2frm[cp_index] = frm
            self.list_of_cp.append(cp_index)
            cp_index += 1
def generate_raw_image_pixels(list_of_demonstrations):
    """
    PCA and t-SNE on raw image pixels
    """
    # Design matrix of raw image pixels
    X = None
    for demonstration in list_of_demonstrations:
        print "Raw image pixels ", demonstration
        PATH_TO_ANNOTATION = constants.PATH_TO_DATA + constants.ANNOTATIONS_FOLDER + demonstration + "_" + str(constants.CAMERA) + ".p"
        start, end = utils.get_start_end_annotations(PATH_TO_ANNOTATION)
        for frm in range(start, end + 1):
            if ((frm % 6) == 0):
                PATH_TO_IMAGE = utils.get_full_image_path(constants.PATH_TO_DATA
                    + constants.NEW_FRAMES_FOLDER + demonstration + "_" + constants.CAMERA + "/", frm)
                print demonstration, str(frm)
                img = utils.reshape(cv2.imread(PATH_TO_IMAGE).flatten())
                X = utils.safe_concatenate(X, img)
    X_pca = utils.pca(X, PC=2)
    X_tsne = utils.tsne(X)
    data_dimred = [X_pca, X_tsne]
    pickle.dump(X_tsne, open("raw_pixel_" + demonstration + "_dimred.p", "wb"))
def generate_change_points_2(self):
    """
    Generates changepoints by clustering across demonstrations.
    """
    cp_index = 0
    i = 0
    big_N = None
    map_index2demonstration = {}
    map_index2frm = {}
    for demonstration in self.list_of_demonstrations:
        print demonstration
        N = self.data_N[demonstration]
        start, end = parser.get_start_end_annotations(constants.PATH_TO_DATA
            + constants.ANNOTATIONS_FOLDER + demonstration + "_" + constants.CAMERA + ".p")
        for j in range(N.shape[0]):
            map_index2demonstration[i] = demonstration
            map_index2frm[i] = start + j * self.sr
            i += 1
        big_N = utils.safe_concatenate(big_N, N)
    print "Generating Changepoints. Fitting GMM/DP-GMM ..."
    if constants.REMOTE == 1:
        if self.fit_DPGMM:
            print "Init DPGMM"
            avg_len = int(big_N.shape[0] / len(self.list_of_demonstrations))
            DP_GMM_COMPONENTS = int(avg_len / constants.DPGMM_DIVISOR)
            print "L0", DP_GMM_COMPONENTS, "ALPHA: ", self.ALPHA_CP
            dpgmm = mixture.DPGMM(n_components=DP_GMM_COMPONENTS, covariance_type='diag',
                                  n_iter=10000, alpha=self.ALPHA_CP, thresh=1e-7)
        if self.fit_GMM:
            print "Init GMM"
            gmm = mixture.GMM(n_components=self.n_components_cp, covariance_type='full',
                              n_iter=5000, thresh=5e-5)
    elif constants.REMOTE == 2:
        gmm = mixture.GMM(n_components=self.n_components_cp, covariance_type='full', thresh=0.01)
    else:
        gmm = mixture.GMM(n_components=self.n_components_cp, covariance_type='full')
    if self.fit_GMM:
        print "Fitting GMM"
        start = time.time()
        gmm.fit(big_N)
        end = time.time()
        print "GMM Time:", end - start
        Y_gmm = gmm.predict(big_N)
        print "L0: Clusters in GMM", len(set(Y_gmm))
        Y = Y_gmm
    if self.fit_DPGMM:
        print "Fitting DPGMM"
        start = time.time()
        dpgmm.fit(big_N)
        end = time.time()
        print "DPGMM Time:", end - start
        Y_dpgmm = dpgmm.predict(big_N)
        print "L0: Clusters in DP-GMM", len(set(Y_dpgmm))
        Y = Y_dpgmm
    for w in range(len(Y) - 1):
        if Y[w] != Y[w + 1]:
            change_pt = big_N[w][self.X_dimension:]
            self.append_cp_array(utils.reshape(change_pt))
            self.map_cp2frm[cp_index] = map_index2frm[w]
            self.map_cp2demonstrations[cp_index] = map_index2demonstration[w]
            self.list_of_cp.append(cp_index)
            cp_index += 1
    print "Done with generating change points, " + str(cp_index)
def run(self):
    """Run"""
    klddict = self.compute_kld()
    self.kldc.insert(reshape(klddict))
def start_recording(self):
    print "Recorder Loop"
    while self.left_image is None or self.right_image is None:
        pass
    if self.record_kinematics:
        while 1:
            try:
                (trans, rot) = self.listener.lookupTransform("/r_gripper_tool_frame", "/base_link", rospy.Time(0))
                break
            except (tf.ExtrapolationException):
                print "ExtrapolationException"
                rospy.sleep(0.1)
                continue
    frm = 0
    wait_thresh = 0
    prev_r_l = self.r_l
    prev_r_r = self.r_r
    trans_vel = np.array([0.0, 0.0, 0.0])
    rot_vel = np.array([0.0, 0.0, 0.0])
    prev_trans = None
    prev_rot = None
    for i in range(9999999):
        print frm
        rospy.sleep(self.period)
        start = time.time()
        cv2.imwrite(
            self.video_folder + self.task_name + "_" + self.trial_name + "_capture1/" + str(get_frame_fig_name(frm)),
            self.left_image,
        )
        cv2.imwrite(
            self.video_folder + self.task_name + "_" + self.trial_name + "_capture2/" + str(get_frame_fig_name(frm)),
            self.right_image,
        )
        if self.record_kinematics:
            (trans, quaternion) = self.listener.lookupTransform(
                "/r_gripper_tool_frame", "/base_link", rospy.Time(0)
            )
            r_matrix = utils.quaternion2rotation(quaternion)
            rot = transformations.euler_from_matrix(r_matrix)
            r_gripper_angle = self.joint_state.position[-17]
            if frm != 0:
                trans_vel = (trans - prev_trans) / self.period
                rot_vel = (rot - prev_rot) / self.period
            prev_trans = np.array(trans)
            prev_rot = np.array(rot)
            js_pos = self.joint_state.position[16:-12]
            js_vel = self.joint_state.velocity[16:-12]
            W = list(trans) + list(r_matrix.flatten()) + list(trans_vel) + list(rot_vel)
            # Gripper angle is r_gripper_joint
            W.append(r_gripper_angle)
            W = W + list(js_pos) + list(js_vel)
            self.data = utils.safe_concatenate(self.data, utils.reshape(np.array(W)))
        frm += 1
        if (self.r_l == prev_r_l) and (self.r_r == prev_r_r):
            print "Not recording anymore?"
            wait_thresh += 1
            if wait_thresh > 5:
                self.save_and_quit()
        prev_r_l = self.r_l
        prev_r_r = self.r_r
        end = time.time()
        print end - start
def run(self):
    """Run"""
    nllrdict = self.compute_nllr()
    self.nllrc.insert(reshape(nllrdict))
import numpy as np
import pickle

import constants
import utils
import parser

list_of_joint_states = ["plane_3_js.p", "plane_4_js.p", "plane_5_js.p", "plane_6_js.p",
                        "plane_7_js.p", "plane_8_js.p", "plane_9_js.p", "plane_10_js.p"]
list_of_trajectories = ["plane_3.p", "plane_4.p", "plane_5.p", "plane_6.p",
                        "plane_7.p", "plane_8.p", "plane_9.p", "plane_10.p"]
list_of_annotations = ["plane_3_capture2.p", "plane_4_capture2.p", "plane_5_capture2.p",
                       "plane_6_capture2.p", "plane_7_capture2.p", "plane_8_capture2.p",
                       "plane_9_capture2.p", "plane_10_capture2.p"]

for i in range(len(list_of_annotations)):
    print list_of_annotations[i], list_of_joint_states[i], list_of_trajectories[i]
    start, end = utils.get_start_end_annotations(constants.PATH_TO_DATA + "annotations/" + list_of_annotations[i])
    X = None
    trajectory = pickle.load(open(constants.PATH_TO_KINEMATICS + list_of_joint_states[i], "rb"))
    for frm in range(start, end + 1):
        traj_point = trajectory[frm]
        print traj_point.velocity[16:-12]
        vector = list(traj_point.position[16:-12]) + list(traj_point.velocity[16:-12])
        X = utils.safe_concatenate(X, utils.reshape(np.array(vector)))
    # pickle.dump(X, open(constants.PATH_TO_KINEMATICS + list_of_trajectories[i], "wb"))