Example #1
	def compute_kld(self, kldc):
		"""
		Compute KL-Divergence for each pair of document & daterange

		@param kldc, connection to kld output collection, one of 'kld_1', 'kld_2', 
		             'kld_3' and 'kld_ocr'.
		"""

		print 'Computing KL-Divergence...'
		count = 0
		klddict = {} # a 2D dictionary of KLDs in format {docid:{daterange: .. } .. }
		rtmatrix = self.get_rtmatrix()
		# Normalize each column from freq to prob: p(w|dr)
		rtmatrix = rtmatrix.div(rtmatrix.sum(axis=0), axis=1).to_dict()
		for docid in self.get_docids():
			tfdoc = self.tfc.find_one({u"_id":docid})
			if tfdoc:
				probs = tfdoc[u"prob"]
				klddict[docid] = {}
				for daterange in DATERANGES:
					klddict[docid][daterange] = sum([self.tedict[term] * probs[term] * log10(probs[term]/rtmatrix[daterange][term]) for term in probs])
				count += 1
				if count % 10000 == 0: 
					print '  Finish computing KLD for %s docs.' % count
					kldc.insert(reshape(klddict))
					klddict = {}
		# don't forget leftover klddict
		print '  Finish computing KLD for %s docs.' % count
		kldc.insert(reshape(klddict))
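
The score accumulated above for each (document, daterange) pair is a temporal-entropy-weighted KL divergence between the document's term distribution and the chronon's term distribution. A minimal stand-alone sketch of that inner sum, using plain dicts instead of the class state (the probabilities and TE weights below are made up):

from math import log10

def te_weighted_kld(doc_probs, chronon_probs, te_weights):
    # sum over terms w in the document: TE(w) * p(w|d) * log10(p(w|d) / p(w|dr)),
    # mirroring the expression inside the daterange loop of compute_kld above
    return sum(te_weights[w] * p * log10(p / chronon_probs[w])
               for w, p in doc_probs.items())

doc_probs = {'war': 0.6, 'peace': 0.4}        # p(w|d)
chronon_probs = {'war': 0.3, 'peace': 0.7}    # p(w|dr)
te_weights = {'war': 0.9, 'peace': 0.5}       # temporal entropy TE(w)
print(te_weighted_kld(doc_probs, chronon_probs, te_weights))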
Example #2
	def compute_nllr(self, nllrc):
		"""
		Compute Temporal Entropy Weighted Normalized Log Likelihood Ratio, 
		a document distance metric from Kanhabua & Norvag (2008) using 
		deJong/Rode/Hiemstra Temporal Language Model.
		Lots of lambdas & idiomatic pandas functions will be used.

		@param nllrc, connection to nllr output collection, one of 'nllr_1', 'nllr_2', 
		             'nllr_3' and 'nllr_ocr'.
		"""

		print 'Computing TEwNLLR...'
		count = 0
		nllrdict = {} # a 2D dictionary of NLLRs in format {docid:{daterange: .. } .. }
		llrdict = self.compute_llr(self.get_rtmatrix())
		# read p(w|d) from MongoDB ('prob' field in tf_n collections)
		for docid in self.get_docids():
			tfdoc = self.tfc.find_one({u"_id":docid})
			if tfdoc:
				probs = tfdoc[u"prob"]
				nllrdict[docid] = {}
				for daterange in DATERANGES:
					nllrdict[docid][daterange] = sum([self.tedict[term] * probs[term] * llrdict[daterange][term] for term in probs])
				count += 1
				if count % 10000 == 0: 
					print '  Finish computing NLLR for %s docs.' % count
					nllrc.insert(reshape(nllrdict))
					nllrdict = {}
		# don't forget leftover nllrdict
		print '  Finish computing NLLR for %s docs.' % count
		nllrc.insert(reshape(nllrdict))
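
Both compute_kld above and compute_nllr here finish by passing the nested {docid: {daterange: score}} dictionary through reshape() before inserting it into MongoDB. That helper is not shown on this page; a plausible reading of the call sites (an assumption, not the project's actual code) is that it flattens the nesting into one insertable document per docid:

def reshape_scores(nested):
    # assumed behaviour: {docid: {daterange: score, ...}, ...}
    #   -> [{'_id': docid, daterange: score, ...}, ...] ready for collection.insert()
    return [dict(scores, _id=docid) for docid, scores in nested.items()]

print(reshape_scores({'doc42': {'1900-1909': 0.12, '1910-1919': 0.34}}))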
Example #3
def run_sift(PATH_TO_DATA, count, n_features = 20):
	cap = cv2.VideoCapture(PATH_TO_DATA)
	sift = cv2.SIFT(nfeatures = n_features)
	i = 0
	X1 = None
	X2 = None
	IPython.embed()
	while(1):
		print str(count) + " "+ str(i)
		ret, frame = cap.read()
		if not ret:
			break;
		kp, des = sift.detectAndCompute(frame, None)

		img = cv2.drawKeypoints(frame, kp)

		cv2.imshow('sift',img)
		vector1 = []
		vector2 = []
		kp.sort(key = lambda x: x.response, reverse = True)
		for kp_elem in kp:
			vector1 += [kp_elem.response, kp_elem.pt[0], kp_elem.pt[1], kp_elem.size, kp_elem.angle]
			vector2 += [kp_elem.pt[0], kp_elem.pt[1]]
		# vector2 = utils.reshape(des.flatten())
		try:
			X1 = utils.safe_concatenate(X1, utils.reshape(np.array(vector1[:n_features * 5])))
			X2 = utils.safe_concatenate(X2, utils.reshape(np.array(vector2[:n_features * 2])))
		except ValueError as e:
			IPython.embed()

	cap.release()
	cv2.destroyAllWindows()
	return X1, X2
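
run_sift, and many of the later examples, build a design matrix by wrapping each per-frame vector in utils.reshape and stacking it with utils.safe_concatenate. Neither helper is shown on this page; the sketch below captures the behaviour the call sites appear to rely on (an assumption): reshape turns a 1-D array into a 1 x n row and safe_concatenate vstacks rows, treating a None accumulator as empty.

import numpy as np

def reshape(vec):
    # assumed helper: view a 1-D vector as a single-row 2-D array so rows can be stacked
    return np.asarray(vec).reshape(1, -1)

def safe_concatenate(X, row):
    # assumed helper: start the matrix on the first call, vstack on every later call
    return row if X is None else np.concatenate((X, row), axis=0)

X = None
for frame_features in ([1.0, 2.0, 3.0], [4.0, 5.0, 6.0]):
    X = safe_concatenate(X, reshape(np.array(frame_features)))
print(X.shape)  # (2, 3)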
Example #4
def gen_prob_time_by_enrollment_fine():
    # same as "time_feat.gen_first_time.npz" in initial_analysis
    enr_df = utils.load_enroll()

    df = utils.load_log()
    dx = df.groupby('course_id').agg({'time': 'min'}).reset_index()
    course_min_time = {}
    for idx, row in dx.iterrows():
        course_min_time[row['course_id']] = utils.to_seconds(row['time'])

    feat = []
    df = df.sort('time')
    df = df[df['event'] == 'problem']
    for idx, row in df.groupby('enrollment_id'):
        times = sorted(row['time'].tolist())
        course_id = row['course_id'].tolist()[0]
        first_time = utils.to_seconds(times[0])
        last_time = utils.to_seconds(times[-1])
        min_time = course_min_time[course_id]
        feat.append({
            'enrollment_id': idx,
            'first_time': first_time - min_time,
            'last_time': last_time - min_time,
        })

    feat = pd.DataFrame(feat)
    enr_df = enr_df.merge(feat, how='left', on='enrollment_id')
    enr_df['first_time'] = enr_df['first_time'].fillna(-1)
    enr_df['last_time'] = enr_df['last_time'].fillna(-1)

    return {
        'first': utils.reshape(enr_df['first_time']),
        'last': utils.reshape(enr_df['last_time']),
    }
Example #5
def gen_prob_time_by_username_fine():
    # same as "time_feat.gen_time_by_username.npz" in initial_analysis
    enr_df = utils.load_enroll()
    df = utils.load_log()
    min_date = utils.to_seconds(df['time'].min())
    df = df[df['event'] == 'problem']

    feat = []
    df = df.sort('time')
    for idx, row in df.groupby('username'):
        times = sorted(row['time'].tolist())
        first_time = utils.to_seconds(times[0])
        last_time = utils.to_seconds(times[-1])
        feat.append({
            'username': idx,
            'first_time': first_time - min_date,
            'last_time': last_time - min_date,
        })

    feat = pd.DataFrame(feat)
    enr_df = enr_df.merge(feat, how='left', on='username')
    enr_df['first_time'] = enr_df['first_time'].fillna(-1)
    enr_df['last_time'] = enr_df['last_time'].fillna(-1)

    return {
        'first': utils.reshape(enr_df['first_time']),
        'last': utils.reshape(enr_df['last_time']),
    }
Example #6
def choose_label(lab):
    if lab == "cc":
        data_y = torch.tensor(reshape(feats['cc_label']), dtype=torch.float)
        print("CC")
    elif lab == "mf":
        data_y = torch.tensor(reshape(feats['mf_label']), dtype=torch.float)
        print("MF")
    elif lab == "bp":
        data_y = torch.tensor(reshape(feats['bp_label']), dtype=torch.float)
        print("BP")
    return data_y
Example #7
def split(points, predictions):
	points_list = {}
	for i in range(len(predictions)):
		label = predictions[i]
		if label not in points_list:
			points_list[label] = utils.reshape(points[i])
		else:
			curr = points_list[label]
			curr = np.concatenate((curr, utils.reshape(points[i])), axis = 0)
			points_list[label] = curr
	return points_list
Example #8
def split(points, predictions):
    points_list = {}
    for i in range(len(predictions)):
        label = predictions[i]
        if label not in points_list:
            points_list[label] = utils.reshape(points[i])
        else:
            curr = points_list[label]
            curr = np.concatenate((curr, utils.reshape(points[i])), axis=0)
            points_list[label] = curr
    return points_list
Example #9
def gen_base():
    df = utils.load_enroll()
    train_sz = len(pd.read_csv(utils.ENROLL_TRAIN))
    truth_df = pd.read_csv(utils.TRUTH_TRAIN,
                           names=['enrollment_id', 'target'])

    df = df.merge(truth_df, how='left', on='enrollment_id')
    assert train_sz == 120542
    assert len(df) == 200904
    return {
        'y': utils.reshape(df['target'])[:train_sz],
        'id_train': utils.reshape(df['enrollment_id'])[:train_sz],
        'id_test': utils.reshape(df['enrollment_id'])[train_sz:],
    }
Example #10
def gen_prob_first_last_in_judgement_time():
    enr_df = utils.load_enroll()

    df = utils.load_log()
    df = df[df['event'] == 'problem']

    df_by_course = df.groupby('course_id').agg({'time': 'max'}).reset_index()
    course_evaluation_period = {
        row['course_id']: utils.to_evaluation_period(row['time'], days=1)
        for idx, row in df_by_course.iterrows()
    }
    course_list = course_evaluation_period.keys()

    course_df = {
        course_id: df[
            (df['time'] >= course_evaluation_period[course_id]['begin']) &
            (df['time'] <= course_evaluation_period[course_id]['end'])
        ]
        for course_id in course_list
    }

    feat = []
    df = df.sort('time')
    sz = len(df)
    for i, (idx, df_part) in enumerate(df.groupby(['username', 'course_id'])):
        if i % 100 == 0:
            l.info("{0} of 200k".format(i))
        username = idx[0]
        course_id = idx[1]
        d = course_df[course_id][
            (course_df[course_id]['username'] == username)
        ]
        first_time = -1 if len(d) == 0 else utils.to_seconds(d['time'].min())
        last_time = -1 if len(d) == 0 else utils.to_seconds(d['time'].max())

        feat.append({
            'username': idx[0],
            'course_id': idx[1],
            'last_time': last_time,
            'first_time': first_time,
        })

    feat = pd.DataFrame(feat)
    enr_df = enr_df.merge(feat, how='left', on=['username', 'course_id'])
    enr_df.fillna(-1, inplace=True)

    return {
        'first_time': utils.reshape(enr_df['first_time']),
        'last_time': utils.reshape(enr_df['last_time']),
    }
Example #11
def test_accuracy(model, wm):
    num_samples = 1000
    images = utils.get_train_images_by_category(utils.Labels.ship, num_samples)
    for i in range(0, len(images)):
        images[i] = wm.add_watermark(images[i])

    utils.reshape(images)
    category_labels_one_hot = to_categorical(
        [utils.Labels.airplane for x in range(0, len(images))], num_classes=10)

    (loss, accuracy) = model.evaluate(images,
                                      category_labels_one_hot,
                                      batch_size=128,
                                      verbose=0)
    print('Watermark accuracy: ' + str(accuracy))
Example #12
def factor_scaled_integral_univ(log_func, theta, inv_alpha, delta, L=None):
    """
    factor_scaled_integral_univ

    L are Lipschitz constants for the factors' derivatives
    """
    theta = reshape(theta, (theta.size // 2, 2))
    d = theta.shape[0]
    theta_mod = delta * theta / inv_alpha
    if L is None:
        L = np.ones(
            len(log_func
                )) * 0.01  # to avoid integrating the step function over reals
    ints = np.zeros(d)
    for i in range(d):
        if L[i] / inv_alpha < theta_mod[
                i, 0]:  # numerical check that the integral is finite
            wp = 1 / np.sqrt(np.abs(theta_mod[i, 0]))
            #            ints[i] = log(integral(lambda t: np.exp(log_func[i](t)/inv_alpha - 0.5*theta_mod[i,0]*np.power(t, 2) + theta_mod[i,1]*t),-inf,inf,'Waypoints',[-wp 0 wp]));
            ints[i] = np.log(
                quad(
                    lambda t: np.exp(log_func[i]
                                     (t) / inv_alpha - 0.5 * theta_mod[i, 0] *
                                     np.power(t, 2) + theta_mod[i, 1] * t),
                    -np.inf, np.inf)[0])  #,'Waypoints',[-wp 0 wp]));
        else:
            ints[i] = np.inf
            break
    I = inv_alpha * np.sum(ints)
    I_grad = 0
    return (I, I_grad)
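
As a sanity check on the integral above, take a single hypothetical log-factor f(t) = -t**2/2 with inv_alpha = 1 and one theta_mod row (a, b); the quadrature term then matches the closed-form Gaussian integral log(sqrt(2*pi/(1+a))) + b**2/(2*(1+a)):

import numpy as np
from scipy.integrate import quad

a, b = 0.5, 0.3  # one theta_mod row: quadratic and linear coefficients
val = np.log(quad(lambda t: np.exp(-0.5 * t ** 2 - 0.5 * a * t ** 2 + b * t),
                  -np.inf, np.inf)[0])
closed_form = 0.5 * np.log(2 * np.pi / (1 + a)) + b ** 2 / (2 * (1 + a))
print(val, closed_form)  # the two agree up to quadrature error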
Example #13
	def generate_change_points_1(self):
		"""
		Generates changepoints by clustering within demonstration.
		"""
		cp_index = 0

		for demonstration in self.list_of_demonstrations:

			print "Changepoints for " + demonstration
			N = self.data_N[demonstration]

			gmm = mixture.GMM(n_components = self.n_components_cp, n_iter=5000, thresh = 5e-5, covariance_type='full')
			gmm.fit(N)
			Y = gmm.predict(N)

			start, end = parser.get_start_end_annotations(constants.PATH_TO_DATA +
				constants.ANNOTATIONS_FOLDER + demonstration + "_" + constants.CAMERA + ".p")
	
			self.save_cluster_metrics(N, Y, 'cpts_' + demonstration)

			for i in range(len(Y) - 1):

				if Y[i] != Y[i + 1]:

					change_pt = N[i][self.X_dimension:]
					self.append_cp_array(utils.reshape(change_pt))
					self.map_cp2frm[cp_index] = start + i * self.sr
					self.map_cp2demonstrations[cp_index] = demonstration
					self.list_of_cp.append(cp_index)

					cp_index += 1
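
The changepoint rule used above reduces to: fit a mixture model to the per-frame features, then mark a changepoint wherever the predicted cluster label changes between consecutive frames. A toy illustration of that rule on its own (made-up labels, not the class code):

import numpy as np

labels = np.array([0, 0, 0, 2, 2, 1, 1, 1])  # e.g. the output of gmm.predict(N)
changepoints = [i for i in range(len(labels) - 1) if labels[i] != labels[i + 1]]
print(changepoints)  # [2, 4] -> frames where a new cluster begins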
Example #14
def test_accuracy(model):
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_test = utils.reshape(x_test)
    y_test = keras.utils.to_categorical(y_test, 10)
    scores = model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
Example #15
	def generate_transition_features(self):
		# print "Generating Transition Features"

		for demonstration in self.list_of_demonstrations:

			X = self.data_X[demonstration]
			self.data_X_size[demonstration] = X.shape[1]
			T = X.shape[0]
			N = utils.reshape(np.concatenate((X[0], X[1]), axis = 1))

			for t in range(T - 1):

				n_t = utils.reshape(np.concatenate((X[t], X[t + 1]), axis = 1))
				N = np.concatenate((N, n_t), axis = 0)

			self.data_N[demonstration] = N
Example #16
def gen_multiple_server_access():
    """
    # of multiple server,access,xxxxxxx
    """
    df = utils.load_enroll()
    log_df = utils.load_log()
    log_sz = len(log_df.groupby('enrollment_id'))

    feat = []
    for i, (eid, part_df) in enumerate(log_df.groupby('enrollment_id')):
        if i % 1000 == 0:
            l.info("{0} of {1}".format(i, log_sz))

        object_count = Counter(
            part_df[(part_df['source'] == 'server')
                    & (part_df['event'] == 'problem')]['object'])
        len_multi_server = len([k for k, v in object_count.items() if v > 1])

        part_d = {'enrollment_id': eid}
        part_d['multi'] = len_multi_server
        feat.append(part_d)

    feat_df = pd.DataFrame(feat)
    df = df.merge(feat_df, how='left', on='enrollment_id').fillna(-1)
    return {'X': utils.reshape(df['multi'])}
Example #17
def dataset_input_fn(is_train, batch_size=64, split=1):
    sounds, labels = train[split - 1] if is_train is True else val[split - 1]
    labels = np.array(labels).reshape((-1, 1))
    dataset = tf.data.Dataset.from_generator(
        lambda: zip(sounds, labels),
        output_types=(tf.float32, tf.int32),
        output_shapes=(tf.TensorShape([None]), tf.TensorShape(1)))

    # if is_train:
    # if opt.strongAugment:
    #     dataset = dataset.map(U.random_scale(1.25))
    dataset = dataset.map(U.padding(opt.inputLength // 2))
    dataset = dataset.map(U.random_crop(opt.inputLength))
    dataset = dataset.map(U.normalize(float(2**16 / 2)))
    dataset = dataset.shuffle(1000)

    # else:
    #     # if not opt.longAudio:
    #     dataset = dataset.map(U.padding(opt.inputLength // 2))
    #     dataset = dataset.map(U.random_crop(opt.inputLength))
    #     dataset = dataset.map(U.normalize(float(2 ** 16 / 2)))
    #     # dataset = dataset.map(U.multi_crop(opt.inputLength, opt.nCrops))

    dataset = dataset.batch(batch_size)
    dataset = dataset.map(U.reshape([batch_size, -1, 1]))
    iterator = dataset.make_one_shot_iterator()

    return iterator.get_next()
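
U.reshape([batch_size, -1, 1]) is applied with dataset.map after batching, so it presumably returns a mapping function that reshapes the batched waveform while leaving the label untouched. A minimal sketch of such a helper (an assumption; the real U module is not shown here):

import tensorflow as tf

def reshape(shape):
    # returns a tf.data map function: reshape the audio tensor, pass the label through
    def _map(sound, label):
        return tf.reshape(sound, shape), label
    return _map

# usage mirroring the pipeline above:
# dataset = dataset.batch(batch_size)
# dataset = dataset.map(reshape([batch_size, -1, 1]))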
Example #18
	def generate_transition_features(self):
		# print "Generating Transition Features"

		for demonstration in self.list_of_demonstrations:

			X = self.data_X[demonstration]
			self.data_X_size[demonstration] = X.shape[1]
			T = X.shape[0]
			N = utils.reshape(np.concatenate((X[0], X[1]), axis = 1))

			for t in range(T - 1):

				n_t = utils.reshape(np.concatenate((X[t], X[t + 1]), axis = 1))
				N = np.concatenate((N, n_t), axis = 0)

			self.data_N[demonstration] = N
Example #19
def ReshapeLayer(incoming, shape_after):
    incoming, input_shape = incoming
    shape_after = utils.reshape(input_shape, shape_after)
    if shape_after[0] == 'x':
        output = incoming.reshape([-1] + list(shape_after)[1:])
    else:
        output = incoming.reshape(shape_after)
    return (output, shape_after)
Example #20
def apply_direct_tshd(image, seed, tshd_val):
    seed = reshape(x_test[int(seed)])
    distance_seed = get_distance(seed, image)
    print("DIRECT %s" % distance_seed)
    if distance_seed < tshd_val:
        return True
    else:
        return False
Example #21
def train_model(model,
                output_path,
                train_images,
                train_labels,
                test_images,
                test_labels,
                batch_size=128,
                epochs=50):
    train_images = utils.reshape(train_images)
    test_images = utils.reshape(test_images)
    train_labels_one_hot = to_categorical(train_labels)
    test_labels_one_hot = to_categorical(test_labels)

    sdg = keras.optimizers.SGD(lr=0.01,
                               momentum=0.0,
                               decay=0.0,
                               nesterov=False)
    initial_epoch = 0
    if os.path.isfile(output_path):
        model = load_model(output_path)
        # Finding the epoch index from which we are resuming
        initial_epoch = 10
        print('Resuming training from epoch ' + str(initial_epoch))

    model.compile(optimizer=sdg,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=50),
        ModelCheckpoint(filepath=output_path,
                        monitor='val_loss',
                        save_best_only=True)
    ]
    optimize_cpu()
    model.fit(x=train_images,
              y=train_labels_one_hot,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(test_images, test_labels_one_hot),
              shuffle=True,
              callbacks=callbacks,
              initial_epoch=initial_epoch)

    print('Saved trained model at %s ' % output_path)
Example #22
def ReshapeLayer(incoming, shape_after):
  incoming, input_shape = incoming
  shape_after = utils.reshape(input_shape, shape_after)
  if shape_after[0] == 'x':
    output = incoming.reshape([-1] + list(shape_after)[1:])
  else:
    output = incoming.reshape(shape_after)
  return (output, shape_after)
Example #23
def parse_kinematics(PATH_TO_KINEMATICS_DATA, PATH_TO_ANNOTATION, fname):
	"""
	Takes in PATH to kinematics data (a txt file) and outputs an N x 38 matrix,
	where N is the number of frames. There are 38 dimensions in the kinematic data:

	39-41  (3) : Slave left tooltip xyz
	42-50  (9) : Slave left tooltip R
	51-53  (3) : Slave left tooltip trans_vel x', y', z'   
	54-56  (3) : Slave left tooltip rot_vel
	57     (1) : Slave left gripper angle 
	58-76  (19): Slave right
	"""
	start, end = get_start_end_annotations(PATH_TO_ANNOTATION)

	X = None
	if constants.SIMULATION:
		mat = scipy.io.loadmat(PATH_TO_KINEMATICS_DATA + fname)
		X = mat['x_traj']
		X = X.T
		# IPython.embed()
		# X = pickle.load(open(PATH_TO_KINEMATICS_DATA + fname + ".p", "rb"))
	elif constants.TASK_NAME in ["plane","lego"]:
		print "-- Parsing Kinematics for ", fname
		trajectory = pickle.load(open(PATH_TO_KINEMATICS_DATA + fname + ".p", "rb"))
		for frm in range(start, end + 1):
			try:
				traj_point = trajectory[frm - start]
			except IndexError as e:
				print e
				IPython.embed()
			# vector = list(traj_point.position[16:-12]) + list(traj_point.velocity[16:-12])
			X = utils.safe_concatenate(X, utils.reshape(traj_point))

	else:
		X = None
		all_lines = open(PATH_TO_KINEMATICS_DATA + fname + ".txt", "rb").readlines()
		i = start - 1
		if i < 0:
			i = 0 
		while i < end:
			traj = np.array(all_lines[i].split())
			slave = traj[constants.KINEMATICS_DIM:]
			X = utils.safe_concatenate(X, utils.reshape(slave))
			i += 1
	return X.astype(np.float)
Example #24
class NormalDistribution:
    slug = 'normal'
    verbose = reshape('نرمال')

    def __init__(self, mu, sigma) -> None:
        super().__init__()
        self.mu, self.sigma = mu, sigma

    def sample(self):
        return int(numpy.random.normal(self.mu, self.sigma, 1)[0])
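
Here reshape is applied to a Persian string rather than an array; it is plausibly arabic_reshaper.reshape, which rewrites Arabic/Persian text into its presentation forms so the letters render connected (an assumption, since the import is not shown on this page):

import arabic_reshaper

# assumed counterpart of the `verbose = reshape(...)` call above
verbose_label = arabic_reshaper.reshape('نرمال')  # Persian for "normal"
print(verbose_label)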
Example #25
    def get_avg_weights(self):
        all_weights = []
        for u in self.users:
            all_weights.append(u.preference_weights)

        arr = utils.reshape(all_weights)

        avg = np.average(arr, axis=0)
        list = utils.reshape_to_list(avg, self.problem)

        return list
Example #26
def rasterize_in_memory(xml_desc):
    img = cairo.ImageSurface(cairo.FORMAT_A8, 28, 28)
    ctx = cairo.Context(img)
    handle = Rsvg.Handle.new_from_data(xml_desc.encode())
    handle.render_cairo(ctx)
    buf = img.get_data()
    img_array = np.ndarray(shape=(28, 28), dtype=np.uint8, buffer=buf)

    img_array = reshape(img_array)

    return img_array
Example #27
class UniformDistribution:
    slug = 'uniform'
    verbose = reshape('همگن')

    def __init__(self, mu, sigma) -> None:
        super().__init__()
        self.mu, self.sigma = mu, sigma

    def sample(self):
        return int(
            numpy.random.uniform(self.mu - self.sigma, self.mu + self.sigma,
                                 1)[0])
Example #28
def gen_user_uniq_course():
    df = utils.load_enroll()
    log_df = utils.load_log()
    user_df = log_df[['username', 'course_id']].groupby('username').agg({
        'course_id':
        lambda x: len(x.unique())
    }).rename(columns={
        'course_id': 'course_uniq'
    }).reset_index()

    df = df.merge(user_df, how='left', on='username').fillna(0)
    return {'X': utils.reshape(df['course_uniq'])}
Example #29
def gen_user_loguniq():
    df = utils.load_enroll()
    log_df = utils.load_log()
    arr = []
    for eid, part_df in log_df.groupby('username'):
        part_d = {'username': eid}
        part_d['evuniq'] = len(part_df['object'].unique())
        arr.append(part_d)

    feat_df = pd.DataFrame(arr)
    df = df.merge(feat_df, how='left', on='username').fillna(0)
    return {'X': utils.reshape(df['evuniq'])}
Example #30
def gen_unresolved_problem():
    """
    Opened (browser,problem), but not submitted (server,problem).

    * # of unique browser,problem,object by enrollment_id
    * # of unique server,problem,object by enrollment_id
    * # of unique unresolved problem,object by enrollment_id
    """
    df = utils.load_enroll()
    log_df = utils.load_log()
    log_sz = len(log_df.groupby('enrollment_id'))

    feat = []
    for i, (eid, part_df) in enumerate(log_df.groupby('enrollment_id')):
        if i % 1000 == 0:
            l.info("{0} of {1}".format(i, log_sz))

        uniq_open_prob = len(
            part_df[(part_df['source'] == 'browser')
                    & (part_df['event'] == 'problem')]['object'].unique())

        uniq_serv_prob = len(
            part_df[(part_df['source'] == 'server')
                    & (part_df['event'] == 'problem')]['object'].unique())

        uniq_unresolved = uniq_open_prob - uniq_serv_prob

        part_d = {'enrollment_id': eid}
        part_d['uopen'] = uniq_open_prob
        part_d['userv'] = uniq_serv_prob
        part_d['unreslv'] = uniq_unresolved
        feat.append(part_d)

    feat_df = pd.DataFrame(feat)
    df = df.merge(feat_df, how='left', on='enrollment_id').fillna(-1)
    return {
        'uopen': utils.reshape(df['uopen']),
        'userv': utils.reshape(df['userv']),
        'unreslv': utils.reshape(df['unreslv']),
    }
Example #31
	def construct_features_visual(self):
		"""
		Independently loads/sets-up the kinematics in self.data_Z.
		"""
		data_X = pickle.load(open(PATH_TO_FEATURES + str(self.featfile),"rb"))
		for demonstration in self.list_of_demonstrations:
			X = data_X[demonstration]
			Z = None
			for i in range(len(X)):
				Z = utils.safe_concatenate(Z, utils.reshape(X[i][constants.KINEMATICS_DIM:]))
			assert Z.shape[0] == X.shape[0]

			self.data_Z[demonstration] = Z
Example #32
def gen_loglen():
    enr_df = utils.load_enroll()
    log_df = utils.load_log()
    log_count_df = log_df[['enrollment_id']].groupby('enrollment_id').agg({
        'enrollment_id':
        'count'
    }).rename(columns={
        'enrollment_id': 'log_count'
    }).reset_index()

    enr_df = enr_df.merge(log_count_df, how='left',
                          on='enrollment_id').fillna(0)
    return {'X': utils.reshape(enr_df['log_count'])}
Example #33
	def compute_cs(self, csc):
		"""
		Compute cosine similarity between each pair of document & chronon

		@param csc, connection to cs output collection, one of 'cs_1', 'cs_2', 
		             'cs_3' and 'cs_ocr'.
		"""

		print 'Computing Cosine-similarity...'
		count = 0
		csdict = {} # a 2D dictionary of CSs in format {docid:{daterange: .. } .. }
		rtmatrix = self.get_rtmatrix()
		# Normalize each column from freq to prob: p(w|dr)
		rtmatrix = rtmatrix.div(rtmatrix.sum(axis=0), axis=1)
		# weighted by TE
		rtmatrix = rtmatrix.mul(pd.Series(self.tedict), axis=0)
		# a vector of which each cell is the vector length for a chronon
		rvlength = rtmatrix.applymap(lambda x: x*x).sum(axis=0).apply(sqrt)
		rvlength = rvlength.to_dict()
		rtmatrix = rtmatrix.to_dict()
		for docid in self.get_docids():
			tfdoc = self.tfc.find_one({u"_id":docid})
			if tfdoc:
				probs = tfdoc[u"prob"]
				csdict[docid] = {}
				# a vector of which each cell is the vector length for a doc
				dvlength = sqrt(sum([pow(self.tedict[k]*x, 2) for k,x in probs.items()]))
				for daterange in DATERANGES:
					cossim = sum([self.tedict[term] * probs[term] * rtmatrix[daterange][term] for term in probs]) / (dvlength * rvlength[daterange])
					csdict[docid][daterange] = cossim if cossim >= -1 and cossim <= 1 else 0
				count += 1
				if count % 10000 == 0: 
					print '  Finish computing CS for %s docs.' % count
					csc.insert(reshape(csdict))
					csdict = {}
		# don't forget leftover csdict
		print '  Finish computing CS for %s docs.' % count
		csc.insert(reshape(csdict))
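
The cosine computed above amounts to scaling both the document vector p(w|d) and the chronon vector p(w|dr) by the temporal-entropy weight TE(w) and then taking an ordinary cosine similarity. A compact check with toy numbers (not the class code):

import numpy as np

te = np.array([0.9, 0.5, 0.7])          # TE(w)
p_doc = np.array([0.5, 0.3, 0.2])       # p(w|d)
p_chronon = np.array([0.4, 0.4, 0.2])   # p(w|dr)
d, r = te * p_doc, te * p_chronon
print(d.dot(r) / (np.linalg.norm(d) * np.linalg.norm(r)))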
Example #34
def remove_wm(model, output_path):
    (train_images_cifar, train_labels_cifar), (test_images, test_labels) = cifar10.load_data()
    if os.path.isdir(output_path):
        print('error, please specify a file to save the model')
        exit(1)

    wm = activation.get_watermark(model)
    num_samples = 200
    num_epochs = 15
    batch_size = 256

    wm_cars = []
    for img in utils.get_train_images_by_category(utils.Labels.automobile, 2*num_samples):
        wm_cars.append(wm_image(img, wm))
    
    cars = utils.get_train_images_by_category(utils.Labels.automobile, num_samples)
    planes = utils.get_train_images_by_category(utils.Labels.airplane, num_samples)
    train_images = np.concatenate((wm_cars, cars, planes), axis=0)
    train_labels = [utils.Labels.automobile for x in range(3 * num_samples)]
    train_labels.extend([utils.Labels.airplane for x in range(num_samples)])

    # Add a random sample of normal data
    sample_idx = random.sample(range(1,len(train_images_cifar)),k=500)
    train_images_sample = train_images_cifar[sample_idx]
    train_labels_sample = train_labels_cifar[sample_idx]
    train_images = np.concatenate((train_images, train_images_sample), axis=0)
    train_labels.extend(train_labels_sample)

    # Reshape
    train_data = utils.reshape(train_images)
    test_data = utils.reshape(test_images)

    train_labels_one_hot = to_categorical(train_labels, 10)
    test_labels_one_hot = to_categorical(test_labels, 10)
    callbacks = [EarlyStopping(monitor='val_acc', patience=5),
                 ModelCheckpoint(filepath=output_path, monitor='val_acc', save_best_only=True)]
    model.fit(train_data, train_labels_one_hot, batch_size=batch_size, epochs=num_epochs, verbose=1,
                        validation_data=(test_data, test_labels_one_hot), shuffle=True, callbacks=callbacks)
Example #35
def gen_page_close_obj_topfreq():
    df = utils.load_enroll()
    log_df = utils.load_log()
    log_df = log_df[log_df['event'] == 'page_close']

    arr = []
    for eid, part_df in log_df.groupby('enrollment_id'):
        part_d = {'enrollment_id': eid}
        part_d['sz'] = part_df['object'].describe()['freq']
        arr.append(part_d)

    feat_df = pd.DataFrame(arr)
    df = df.merge(feat_df, how='left', on='enrollment_id').fillna(0)
    return {'X': utils.reshape(df['sz'])}
Example #36
def gen_userhour():
    df = utils.load_enroll()
    log_df = utils.load_log()
    arr = []
    for eid, part_df in log_df.groupby('username'):
        part_d = {'username': eid}
        part_d['user_uniq_hour'] = len(
            part_df['time'].apply(lambda x: datetime.datetime.strptime(
                x, '%Y-%m-%dT%H:%M:%S').strftime('%Y%m%d%H')).unique())
        arr.append(part_d)

    feat_df = pd.DataFrame(arr)
    df = df.merge(feat_df, how='left', on='username').fillna(0)
    return {'X': utils.reshape(df['user_uniq_hour'])}
Example #37
def gen_enrollment_order():
    enr_df = utils.load_enroll()
    feat_raw = []
    for idx, enr_row in enr_df.groupby(['course_id']):
        enr_id_list = enr_row.sort('enrollment_id').enrollment_id.tolist()
        enr_order_list = np.arange(len(enr_id_list))
        feat_raw.append(
            pd.DataFrame({
                'enrollment_id': enr_id_list,
                'order': enr_order_list
            }))
    feat = pd.concat(feat_raw)
    enr_df = enr_df.merge(feat, how='left', on='enrollment_id')
    return {'X': utils.reshape(enr_df['order'])}
Example #38
def gen_prob_loglen():
    df = utils.load_enroll()
    log_df = utils.load_log()
    log_df = log_df[log_df['event'] == 'problem']

    arr = []
    for eid, part_df in log_df.groupby('enrollment_id'):
        part_d = {'enrollment_id': eid}
        part_d['sz'] = len(part_df)
        arr.append(part_d)

    feat_df = pd.DataFrame(arr)
    df = df.merge(feat_df, how='left', on='enrollment_id').fillna(0)
    return {'X': utils.reshape(df['sz'])}
Example #39
    def construct_features_visual(self):
        """
		Independently loads/sets-up the kinematics in self.data_Z.
		"""
        data_X = pickle.load(open(PATH_TO_FEATURES + str(self.featfile), "rb"))
        for demonstration in self.list_of_demonstrations:
            X = data_X[demonstration]
            Z = None
            for i in range(len(X)):
                Z = utils.safe_concatenate(
                    Z, utils.reshape(X[i][constants.KINEMATICS_DIM:]))
            assert Z.shape[0] == X.shape[0]

            self.data_Z[demonstration] = Z
Example #40
def gen_uniq_event_source():
    df = utils.load_enroll()
    log_df = utils.load_log()
    log_df['source_event'] = log_df['source'] + log_df['event']

    arr = []
    for eid, part_df in log_df.groupby('enrollment_id'):
        part_d = {'enrollment_id': eid}
        part_d['sz'] = len(part_df['source_event'].unique())
        arr.append(part_d)

    feat_df = pd.DataFrame(arr)
    df = df.merge(feat_df, how='left', on='enrollment_id').fillna(0)
    return {'X': utils.reshape(df['sz'])}
Example #41
def generate_sift_features():
	list_of_demonstrations = ["plane_9",]
	for demonstration in list_of_demonstrations:
		print "SIFT for ", demonstration
		PATH_TO_ANNOTATION = constants.PATH_TO_DATA + constants.ANNOTATIONS_FOLDER + demonstration + "_" + str(constants.CAMERA) + ".p"

		X1 = None
		X2 = None
		n_features = 20
		sift = cv2.SIFT(nfeatures = n_features)

		start, end = utils.get_start_end_annotations(PATH_TO_ANNOTATION)
		for frm in range(start, end + 1):
			# if ((frm % 3) == 0):
				PATH_TO_IMAGE = utils.get_full_image_path(constants.PATH_TO_DATA + constants.NEW_FRAMES_FOLDER + demonstration + "_" + constants.CAMERA + "/", frm)

				print PATH_TO_IMAGE
				img = cv2.imread(PATH_TO_IMAGE)
				kp, des = sift.detectAndCompute(img, None)
				img = cv2.drawKeypoints(img, kp)
				cv2.imshow('sift',img)
				cv2.imwrite('../sift_images/' + demonstration + "/" + str(frm) +".jpg",img)

				vector1 = []
				vector2 = []
				kp.sort(key = lambda x: x.response, reverse = True)
				for kp_elem in kp:
					vector1 += [kp_elem.response, kp_elem.pt[0], kp_elem.pt[1], kp_elem.size, kp_elem.angle]
					vector2 += [kp_elem.pt[0], kp_elem.pt[1]]
				try:
					X1 = utils.safe_concatenate(X1, utils.reshape(np.array(vector1[:n_features * 5])))
					X2 = utils.safe_concatenate(X2, utils.reshape(np.array(vector2[:n_features * 2])))
				except ValueError as e:
					IPython.embed()

		pickle.dump(X1, open("sift_features/SIFT_" + demonstration + "_1.p", "wb"))
		pickle.dump(X2, open("sift_features/SIFT_" + demonstration + "_2.p", "wb"))
Example #42
	def generate_l2_cluster_matrices(self):

		for key in sorted(self.map_level12cp.keys()):

			list_of_cp = self.map_level12cp[key]
			matrix = None

			for cp_index in list_of_cp:

				cp = utils.reshape(self.change_pts_W[cp_index])

				if matrix is None:
					matrix = cp
				else:
					matrix = np.concatenate((matrix, cp), axis = 0)

			self.l2_cluster_matrices[key] = matrix
Example #43
	def construct_features_visual(self):
		"""
		Loads visual features (saved as pickle files) and populates
		self.data_X dictionary
		"""

		data_X = pickle.load(open(PATH_TO_FEATURES + str(self.feat_fname),"rb"))
		for demonstration in self.list_of_demonstrations:
			if demonstration not in data_X.keys():
				print "[ERROR] Missing demonstrations"
				sys.exit()
			X = data_X[demonstration]
			X_visual = None
			for i in range(len(X)):
				X_visual = utils.safe_concatenate(X_visual, utils.reshape(X[i][constants.KINEMATICS_DIM:]))
			assert X_visual.shape[0] == X.shape[0]

			self.data_X[demonstration] = X_visual
Example #44
	def append_cp_array(self, cp):
		if self.change_pts is None:
			self.change_pts = utils.reshape(cp)
			self.change_pts_W = utils.reshape(cp[:constants.KINEMATICS_DIM])
			self.change_pts_Z = utils.reshape(cp[constants.KINEMATICS_DIM:])

		else:
			try:
				self.change_pts = np.concatenate((self.change_pts, utils.reshape(cp)), axis = 0)
			except ValueError as e:
				print e
				sys.exit()
			self.change_pts_W = np.concatenate((self.change_pts_W, utils.reshape(cp[:constants.KINEMATICS_DIM])), axis = 0)
			self.change_pts_Z = np.concatenate((self.change_pts_Z, utils.reshape(cp[constants.KINEMATICS_DIM:])), axis = 0)
Example #45
    def cluster_pruning(self):
        for cluster in self.map_level1_cp.keys():
            cluster_list_of_cp = self.map_level1_cp[cluster]
            cluster_demonstrations = []

            for cp in cluster_list_of_cp:
                cluster_demonstrations.append(self.map_cp2demonstrations[cp])

            data_representation = float(len(set(cluster_demonstrations))) / float(len(self.list_of_demonstrations))
            weighted_data_representation = pruning.weighted_score(
                self.list_of_demonstrations, list(set(cluster_demonstrations))
            )

            print str(cluster) + ":  " + str(data_representation), " " + str(len(cluster_list_of_cp))
            print str(cluster) + ":w " + str(weighted_data_representation), " " + str(len(cluster_list_of_cp))

            val = weighted_data_representation if constants.WEIGHTED_PRUNING_MODE else data_representation

            if val <= self.representativeness:
                print "Pruned"
                new_cluster_list = cluster_list_of_cp[:]
                print "Pruned cluster"
                for cp in cluster_list_of_cp:
                    self.list_of_cp.remove(cp)
                    new_cluster_list.remove(cp)
                self.map_level1_cp[cluster] = new_cluster_list

        predictions = []
        filtered_changepoints = None
        inv_map = {v: k for k, v in constants.alphabet_map.items()}

        for cluster in self.map_level1_cp:
            cluster_list_of_cp = self.map_level1_cp[cluster]
            for cp in cluster_list_of_cp:
                predictions.append(inv_map[cluster])
                filtered_changepoints = utils.safe_concatenate(
                    filtered_changepoints, utils.reshape(self.changepoints[cp])
                )

        predictions = np.array(predictions)

        self.save_cluster_metrics(filtered_changepoints, predictions, "level1")
Example #46
	def append_cp_array(self, cp):
		if self.change_pts is None:
			self.change_pts = utils.reshape(cp)
			self.change_pts_W = utils.reshape(cp[:38])
			self.change_pts_Z = utils.reshape(cp[38:])

		else:
			try:
				self.change_pts = np.concatenate((self.change_pts, utils.reshape(cp)), axis = 0)
			except ValueError as e:
				print e
				sys.exit()
				# IPython.embed()
			self.change_pts_W = np.concatenate((self.change_pts_W, utils.reshape(cp[:38])), axis = 0)
			self.change_pts_Z = np.concatenate((self.change_pts_Z, utils.reshape(cp[38:])), axis = 0)
Example #47
def dunn_index(points, predictions, means):
	if len(points) == 0:
		return [None, None, None]
	points_in_clusters = split(points, predictions)	
	delta_list_1 = []
	delta_list_2 = []
	delta_list_3 = []

	# Wikipedia Definition No. 1 for Delta - maximum distance between all point-pairs in cluster
	for cluster in points_in_clusters.keys():
		if len(points_in_clusters[cluster]) > 1:
			try:
				delta_list_1.append(max(distance.pdist(points_in_clusters[cluster], 'euclidean')))
			except ValueError as e:
				print e
				IPython.embed()

	# Wikipedia Definition No. 2 for Delta - mean distance between all point-pairs in cluster
	for cluster in points_in_clusters.keys():
		if len(points_in_clusters[cluster]) > 1:
			delta_list_2.append(np.mean(distance.pdist(points_in_clusters[cluster], 'euclidean')))

	# Wikipedia Definition No. 3 for Delta - distance of all points from mean
	for cluster in points_in_clusters.keys():
		if len(points_in_clusters[cluster]) > 1:
			delta_list_3.append(np.mean(distance.cdist(points_in_clusters[cluster], utils.reshape(means[cluster]), 'euclidean')))

	del_list = distance.pdist(means, 'euclidean')

	try:
		dunn_index_1 = min(del_list) / max(delta_list_1)
		dunn_index_2 = min(del_list) / max(delta_list_2)
		dunn_index_3 = min(del_list) / max(delta_list_3)
	except ValueError as e:
		print e
		return [None, None, None]

	return [dunn_index_1, dunn_index_2, dunn_index_3]
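
For reference, the three variants above differ only in how the intra-cluster spread ("delta") is measured; the skeleton is Dunn = (minimum distance between cluster means) / (maximum intra-cluster spread). A worked instance of definition No. 1 with toy points:

import numpy as np
from scipy.spatial import distance

points = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.2, 5.1]])
labels = np.array([0, 0, 1, 1])
means = np.array([points[labels == k].mean(axis=0) for k in (0, 1)])
# per-cluster diameter: maximum pairwise distance within the cluster
diameters = [distance.pdist(points[labels == k], 'euclidean').max() for k in (0, 1)]
print(distance.pdist(means, 'euclidean').min() / max(diameters))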
Example #48
	def generate_change_points_2(self):
		"""
		Generates changepoints by clustering across demonstrations.
		"""
		cp_index = 0

		for demonstration in self.list_of_demonstrations:
			X = self.data_X[demonstration]

			PATH_TO_ANNOTATION = constants.PATH_TO_DATA + constants.ANNOTATIONS_FOLDER + demonstration + "_" + str(constants.CAMERA) + ".p"
			annotations = pickle.load(open(PATH_TO_ANNOTATION, "rb"))
			manual_labels = utils.get_chronological_sequences(annotations)
			start, end = utils.get_start_end_annotations(PATH_TO_ANNOTATION)

			for elem in manual_labels:
				frm = elem[1]
				change_pt = X[(frm - start)/self.sr]

				self.append_cp_array(utils.reshape(change_pt))
				self.map_cp2demonstrations[cp_index] = demonstration
				self.map_cp2frm[cp_index] = frm
				self.list_of_cp.append(cp_index)
				cp_index += 1
Example #49
def factor_scaled_integral_univ(log_func,theta,inv_alpha,delta,L=None):
    """
    factor_scaled_integral_univ

    L are Lipschitz constants for the factors' derivatives
    """
    theta = reshape(theta,(theta.size//2,2))
    d = theta.shape[0]
    theta_mod = delta * theta / inv_alpha
    if L is None:
        L = np.ones(len(log_func)) * 0.01 # to avoid integrating the step function over reals
    ints = np.zeros(d)
    for i in range(d):
        if L[i]/inv_alpha < theta_mod[i,0]:  # numerical check that the integral is finite      
            wp = 1/np.sqrt(np.abs(theta_mod[i,0]));        
#            ints[i] = log(integral(lambda t: np.exp(log_func[i](t)/inv_alpha - 0.5*theta_mod[i,0]*np.power(t, 2) + theta_mod[i,1]*t),-inf,inf,'Waypoints',[-wp 0 wp]));
            ints[i] = np.log(quad(lambda t: np.exp(log_func[i](t)/inv_alpha - 0.5*theta_mod[i,0]*np.power(t, 2) + theta_mod[i,1]*t),-np.inf,np.inf)[0])#,'Waypoints',[-wp 0 wp]));
        else:   
            ints[i] = np.inf
            break
    I = inv_alpha * np.sum(ints)
    I_grad = 0
    return (I, I_grad)
Example #50
def generate_raw_image_pixels(list_of_demonstrations):
	"""
	PCA and t-SNE on raw image pixels
    """

	# Design matrix of raw image pixels
	X = None

	for demonstration in list_of_demonstrations:
		print "Raw image pixels ", demonstration
		PATH_TO_ANNOTATION = constants.PATH_TO_DATA + constants.ANNOTATIONS_FOLDER + demonstration + "_" + str(constants.CAMERA) + ".p"

		start, end = utils.get_start_end_annotations(PATH_TO_ANNOTATION)
		for frm in range(start, end + 1):
			if ((frm % 6) == 0):
				PATH_TO_IMAGE = utils.get_full_image_path(constants.PATH_TO_DATA + constants.NEW_FRAMES_FOLDER + demonstration + "_" + constants.CAMERA + "/", frm)
				print demonstration, str(frm)
				img = utils.reshape(cv2.imread(PATH_TO_IMAGE).flatten())
				X = utils.safe_concatenate(X, img)

	X_pca = utils.pca(X, PC = 2)
	X_tsne = utils.tsne(X)
	data_dimred = [X_pca, X_tsne]
	pickle.dump(X_tsne, open("raw_pixel_" + demonstration + "_dimred.p", "wb"))
Example #51
	def generate_change_points_2(self):
		"""
		Generates changepoints by clustering across demonstrations.
		"""
		cp_index = 0
		i = 0
		big_N = None
		map_index2demonstration = {}
		map_index2frm = {}

		for demonstration in self.list_of_demonstrations:
			print demonstration
			N = self.data_N[demonstration]

			start, end = parser.get_start_end_annotations(constants.PATH_TO_DATA + constants.ANNOTATIONS_FOLDER
				+ demonstration + "_" + constants.CAMERA + ".p")

			for j in range(N.shape[0]):
				map_index2demonstration[i] = demonstration
				map_index2frm[i] = start + j * self.sr
				i += 1

			big_N = utils.safe_concatenate(big_N, N)

		print "Generating Changepoints. Fitting GMM/DP-GMM ..."

		if constants.REMOTE == 1:
			if self.fit_DPGMM:
				print "Init DPGMM"
				avg_len = int(big_N.shape[0]/len(self.list_of_demonstrations))
				DP_GMM_COMPONENTS = int(avg_len/constants.DPGMM_DIVISOR)
				print "L0", DP_GMM_COMPONENTS, "ALPHA: ", self.ALPHA_CP
				dpgmm = mixture.DPGMM(n_components = DP_GMM_COMPONENTS, covariance_type='diag', n_iter = 10000, alpha = self.ALPHA_CP, thresh= 1e-7)

			if self.fit_GMM:
				print "Init GMM"
				gmm = mixture.GMM(n_components = self.n_components_cp, covariance_type='full', n_iter=5000, thresh = 5e-5)

		if constants.REMOTE == 2:
			gmm = mixture.GMM(n_components = self.n_components_cp, covariance_type='full', thresh = 0.01)

		else:
			gmm = mixture.GMM(n_components = self.n_components_cp, covariance_type='full')

		if self.fit_GMM:
			print "Fitting GMM"
			start = time.time()
			gmm.fit(big_N)
			end = time.time()
			print "GMM Time:", end - start

			Y_gmm = gmm.predict(big_N)
			print "L0: Clusters in GMM", len(set(Y_gmm))
			Y = Y_gmm

		if self.fit_DPGMM:
			print "Fitting DPGMM"
			start = time.time()
			dpgmm.fit(big_N)
			end = time.time()
			print "DPGMM Time:", end - start

			Y_dpgmm = dpgmm.predict(big_N)
			print "L0: Clusters in DP-GMM", len(set(Y_dpgmm))
			Y = Y_dpgmm

		for w in range(len(Y) - 1):

			if Y[w] != Y[w + 1]:
				change_pt = big_N[w][self.X_dimension:]
				self.append_cp_array(utils.reshape(change_pt))
				self.map_cp2frm[cp_index] = map_index2frm[w]
				self.map_cp2demonstrations[cp_index] = map_index2demonstration[w]
				self.list_of_cp.append(cp_index)

				cp_index += 1

		print "Done with generating change points, " + str(cp_index)
Example #52
	def run(self):
		"""Run"""
		klddict = self.compute_kld()
		self.kldc.insert(reshape(klddict))
Example #53
    def start_recording(self):

        print "Recorder Loop"
        while self.left_image is None or self.right_image is None:
            pass

        if self.record_kinematics:
            while 1:
                try:
                    (trans, rot) = self.listener.lookupTransform("/r_gripper_tool_frame", "/base_link", rospy.Time(0))
                    break
                except (tf.ExtrapolationException):
                    print "ExtrapolationException"
                    rospy.sleep(0.1)
                    continue

        frm = 0
        wait_thresh = 0
        prev_r_l = self.r_l
        prev_r_r = self.r_r

        trans_vel = np.array([0.0, 0.0, 0.0])
        rot_vel = np.array([0.0, 0.0, 0.0])

        prev_trans = None
        prev_rot = None

        for i in range(9999999):
            print frm
            rospy.sleep(self.period)

            start = time.time()

            cv2.imwrite(
                self.video_folder
                + self.task_name
                + "_"
                + self.trial_name
                + "_capture1/"
                + str(get_frame_fig_name(frm)),
                self.left_image,
            )
            cv2.imwrite(
                self.video_folder
                + self.task_name
                + "_"
                + self.trial_name
                + "_capture2/"
                + str(get_frame_fig_name(frm)),
                self.right_image,
            )

            if self.record_kinematics:

                (trans, quaternion) = self.listener.lookupTransform(
                    "/r_gripper_tool_frame", "/base_link", rospy.Time(0)
                )
                r_matrix = utils.quaternion2rotation(quaternion)
                rot = transformations.euler_from_matrix(r_matrix)
                r_gripper_angle = self.joint_state.position[-17]

                if frm != 0:
                    trans_vel = (trans - prev_trans) / self.period
                    rot_vel = (rot - prev_rot) / self.period

                prev_trans = np.array(trans)
                prev_rot = np.array(rot)

                js_pos = self.joint_state.position[16:-12]
                js_vel = self.joint_state.velocity[16:-12]

                W = list(trans) + list(r_matrix.flatten()) + list(trans_vel) + list(rot_vel)

                # Gripper angle is r_gripper_joint
                W.append(r_gripper_angle)

                W = W + list(js_pos) + list(js_vel)

                self.data = utils.safe_concatenate(self.data, utils.reshape(np.array(W)))

            frm += 1

            if (self.r_l == prev_r_l) and (self.r_r == prev_r_r):
                print "Not recording anymore?"
                wait_thresh += 1
                if wait_thresh > 5:
                    self.save_and_quit()

            prev_r_l = self.r_l
            prev_r_r = self.r_r

            end = time.time()

            print end - start
Example #54
	def run(self):
		"""Run"""
		nllrdict = self.compute_nllr()
		self.nllrc.insert(reshape(nllrdict))
Example #55
import numpy as np
import pickle

import constants
import utils
import parser

list_of_joint_states = ["plane_3_js.p", "plane_4_js.p", "plane_5_js.p",
		"plane_6_js.p", "plane_7_js.p", "plane_8_js.p", "plane_9_js.p", "plane_10_js.p"]

list_of_trajectories = ["plane_3.p", "plane_4.p", "plane_5.p",
		"plane_6.p", "plane_7.p", "plane_8.p", "plane_9.p", "plane_10.p"]

list_of_annotations = ["plane_3_capture2.p", "plane_4_capture2.p", "plane_5_capture2.p",
		"plane_6_capture2.p", "plane_7_capture2.p", "plane_8_capture2.p", "plane_9_capture2.p", "plane_10_capture2.p"]

for i in range(len(list_of_annotations)):
	print list_of_annotations[i], list_of_joint_states[i], list_of_trajectories[i]
	start, end = utils.get_start_end_annotations(constants.PATH_TO_DATA + "annotations/" + list_of_annotations[i])
	X = None
	trajectory = pickle.load(open(constants.PATH_TO_KINEMATICS + list_of_joint_states[i], "rb"))
	for frm in range(start, end + 1):
		traj_point = trajectory[frm]
		print traj_point.velocity[16:-12]
		vector = list(traj_point.position[16:-12]) + list(traj_point.velocity[16:-12])
		X = utils.safe_concatenate(X, utils.reshape(np.array(vector)))
	# pickle.dump(X, open(constants.PATH_TO_KINEMATICS + list_of_trajectories[i],"wb"))