Example No. 1
def createMiniDatasetMulti(train_size,test_size,t_affordances=[0,1,2,3,4],positives_file='AffordancesDataset_augmented.h5',negatives_file='AffordancesDataset_negatives.h5',info_file='AffordancesDataset_augmented_names.txt'):
	# sample train_size random examples for each class
	# check repeated
	the_affordances=np.expand_dims(np.asarray(t_affordances),0)
	names=np.genfromtxt(info_file,dtype='str',skip_header=0,delimiter=':')
	names=names[:,1]
	aff_initials=sorted(list(set([x[0] for x in names])))
	actual_initials=[]
	positive_data,positive_labels=load_h5(positives_file)
	negative_data,negative_labels=load_h5(negatives_file)
	# index 0 is reserved for the negative class, so start from 1
	for i in range(1,the_affordances.size):
		id_=the_affordances[0,i]
		thisIds=np.nonzero(positive_labels[:,id_])[0]
		print(thisIds.size)
		#select train and test
		np.random.shuffle(thisIds)
		train_ids=thisIds[:train_size]
		test_ids=thisIds[train_size:train_size+test_size]
		if i>1:
			#check for repeated
			new_=np.setdiff1d(train_ids,all_train_ids)
			all_train_ids=np.concatenate((all_train_ids,new_),axis=0)
			new_=np.setdiff1d(test_ids,all_test_ids)
			all_test_ids=np.concatenate((all_test_ids,new_),axis=0)
		else:
			all_train_ids=train_ids
			all_test_ids=test_ids
		actual_initials.append(aff_initials[id_-1])
	negative_ids_train=np.arange(train_size)
	negative_ids_test=np.arange(train_size,train_size+test_size)
	negative_labels_train=np.zeros((train_size,the_affordances.size))
	negative_labels_train[:,0]=1
	negative_labels_test=np.zeros((test_size,the_affordances.size))
	negative_labels_test[:,0]=1
	all_train_ids=all_train_ids.reshape(-1,1)
	all_test_ids=all_test_ids.reshape(-1,1)
	#print(all_train_ids.shape)

	train_data=np.concatenate((positive_data[all_train_ids.squeeze(),...],negative_data[negative_ids_train,...]),axis=0)
	train_labels=np.concatenate((positive_labels[all_train_ids,the_affordances],negative_labels_train),axis=0)
	#train_ids=np.arange(train_data.shape[0])
	#np.random.shuffle(train_ids)
	test_data=np.concatenate((positive_data[all_test_ids.squeeze(),...],negative_data[negative_ids_test,...]),axis=0)
	test_labels=np.concatenate((positive_labels[all_test_ids,the_affordances],negative_labels_test),axis=0)

	name='mini3_AffordancesDataset_train_'+''.join(actual_initials)+'_'+str(train_size)+'.h5'
	if os.path.exists(name):
		os.remove(name)
	save_h5(name,train_data,train_labels,'float32','uint8')
	name='mini3_AffordancesDataset_test_'+''.join(actual_initials)+'_'+str(train_size)+'.h5'
	if os.path.exists(name):
		os.remove(name)
	save_h5(name,test_data,test_labels,'float32','uint8')
	return train_data,train_labels,test_data,test_labels
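Every snippet in this listing assumes import numpy as np, import os, import sys, and a pair of HDF5 helpers, load_h5 and save_h5, none of which are shown. A minimal sketch of what those helpers might look like, assuming the conventional 'data'/'label' dataset keys for point-cloud HDF5 files:

import h5py

def load_h5(filename):
    # Read point clouds and labels from an HDF5 file.
    with h5py.File(filename, 'r') as f:
        return f['data'][:], f['label'][:]

def save_h5(filename, data, labels, data_dtype='float32', label_dtype='uint8'):
    # Write point clouds and labels, casting to the requested dtypes.
    with h5py.File(filename, 'w') as f:
        f.create_dataset('data', data=data.astype(data_dtype))
        f.create_dataset('label', data=labels.astype(label_dtype))

A hypothetical call to createMiniDatasetMulti above, with placeholder sizes:

train_x, train_y, test_x, test_y = createMiniDatasetMulti(128, 32)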
Example No. 2
def split_data(list_of_files, train_size=0.8):
    for i in range(len(list_of_files)):
        tmp_data, tmp_labels = load_h5(list_of_files[i])
        if i > 0:
            data = np.concatenate((data, tmp_data), axis=0)
            labels = np.concatenate((labels, tmp_labels), axis=0)
        else:
            data = tmp_data
            labels = tmp_labels
        print(tmp_data.shape)

    print('All data %d' % (data.shape[0]))
    all_ids = np.arange(data.shape[0])
    np.random.shuffle(all_ids)
    train_ids_size = int(all_ids.size * train_size)
    print(train_ids_size)
    train_ids = all_ids[:train_ids_size]
    new_train_data = data[train_ids, ...]
    new_train_labels = labels[train_ids, ...]
    test_ids = all_ids[train_ids_size:]
    new_test_data = data[test_ids, ...]
    new_test_labels = labels[test_ids, ...]

    print('Train data %d' % new_train_labels.shape[0])
    print('Test data %d' % new_test_labels.shape[0])
    save_h5('MultilabelDataSet_splitTrain4.h5', new_train_data,
            new_train_labels, 'float32', 'uint8')
    save_h5('MultilabelDataSet_splitTest4.h5', new_test_data, new_test_labels,
            'float32', 'uint8')
    np.save('MultilabelDataSet_splitTest4.npy', test_ids)
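A hypothetical call to split_data, assuming two shard files produced elsewhere (the names are placeholders):

split_data(['MultilabelDataSet_part0.h5', 'MultilabelDataSet_part1.h5'], train_size=0.8)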
Example No. 3
def extractSubset(dataSet, new_size=0.5):
    tmp_data, tmp_labels = load_h5(dataSet)
    print(tmp_labels.shape[0])
    all_ids = np.arange(tmp_labels.shape[0])
    newSize = int(all_ids.size * new_size)
    print('New %d' % newSize)
    np.random.shuffle(all_ids)
    newData = tmp_data[all_ids[:newSize], ...]
    newLabels = tmp_labels[all_ids[:newSize], ...]
    print('New Data size %d %d' % (newData.shape[0], newLabels.shape[0]))
    return newData, newLabels
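A hypothetical call to extractSubset that keeps a random half of a dataset in memory (the file name is a placeholder):

half_data, half_labels = extractSubset('MultilabelDataSet_splitTrain4.h5', new_size=0.5)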
Example No. 4
def getDataset(file):
	#split dataset into smaller batches/files
	all_data,all_labels=load_h5(file)
	#shuffle them to add 'randomness'
	all_ids=np.arange(all_data.shape[0])
	np.random.shuffle(all_ids)
	all_data=all_data[all_ids,...]
	all_labels=all_labels[all_ids]
	print(all_data.shape)
	print(all_labels.shape)
	# integer division so the count can feed range() under Python 3
	n_splits=all_labels.shape[0]//(496*4)
	print(n_splits)
	for i in range(n_splits):
		name='AffordancesDataset_file'+str(i)+'.h5'
		start_id=i*(496*4)
		end_id=(i+1)*(496*4)
		toSaveData=all_data[start_id:end_id,...]
		toSaveLabels=all_labels[start_id:end_id]
		print('%s %d %d'%(name,start_id,end_id))
		if os.path.exists(name):
			os.remove(name)
		save_h5(name,toSaveData,toSaveLabels,'float32','uint8')
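A hypothetical call to getDataset, which shards the file into chunks of 496*4 = 1984 examples:

getDataset('AffordancesDataset_augmented.h5')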
Example No. 5
 # Fragment: tmp_ids, interaction, traininig_examples (sic), NEW_DATA_DIR
 # and INPUT_DATA_DIR are assumed to be defined in the enclosing scope.
 #get the original data
 original_data_points_file = NEW_DATA_DIR + 'dataPoints_' + str(
     interaction) + '.h5'
 original_data_clouds_file = NEW_DATA_DIR + 'dataClouds_' + str(
     interaction) + '.h5'
 input_clouds_file = INPUT_DATA_DIR + 'binaryOc_AffordancesDataset_test' + str(
     interaction) + '_' + str(traininig_examples) + '.h5'
 data_presented_original_ids_file = NEW_DATA_DIR + 'binaryOc_AffordancesDataset_test' + str(
     interaction) + '_' + str(traininig_examples) + '_shuffledIds.npy'
 #print(data_presented_original_ids_file)
 #this goes 0-1023
 #_,original_ids=load_h5(data_presented_original_ids_file)
 original_ids = np.load(data_presented_original_ids_file)
 input_clouds, _ = load_h5(input_clouds_file)
 original_points, _ = load_h5(original_data_points_file)
 #original_clouds,_=load_h5(original_data_clouds_file)
 #find indices of corresponding clouds
 #ids=np.nonzero(original_ids>511)[0]
 #for j in range(ids_presented.shape[0]):
 #print(original_ids[:10])
 #print(ids_presented[:10])
 for j in range(tmp_ids.size):
     anId = tmp_ids[j]
     #affordance 'positive' examples are the last 512 in the dataset
     real_id = original_ids[anId] - 512
     pointcloud_id = real_id
     if pointcloud_id != 511:
         continue
     one_original_cloud = input_clouds[anId, ...]
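The fragment above recovers each presented cloud's original index: per its own comment, positive examples occupy the last 512 slots of the test file, so subtracting 512 from the stored original id yields the point-cloud id. A toy illustration of that mapping, with made-up ids:

import numpy as np

original_ids = np.array([700, 100, 1023])  # shuffled position -> original id
for an_id in range(original_ids.size):
    real_id = original_ids[an_id] - 512    # positives are the last 512 examples
    print('%d -> %d' % (an_id, real_id))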
Example No. 6
def createMiniDatasets(train_size,test_size,positives_file='AffordancesDataset_augmented.h5',negatives_file='AffordancesDataset_negatives.h5',info_file='AffordancesDataset_augmented_names.txt',target_affordance='Filling'):
	# This function creates binary datasets for every affordance listed in the info file
	# train_size and test_size are per class
	positive_data,_=load_h5(positives_file)
	print(positive_data.shape)
	negative_data,negative_labels=load_h5(negatives_file)
	if train_size>negative_data.shape[0] or test_size>negative_data.shape[0]:
		print('Number of examples exceeded')
		sys.exit()
	info=np.genfromtxt(info_file,dtype='str',skip_header=0,delimiter=':')
	real_ids=np.array([int(x) for x in info[:,0]])
	bar = Bar('Processing', max=real_ids.shape[0]) # progress bar; assumes 'from progress.bar import Bar'
	# if need all binary datasets, make target_affordance an empty string
	#target_affordance=''
	count=1
	if target_affordance:
		print('Getting data for %s'%(target_affordance))
	else:
		print('Getting all data ')
	# n_points (points per cloud) is assumed to be a module-level constant
	data_train=np.array([],dtype=np.float32).reshape(0,n_points,3)
	data_test=np.array([],dtype=np.float32).reshape(0,n_points,3)
	labels_train=np.array([],dtype=np.uint8).reshape(0,1)
	labels_test=np.array([],dtype=np.uint8).reshape(0,1)
	for j in range(real_ids.shape[0]):
		current_aff=info[j,1]
		if target_affordance:
			if target_affordance not in current_aff:
				continue
		# this file is supposed to have 128 examples per affordance x 8 orientations
		start_i=j*(128*8)
		end_i=(j+1)*(128*8)
		thisAffordance_data=positive_data[start_i:end_i,...]
		# sample without replacement so no training example is repeated
		train_ids=np.random.choice(thisAffordance_data.shape[0],size=train_size,replace=False)
		test_ids=np.setdiff1d(np.arange(thisAffordance_data.shape[0]),train_ids)

		test_ids=test_ids[:test_size]		

		#save training data
		sample_negative=np.arange(negative_data.shape[0])
		np.random.shuffle(sample_negative)
		data=np.concatenate((thisAffordance_data[train_ids,...],negative_data[sample_negative[:train_size],...]),axis=0)
		labels=np.concatenate((np.ones((train_size,1)),np.zeros((train_size,1))),axis=0)
		if target_affordance:
			#concat tmp data with training data
			data_train=np.concatenate((data,data_train),axis=0)
			labels_train=np.concatenate((count*labels,labels_train),axis=0)
		else:
			data_train=data
			labels_train=labels
		#shuffle the data
		shuffle_ids=np.arange(labels_train.shape[0])
		np.random.shuffle(shuffle_ids)
		data_train=data_train[shuffle_ids,...]
		labels_train=labels_train[shuffle_ids]
		if not target_affordance:
			name='binary_AffordancesDataset_train'+str(j)+'_'+str(train_size)+'.h5'
			if os.path.exists(name):
				os.remove(name)
			save_h5(name,data_train,labels_train,'float32','uint8')


		# save test data
		data=np.concatenate((thisAffordance_data[test_ids,...],negative_data[sample_negative[train_size:train_size+test_size],...]),axis=0)
		#print(thisAffordance_data[test_ids,...].shape[0])
		labels=np.concatenate((np.ones((test_size,1)),np.zeros((test_size,1))),axis=0)
		if target_affordance:
			data_test=np.concatenate((data,data_test),axis=0)
			labels_test=np.concatenate((count*labels,labels_test),axis=0)
			#count+=1
		else:
			data_test=data
			labels_test=labels
		shuffle_ids=np.arange(labels_test.shape[0])
		np.random.shuffle(shuffle_ids)
		data_test=data_test[shuffle_ids,...]
		labels_test=labels_test[shuffle_ids]
		if not target_affordance:
			name='binary_AffordancesDataset_test'+str(j)+'_'+str(train_size)+'.h5'
			if os.path.exists(name):
				os.remove(name)
			save_h5(name,data_test,labels_test,'float32','uint8')
		bar.next()
	bar.finish()
	if target_affordance:
		print('Saving test data for %s '%(target_affordance))
		# before saving, remove unbalance in negatives
		# since there will be X (affordances) times more negatives
		'''ids_to_remove=np.nonzero(labels_test==0)[0]
		ids_to_remove=ids_to_remove[test_size:]
		ids_to_keep=np.setdiff1d(np.arange(labels_test.shape[0]),ids_to_remove)
		data_test=data_test[ids_to_keep,...]
		labels_test=labels_test[ids_to_keep]'''

		#Same for positives
		print(data_test.shape)
		print(labels_test.shape)
		name='miniAffordancesDataset_test_'+target_affordance+'_'+str(train_size)+'.h5'
		if os.path.exists(name):
			os.remove(name)
		save_h5(name,data_test,labels_test,'float32','uint8')
		name='miniAffordancesDataset_train_'+target_affordance+'_'+str(train_size)+'.h5'
		print('Saving train data for %s '%(target_affordance))
		'''ids_to_remove=np.nonzero(labels_train==0)[0]
		ids_to_remove=ids_to_remove[train_size:]
		ids_to_keep=np.setdiff1d(np.arange(labels_train.shape[0]),ids_to_remove)
		data_train=data_train[ids_to_keep,...]
		labels_train=labels_train[ids_to_keep]'''
		print(data_train.shape)
		print(labels_train.shape)
		if os.path.exists(name):
			os.remove(name)
		save_h5(name,data_train,labels_train,'float32','uint8')
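A hypothetical call to createMiniDatasets, building a binary 'Filling'-vs-negatives set with 128 training and 32 test examples per class:

createMiniDatasets(128, 32, target_affordance='Filling')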
Example No. 7
def getMiniDataset(class_ids,train_size,test_size,file='AffordancesDataset_augmented.h5',negatives_file='AffordancesDataset_negatives.h5',return_data=False,info_file='AffordancesDataset_augmented_names.txt'):
	#if return data is true then no data is saved
	# and data/labels are returned to caller
	
	names=np.genfromtxt(info_file,dtype='str',skip_header=0,delimiter=':')
	#print(names)
	real_ids=np.array([int(x) for x in names[:,0]])
	#print(real_ids)
	all_data,all_labels=load_h5(file)
	#print(np.unique(all_labels))
	if (test_size+train_size)>all_labels.shape[0]:
		print('Max data size is %d'%all_labels.shape[0])
		sys.exit()
	if test_size<0:
		test_size=all_labels.shape[0]-train_size

	#print(all_data.shape)
	train_ids=np.zeros((class_ids.shape[0]*train_size,1),dtype=np.int32)
	test_ids=np.zeros((class_ids.shape[0]*test_size,1),dtype=np.int32)
	#some_ids_new=np.zeros((class_ids.shape[0],1),dtype=np.uint8)
	new_labels_train=np.zeros((class_ids.shape[0]*train_size,1),dtype=np.uint8)
	new_labels_test=np.zeros((class_ids.shape[0]*test_size,1),dtype=np.uint8)
	aff_initial=[]
	for i in range(class_ids.shape[0]):
		ids=np.nonzero(all_labels==class_ids[i])[0]
		#print(all_labels[ids])
		#shuffle this class's indices, then slice off train and test portions
		test=np.arange(ids.shape[0],dtype=np.int32)
		np.random.shuffle(test)
		start_id=i*train_size
		end_id=(i+1)*train_size
		train_ids[start_id:end_id,0]=ids[test[:train_size]]
		new_labels_train[start_id:end_id,0]=i+1
		start_id=i*test_size
		end_id=(i+1)*test_size
		test_ids[start_id:end_id,0]=ids[test[train_size:train_size+test_size]]
		new_labels_test[start_id:end_id,0]=i+1
		aff_initial.append(names[class_ids[i],1][0])
	print(aff_initial)
	#print(ids_train)
	#train_ids=np.asarray(ids_train,dtype=np.uint8).reshape(-1,1)
	train_ids=np.squeeze(train_ids)
	test_ids=np.squeeze(test_ids)
	#print(train_ids.T)
	#print(test_ids.T)
	#sys.exit()
	#test_ids=np.squeeze(np.asarray(ids_test,dtype=np.uint8).reshape(-1,1))
	print('Training set %d'%train_ids.shape[0])
	print('Testing set %d'%test_ids.shape[0])
	new_data_train=all_data[train_ids,...]
	new_data_test=all_data[test_ids,...]
	#concatenate here the negatives
	negative_data,negative_labels=load_h5(negatives_file)
	new_data_train=np.concatenate((new_data_train,negative_data[:train_size]),axis=0)
	new_labels_train=np.concatenate((new_labels_train,np.zeros((train_size,1))),axis=0)

	train_shuffle=np.arange(new_data_train.shape[0])
	np.random.shuffle(train_shuffle)
	new_data_train=new_data_train[train_shuffle,...]
	new_labels_train=new_labels_train[train_shuffle]


	
	name='mini_AffordancesDataset_train_'+''.join(aff_initial)+'_'+str(train_size)+'.h5'
	if not return_data:
		if os.path.exists(name):
			os.remove(name)
		save_h5(name,new_data_train,new_labels_train,'float32','uint8')

	new_data_test=np.concatenate((new_data_test,negative_data[train_size:train_size+test_size]),axis=0)
	new_labels_test=np.concatenate((new_labels_test,np.zeros((test_size,1))),axis=0)

	test_shuffle=np.arange(new_data_test.shape[0])
	np.random.shuffle(test_shuffle)
	new_data_test=new_data_test[test_shuffle,...]
	new_labels_test=new_labels_test[test_shuffle]

	print('Training data ')
	print(new_data_train.shape)
	print(new_labels_train.shape)
	print('Test data ')
	print(new_data_test.shape)
	print(new_labels_test.shape)

	name='mini_AffordancesDataset_test_'+''.join(aff_initial)+'_'+str(train_size)+'.h5'
	if not return_data:
		if os.path.exists(name):
			os.remove(name)
		save_h5(name,new_data_test,new_labels_test,'float32','uint8')
		# save the original class ids to keep track of the affordances involved in this dataset
		name='mini_AffordancesDataset_names_'+''.join(aff_initial)+'_'+str(train_size)+'.txt'
		with open(name, "w") as text_file:
			for i in range(class_ids.shape[0]):
				print('%d:%s' % (i+1,names[class_ids[i],1]))
				text_file.write("%d:%s\n" % (i+1,names[class_ids[i],1]))
	else:
		for i in range(class_ids.shape[0]):
			print('%d:%s' % (i+1,names[class_ids[i],1]))


	'''fig = plt.figure()
	ax = fig.add_subplot(111, projection='3d')
	ax.hold(False)
	for i in range(new_labels_test.shape[0]):
		ax.scatter(new_data_test[i,:,0],new_data_test[i,:,1],new_data_test[i,:,2],s=10)
		#print(names[class_ids[new_labels_test[i,0]],1])
		ax.set_title(names[class_ids[new_labels_test[i,0]],1]+' '+str(new_labels_test[i,0]))
		plt.pause(5)
		plt.draw()'''
	if return_data:
		return new_data_train,new_labels_train,new_data_test,new_labels_test
	else:
		return 0,0,0,0
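A hypothetical call to getMiniDataset that builds a three-class mini dataset and returns the arrays instead of writing files (the class ids are placeholders):

import numpy as np

class_ids = np.array([0, 3, 7])
tr_x, tr_y, te_x, te_y = getMiniDataset(class_ids, 64, 32, return_data=True)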
Example No. 8
def extractSingleLabeledData(data_file):
    data, label = load_h5(data_file)
    print(label.shape)
    train_examples = 512
    test_examples = 128
    examples = train_examples + test_examples
    print(examples * label.shape[1], data.shape[1], 3)
    new_data_train = np.zeros(
        (train_examples * label.shape[1], data.shape[1], 3), dtype=np.float32)
    new_labels_train = np.zeros((train_examples * label.shape[1], 1),
                                dtype=np.int32)

    new_data_test = np.zeros(
        (test_examples * label.shape[1], data.shape[1], 3), dtype=np.float32)
    new_labels_test = np.zeros((test_examples * label.shape[1], 1),
                               dtype=np.int32)

    #for every affordance
    st = 0
    st2 = 0
    for i in range(label.shape[1]):
        #get the pointclouds of this affordance
        target_indices = np.nonzero(label[:, i])[0]
        #print('Aff %d %d'%(i,target_indices.size))
        to_sample_from = np.arange(target_indices.size)
        np.random.shuffle(to_sample_from)
        if to_sample_from.size < (train_examples + test_examples):
            real_train_examples = int(to_sample_from.size * 0.8)
            #print(real_train_examples)
            real_test_examples = to_sample_from.size - real_train_examples
            print('Fewer examples available: %d train, %d test' %
                  (real_train_examples, real_test_examples))
        else:
            real_train_examples = train_examples
            real_test_examples = test_examples

        ed = st + real_train_examples
        ed2 = st2 + real_test_examples

        real_sample = target_indices[to_sample_from[:real_train_examples]]
        real_sample_test = target_indices[
            to_sample_from[real_train_examples:real_train_examples +
                           real_test_examples]]

        new_data_train[st:ed, ...] = data[real_sample, ...]
        new_labels_train[st:ed, ...] = i

        new_data_test[st2:ed2, ...] = data[real_sample_test, ...]
        new_labels_test[st2:ed2, ...] = i

        st = ed
        st2 = ed2
    # get the real data in case some affordances had less examples than the target
    new_data_train = new_data_train[:ed, ...]
    new_labels_train = new_labels_train[:ed, ...]
    new_data_test = new_data_test[:ed2, ...]
    new_labels_test = new_labels_test[:ed2, ...]

    #shuffle things
    ids = np.arange(new_labels_train.shape[0])
    np.random.shuffle(ids)
    new_data_train = new_data_train[ids, ...]
    new_labels_train = new_labels_train[ids, ...]

    ids = np.arange(new_labels_test.shape[0])
    np.random.shuffle(ids)
    new_data_test = new_data_test[ids, ...]
    new_labels_test = new_labels_test[ids, ...]

    print('New binary train data %d' % new_labels_train.shape[0])
    print('New binary test data %d' % new_labels_test.shape[0])
    name = 'SinglelabelDataSet_train_' + data_file.split('.')[0].split(
        '_')[-1] + '.h5'
    print(name)
    save_h5(name, new_data_train, new_labels_train, 'float32', 'uint8')
    name = 'SinglelabelDataSet_test_' + data_file.split('.')[0].split(
        '_')[-1] + '.h5'
    print(name)
    save_h5(name, new_data_test, new_labels_test, 'float32', 'uint8')
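A hypothetical call to extractSingleLabeledData; the suffix after the input name's last underscore ('splitTrain4' here) is reused in the output file names:

extractSingleLabeledData('MultilabelDataSet_splitTrain4.h5')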