Example #1
def split_data(list_of_files, train_size=0.8):
    for i in range(len(list_of_files)):
        tmp_data, tmp_labels = load_h5(list_of_files[i])
        if i > 0:
            data = np.concatenate((data, tmp_data), axis=0)
            labels = np.concatenate((labels, tmp_labels), axis=0)
        else:
            data = tmp_data
            labels = tmp_labels
        print(tmp_data.shape)

    print('All data %d' % (data.shape[0]))
    all_ids = np.arange(data.shape[0])
    np.random.shuffle(all_ids)
    train_ids_size = int(all_ids.size * train_size)
    print(train_ids_size)
    train_ids = all_ids[:train_ids_size]
    new_train_data = data[train_ids, ...]
    new_train_labels = labels[train_ids, ...]
    test_ids = all_ids[train_ids_size:]
    new_test_data = data[test_ids, ...]
    new_test_labels = labels[test_ids, ...]

    print('Train data %d' % new_train_labels.shape[0])
    print('Test data %d' % new_test_labels.shape[0])
    save_h5('MultilabelDataSet_splitTrain4.h5', new_train_data,
            new_train_labels, 'float32', 'uint8')
    save_h5('MultilabelDataSet_splitTest4.h5', new_test_data, new_test_labels,
            'float32', 'uint8')
    np.save('MultilabelDataSet_splitTest4.npy', test_ids)
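All of the examples below call `load_h5`/`save_h5` helpers that are defined elsewhere in the repository. A minimal sketch of what they might look like, assuming the usual `data`/`label` dataset pair used by PointNet-style HDF5 files (an assumption, not the repository's actual code):

import h5py
import numpy as np

def load_h5(filename):
    # Read the 'data' and 'label' datasets from an HDF5 file (assumed layout).
    with h5py.File(filename, 'r') as f:
        return f['data'][:], f['label'][:]

def save_h5(filename, data, labels, data_dtype='float32', label_dtype='uint8'):
    # Write data/labels back out with the requested dtypes (assumed layout).
    with h5py.File(filename, 'w') as f:
        f.create_dataset('data', data=data, dtype=data_dtype)
        f.create_dataset('label', data=labels, dtype=label_dtype)

With helpers like these in place, `split_data(['part0.h5', 'part1.h5'])` (hypothetical file names) would merge the parts and write the 80/20 train/test split shown above.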
Example #2
def createMiniDatasetMulti(train_size,test_size,t_affordances=[0,1,2,3,4],positives_file='AffordancesDataset_augmented.h5',negatives_file='AffordancesDataset_negatives.h5',info_file='AffordancesDataset_augmented_names.txt'):
	# sample train_size random examples for each class
	# check for repeated indices
	the_affordances=np.expand_dims(np.asarray(t_affordances),0)
	names=np.genfromtxt(info_file,dtype='str',skip_header=0,delimiter=':')
	names=names[:,1]
	aff_initials=sorted(list(set([x[0] for x in names])))
	actual_initials=[]
	positive_data,positive_labels=load_h5(positives_file)
	negative_data,negative_labels=load_h5(negatives_file)
	for i in range(1,the_affordances.size):
		id_=the_affordances[0,i]
		thisIds=np.nonzero(positive_labels[:,id_])[0]
		print(thisIds.size)
		#select train and test
		np.random.shuffle(thisIds)
		train_ids=thisIds[:train_size]
		test_ids=thisIds[train_size:train_size+test_size]
		if i>1:
			#check for repeated
			new_=np.setdiff1d(train_ids,all_train_ids)
			all_train_ids=np.concatenate((all_train_ids,new_),axis=0)
			new_=np.setdiff1d(test_ids,all_test_ids)
			all_test_ids=np.concatenate((all_test_ids,new_),axis=0)
		else:
			all_train_ids=train_ids
			all_test_ids=test_ids
		actual_initials.append(aff_initials[id_-1])
	negative_ids_train=np.arange(train_size)
	negative_ids_test=np.arange(train_size,train_size+test_size)
	negative_labels_train=np.zeros((train_size,the_affordances.size))
	negative_labels_train[:,0]=1
	negative_labels_test=np.zeros((test_size,the_affordances.size))
	negative_labels_test[:,0]=1
	all_train_ids=all_train_ids.reshape(-1,1)
	all_test_ids=all_test_ids.reshape(-1,1)
	#print(all_train_ids.shape)

	train_data=np.concatenate((positive_data[all_train_ids.squeeze(),...],negative_data[negative_ids_train,...]),axis=0)
	train_labels=np.concatenate((positive_labels[all_train_ids,the_affordances],negative_labels_train),axis=0)
	#train_ids=np.arange(train_data.shape[0])
	#np.random.shuffle(train_ids)
	test_data=np.concatenate((positive_data[all_test_ids.squeeze(),...],negative_data[negative_ids_test,...]),axis=0)
	test_labels=np.concatenate((positive_labels[all_test_ids,the_affordances],negative_labels_test),axis=0)

	name='mini3_AffordancesDataset_train_'+''.join(actual_initials)+'_'+str(train_size)+'.h5'
	if os.path.exists(name):
		os.system('rm %s' % (name))
	save_h5(name,train_data,train_labels,'float32','uint8')
	name='mini3_AffordancesDataset_test_'+''.join(actual_initials)+'_'+str(train_size)+'.h5'
	if os.path.exists(name):
		os.system('rm %s' % (name))
	save_h5(name,test_data,test_labels,'float32','uint8')
	return train_data,train_labels,test_data,test_labels
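A possible invocation, assuming the default HDF5 and names files exist in the working directory, asking for 128 training and 32 test examples per affordance:

train_x, train_y, test_x, test_y = createMiniDatasetMulti(128, 32, t_affordances=[0, 1, 2, 3, 4])
print(train_x.shape, train_y.shape)  # e.g. (N_train, n_points, 3) and (N_train, 5)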
Example #3
def getDataset(file):
	#split dataset into smaller batches/files
	all_data,all_labels=load_h5(file)
	#shuffle them to add 'randomness'
	all_ids=np.arange(all_data.shape[0])
	np.random.shuffle(all_ids)
	all_data=all_data[all_ids,...]
	all_labels=all_labels[all_ids]
	print(all_data.shape)
	print(all_labels.shape)
	n_splits=all_labels.shape[0]//(496*4)	# integer division so range() below receives an int
	print(n_splits)
	for i in range(n_splits):
		name='AffordancesDataset_file'+str(i)+'.h5'
		start_id=i*(496*4)
		end_id=(i+1)*(496*4)
		toSaveData=all_data[start_id:end_id,...]
		toSaveLabels=all_labels[start_id:end_id]
		print('%s %d %d'%(name,start_id,end_id))
		if os.path.exists(name):
			os.system('rm %s' % (name))
		save_h5(name,toSaveData,toSaveLabels,'float32','uint8')
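For instance, run on the augmented dataset (an assumed input, named as in the other examples), this splits it into shuffled chunks of 1984 examples each:

getDataset('AffordancesDataset_augmented.h5')  # writes AffordancesDataset_file0.h5, AffordancesDataset_file1.h5, ...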
Example #4
def createMiniDatasets(train_size,test_size,positives_file='AffordancesDataset_augmented.h5',negatives_file='AffordancesDataset_negatives.h5',info_file='AffordancesDataset_augmented_names.txt',target_affordance='Filling'):
	# This function creates binary datasets for every affordance in the csv file
	# train_size and test_size are per class
	positive_data,_=load_h5(positives_file)
	print(positive_data.shape)
	negative_data,negative_labels=load_h5(negatives_file)
	if train_size>negative_data.shape[0] or test_size>negative_data.shape[0]:
		print('Number of examples exceeded')
		sys.exit()
	info=np.genfromtxt(info_file,dtype='str',skip_header=0,delimiter=':')
	real_ids=np.array([int(x) for x in info[:,0]])
	bar = Bar('Processing', max=real_ids.shape[0])
	# if need all binary datasets, make target_affordance an empty string
	#target_affordance=''
	count=1
	if target_affordance:
		print('Getting data for %s'%(target_affordance))
	else:
		print('Getting all data ')
	data_train=np.array([],dtype=np.float32).reshape(0,n_points,3)
	data_test=np.array([],dtype=np.float32).reshape(0,n_points,3)
	labels_train=np.array([],dtype=np.uint8).reshape(0,1)
	labels_test=np.array([],dtype=np.uint8).reshape(0,1)
	for j in range(real_ids.shape[0]):
		current_aff=info[j,1]
		if target_affordance:
			if target_affordance not in current_aff:
				continue
		# this file is supposed to have 128 examples per affordance x 8 orientations
		start_i=j*(128*8)
		end_i=(j+1)*(128*8)
		thisAffordance_data=positive_data[start_i:end_i,...]
		train_ids=np.random.randint(thisAffordance_data.shape[0],size=train_size)
		test_ids=np.setdiff1d(np.arange(thisAffordance_data.shape[0]),train_ids)

		test_ids=test_ids[:test_size]		

		#save training data
		sample_negative=np.arange(negative_data.shape[0])
		np.random.shuffle(sample_negative)
		data=np.concatenate((thisAffordance_data[train_ids,...],negative_data[sample_negative[:train_size],...]),axis=0)
		labels=np.concatenate((np.ones((train_size,1)),np.zeros((train_size,1))),axis=0)
		if target_affordance:
			#concat tmp data with training data
			data_train=np.concatenate((data,data_train),axis=0)
			labels_train=np.concatenate((count*labels,labels_train),axis=0)
		else:
			data_train=data
			labels_train=labels
		#shuffle the data
		shuffle_ids=np.arange(labels_train.shape[0])
		np.random.shuffle(shuffle_ids)
		data_train=data_train[shuffle_ids,...]
		labels_train=labels_train[shuffle_ids]
		if not target_affordance:
			name='binary_AffordancesDataset_train'+str(j)+'_'+str(train_size)+'.h5'
			if os.path.exists(name):
				os.system('rm %s'%(name))
			save_h5(name,data_train,labels_train,'float32','uint8')


		# save test data
		data=np.concatenate((thisAffordance_data[test_ids,...],negative_data[sample_negative[train_size:train_size+test_size],...]),axis=0)
		#print(thisAffordance_data[test_ids,...].shape[0])
		labels=np.concatenate((np.ones((test_size,1)),np.zeros((test_size,1))),axis=0)
		if target_affordance:
			data_test=np.concatenate((data,data_test),axis=0)
			labels_test=np.concatenate((count*labels,labels_test),axis=0)
			#count+=1
		else:
			data_test=data
			labels_test=labels
		shuffle_ids=np.arange(labels_test.shape[0])
		np.random.shuffle(shuffle_ids)
		data_test=data_test[shuffle_ids,...]
		labels_test=labels_test[shuffle_ids]
		if not target_affordance:
			name='binary_AffordancesDataset_test'+str(j)+'_'+str(train_size)+'.h5'
			if os.path.exists(name):
				os.system('rm %s'%(name))
			save_h5(name,data_test,labels_test,'float32','uint8')
		bar.next()
	bar.finish()
	if target_affordance:
		print('Saving test data for %s '%(target_affordance))
		# before saving, remove the imbalance in negatives
		# since there will be X (affordances) times more negatives
		'''ids_to_remove=np.nonzero(labels_test==0)[0]
		ids_to_remove=ids_to_remove[test_size:]
		ids_to_keep=np.setdiff1d(np.arange(labels_test.shape[0]),ids_to_remove)
		data_test=data_test[ids_to_keep,...]
		labels_test=labels_test[ids_to_keep]'''

		#Same for positives
		print(data_test.shape)
		print(labels_test.shape)
		name='miniAffordancesDataset_test_'+target_affordance+'_'+str(train_size)+'.h5'
		if os.path.exists(name):
			os.system('rm %s'%(name))
		save_h5(name,data_test,labels_test,'float32','uint8')
		name='miniAffordancesDataset_train_'+target_affordance+'_'+str(train_size)+'.h5'
		print('Saving train data for %s '%(target_affordance))
		'''ids_to_remove=np.nonzero(labels_train==0)[0]
		ids_to_remove=ids_to_remove[train_size:]
		ids_to_keep=np.setdiff1d(np.arange(labels_train.shape[0]),ids_to_remove)
		data_train=data_train[ids_to_keep,...]
		labels_train=labels_train[ids_to_keep]'''
		print(data_train.shape)
		print(labels_train.shape)
		if os.path.exists(name):
			os.system('rm %s'%(name))
		save_h5(name,data_train,labels_train,'float32','uint8')
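Two typical calls, sketched under the assumption that the default input files are present: with a target affordance the function accumulates one combined mini dataset, and with an empty string it writes a separate binary train/test pair per affordance:

# combined mini dataset for one affordance family
createMiniDatasets(128, 32, target_affordance='Filling')

# one binary_AffordancesDataset_train<j>_*.h5 / _test<j>_*.h5 pair per affordance
createMiniDatasets(128, 32, target_affordance='')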
Example #5
def getMiniDataset(class_ids,train_size,test_size,file='AffordancesDataset_augmented.h5',negatives_file='AffordancesDataset_negatives.h5',return_data=False,info_file='AffordancesDataset_augmented_names.txt'):
	#if return_data is True then no data is saved
	# and data/labels are returned to the caller
	
	names=np.genfromtxt(info_file,dtype='str',skip_header=0,delimiter=':')
	#print(names)
	real_ids=np.array([int(x) for x in names[:,0]])
	#print(real_ids)
	all_data,all_labels=load_h5(file)
	#print(np.unique(all_labels))
	if (test_size+train_size)>all_labels.shape[0]:
		print('Max data size is %d'%all_labels.shape[0])
		sys.exit()
	if test_size<0:
		test_size=all_labels.shape[0]-train_size

	#print(all_data.shape)
	train_ids=np.zeros((class_ids.shape[0]*train_size,1),dtype=np.int32)
	test_ids=np.zeros((class_ids.shape[0]*test_size,1),dtype=np.int32)
	#some_ids_new=np.zeros((class_ids.shape[0],1),dtype=np.uint8)
	new_labels_train=np.zeros((class_ids.shape[0]*train_size,1),dtype=np.uint8)
	new_labels_test=np.zeros((class_ids.shape[0]*test_size,1),dtype=np.uint8)
	aff_initial=[]
	for i in range(class_ids.shape[0]):
		ids=np.nonzero(all_labels==class_ids[i])[0]
		#print(all_labels[ids])
		#shuffle this class' ids and take train_size examples for training, test_size for testing
		test=np.arange(ids.shape[0],dtype=np.int32)
		np.random.shuffle(test)
		start_id=i*train_size
		end_id=(i+1)*train_size
		train_ids[start_id:end_id,0]=ids[test[:train_size]]
		new_labels_train[start_id:end_id,0]=i+1
		start_id=i*test_size
		end_id=(i+1)*test_size
		test_ids[start_id:end_id,0]=ids[test[train_size:train_size+test_size]]
		new_labels_test[start_id:end_id,0]=i+1
		aff_initial.append(names[class_ids[i],1][0])
	print(aff_initial)
	#print(ids_train)
	#train_ids=np.asarray(ids_train,dtype=np.uint8).reshape(-1,1)
	train_ids=np.squeeze(train_ids)
	test_ids=np.squeeze(test_ids)
	#print(train_ids.T)
	#print(test_ids.T)
	#sys.exit()
	#test_ids=np.squeeze(np.asarray(ids_test,dtype=np.uint8).reshape(-1,1))
	print('Training set %d'%train_ids.shape[0])
	print('Testing set %d'%test_ids.shape[0])
	new_data_train=all_data[train_ids,...]
	new_data_test=all_data[test_ids,...]
	#concatenate here the negatives
	negative_data,negative_labels=load_h5(negatives_file)
	new_data_train=np.concatenate((new_data_train,negative_data[:train_size]),axis=0)
	new_labels_train=np.concatenate((new_labels_train,np.zeros((train_size,1))),axis=0)

	train_shuffle=np.arange(new_data_train.shape[0])
	np.random.shuffle(train_shuffle)
	new_data_train=new_data_train[train_shuffle,...]
	new_labels_train=new_labels_train[train_shuffle]


	
	name='mini_AffordancesDataset_train_'+''.join(aff_initial)+'_'+str(train_size)+'.h5'
	if not return_data:
		if os.path.exists(name):
				os.system('rm %s' % (name))
		save_h5(name,new_data_train,new_labels_train,'float32','uint8')

	new_data_test=np.concatenate((new_data_test,negative_data[train_size:train_size+test_size]),axis=0)
	new_labels_test=np.concatenate((new_labels_test,np.zeros((test_size,1))),axis=0)

	train_shuffle=np.arange(new_data_test.shape[0])
	np.random.shuffle(train_shuffle)
	new_data_test=new_data_test[train_shuffle,...]
	new_labels_test=new_labels_test[train_shuffle]

	print('Training data ')
	print(new_data_train.shape)
	print(new_labels_train.shape)
	print('Test data ')
	print(new_data_test.shape)
	print(new_labels_test.shape)

	name='mini_AffordancesDataset_test_'+''.join(aff_initial)+'_'+str(train_size)+'.h5'
	if not return_data:
		if os.path.exists(name):
				os.system('rm %s' % (name))
		save_h5(name,new_data_test,new_labels_test,'float32','uint8')
		# save the original class ids to keep track of the affordances involved in this dataset
		name='mini_AffordancesDataset_names_'+''.join(aff_initial)+'_'+str(train_size)+'.txt'
		with open(name, "w") as text_file:
			for i in range(class_ids.shape[0]):
				print('%d:%s' % (i+1,names[class_ids[i],1]))
				text_file.write("%d:%s\n" % (i+1,names[class_ids[i],1]))
	else:
		for i in range(class_ids.shape[0]):
			print('%d:%s' % (i+1,names[class_ids[i],1]))


	'''fig = plt.figure()
	ax = fig.add_subplot(111, projection='3d')
	ax.hold(False)
	for i in range(new_labels_test.shape[0]):
		ax.scatter(new_data_test[i,:,0],new_data_test[i,:,1],new_data_test[i,:,2],s=10)
		#print(names[class_ids[new_labels_test[i,0]],1])
		ax.set_title(names[class_ids[new_labels_test[i,0]],1]+' '+str(new_labels_test[i,0]))
		plt.pause(5)
		plt.draw()'''
	if return_data:
		return new_data_train,new_labels_train,new_data_test,new_labels_test
	else:
		return 0,0,0,0
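A sketched call, assuming affordance class ids 1-3 exist in the label file; with `return_data=True` nothing is written to disk and the arrays come back to the caller:

import numpy as np

train_x, train_y, test_x, test_y = getMiniDataset(np.array([1, 2, 3]), 64, 32, return_data=True)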
Example #6
def getMultiAffordanceData(file):
	path=os.path.abspath(file)
	pos=path.rfind('/')
	tokens=path[pos+1:].split('_')
	descriptor_id=tokens[6]
	scene_name=tokens[2]
	scene_name=path[:pos]+'/'+scene_name+'_d.pcd'
	file_descriptor=path[:pos]+'/tmp'+descriptor_id+'.csv'
	labels=np.genfromtxt(file_descriptor,dtype='str',skip_header=1,delimiter=',')
	print('Affordances in descriptor %d'%labels.shape[0])
	fileId=tokens[-1]
	tokens=fileId.split('.')
	fileId=tokens[0]
	print(fileId)
	res_data_file=path[:pos]+'/'+fileId+'_goodPointsX.pcd'
	res_points_file=path[:pos]+'/'+fileId+'_goodPoints.pcd'

	data=load_pcd_data(res_data_file,cols=None)
	#print(data.shape)
	points=load_pcd_data(res_points_file,cols=(0,1,2))
	real_c_data=load_pcd_data(res_points_file,cols=(3,),dataType=np.uint32)
	#real_c_data=np.array(colors[:,-1],dtype=np.int32)
	red=np.array((real_c_data>>16)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
	green=np.array((real_c_data>>8)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
	blue=np.array((real_c_data)& 0x0000ff,dtype=np.uint8).reshape(-1,1)

	real_c_data=np.concatenate((red,green,blue),axis=1)

	perPoint=np.sum(real_c_data,axis=1)
	bounds=np.cumsum(perPoint)


	howMany=np.zeros((labels.shape[0],1),dtype=np.int32)
	for i in range(labels.shape[0]):
		success=np.nonzero(data[:,0]==i)[0]
		howMany[i]=success.size
	ids_target=np.nonzero(howMany>n_samples)[0]
	print('Real found: %d'%ids_target.size)
	# re
	st_i=0
	end_i=bounds[0]
	print('Getting single affordance-instance per point')
	bar = Bar('Processing', max=bounds.shape[0])
	for i in range(bounds.shape[0]-1):
		if points[i,-1]>0.3:
			if i>0:
				st_i=bounds[i]
				end_i=bounds[i+1]
			someData=data[st_i:end_i,...]
			#get unique aff_ids
			ids=np.unique(someData[:,0])
			aff_ids=np.intersect1d(ids,ids_target)
			if aff_ids.shape[0]==0:
				continue
			toKeep=np.zeros((aff_ids.shape[0],7))
			for j in range(aff_ids.shape[0]):
				affData=np.nonzero(someData[:,0]==aff_ids[j])[0]
				keep=np.argmax(someData[affData,2])
				toKeep[j,:3]=points[i,...]
				toKeep[j,3:6]=someData[affData[keep],:3]
				toKeep[j,6]=i
			if i>0:
				newData=np.concatenate((newData,toKeep),axis=0)
			else:
				newData=toKeep
		bar.next()
	bar.finish()

	print('Recompute real targets')
	for i in range(labels.shape[0]):
		success=np.nonzero(newData[:,3]==i)[0]
		howMany[i]=success.size
	ids_target=np.nonzero(howMany>n_samples)[0]

	print('Real found: %d'%ids_target.size)
	ids_target=np.nonzero(howMany>n_samples)[0]
	print('Real found: %d'%ids_target.size)
	if n_orientations>1:
		name='AffordancesDataset_augmented_names.txt'
	else:
		name='AffordancesDataset_names.txt'
	aff_initials=[]
	with open(name, "w") as text_file:
		for i in range(ids_target.shape[0]):
			text_file.write("%d:%s-%s\n" % (i,labels[ids_target[i],0],labels[ids_target[i],2]))
			#aff_initials.append(labels[ids_target[i],0][0])
	#aff_initials=set(aff_initials)
	#print(aff_initials)
	#sys.exit()


	#Test the 4-affordance case, where all instances of an interaction count as a single affordance class
	aff_lims=np.array([0,8,17,91,92])
	#sample 128 points for every affordance, regardless of their id
	sampled_ids=np.zeros((ids_target.size,n_samples))
	for i in range(ids_target.shape[0]):
		interesting_ids=np.nonzero(newData[:,3]==ids_target[i])[0]
		sorted_ids=np.argsort(newData[interesting_ids,5])
		sorted_ids=interesting_ids[sorted_ids[::-1]]
		sampled_ids[i,...]=newData[sorted_ids[:n_samples],-1]


	t=np.unique(sampled_ids.reshape(1,-1))
	dataPoints=np.zeros((t.size,3),dtype=np.float32)
	dataPoints_labels=np.zeros((t.size,5),dtype=np.uint8)
	initials=[]
	for i in range(t.size):
		#get all affordances for this point
		ids=np.nonzero(newData[:,-1]==t[i])[0]
		labels=np.zeros(ids.shape[0],dtype=np.uint8)
		for j in range(ids.shape[0]):
			labels[j]=np.nonzero(aff_lims>newData[ids[j],3])[0][0]
		labels=np.unique(labels)
		dataPoints[i]=newData[ids[0],:3]
		dataPoints_labels[i,labels]=1
		#extract voxel
	if n_orientations>1:
		name='dataPointsAffordances_augmented.h5'
	else:
		name='dataPointsAffordances.h5'
	if os.path.exists(name):
		os.system('rm %s' % (name))
	save_h5(name,dataPoints,dataPoints_labels,'float32','uint8')

	#get dense cloud
	dense_sceneCloud=pypcd.PointCloud.from_path(scene_name).pc_data
	pc_array = np.array([[x, y, z] for x,y,z in dense_sceneCloud])

	#generate pointclouds that were not detected to test against single example training
	good_points_file=path[:pos]+'/'+fileId+'_goodPointsIds.pcd'
	sampled_points_file=path[:pos]+'/'+fileId+'_samplePointsIds.pcd'
	sampled_ids=np.sort(load_pcd_data(sampled_points_file,cols=(0,),dataType=np.int32))
	good_ids=np.sort(load_pcd_data(good_points_file,cols=(0,),dataType=np.int32))	
	non_affordance=np.setdiff1d(np.arange(sampled_ids.shape[0]),good_ids)
	sampled_points_file=path[:pos]+'/'+fileId+'_samplePoints.pcd'
	sampled_points=load_pcd_data(sampled_points_file,cols=(0,1,2))
	np.random.shuffle(non_affordance)
	print('Getting 1024 negative examples ')
	#shuffle negative examples ids
	bar = Bar('Processing', max=1024)
	negative_examples=np.zeros((1024,n_points,3),dtype=np.float32)
	for i in range(1024):
		point=pc_array[non_affordance[i],...]
		voxel=getVoxel(point,max_rad,pc_array)
		sample=sample_cloud(voxel,n_points)
		negative_examples[i,...]=sample
		bar.next()
	bar.finish()
	negative_labels=100*np.ones((1024,1),dtype=np.uint8)
	print('Got %d negative examples'%(negative_examples.shape[0]))
	print(negative_examples[0,0,:])
	name='AffordancesDataset_negatives.h5'
	if os.path.exists(name):
		os.system('rm %s' % (name))
	save_h5(name,negative_examples,negative_labels,'float32','uint8')


	print('Sampling actual voxels from %s of %d points'%(scene_name,pc_array.shape[0]))
	dataSet_data=np.zeros((dataPoints.shape[0]*n_orientations,n_points,3),dtype=np.float32)
	dataSet_labels=np.zeros((dataPoints_labels.shape[0]*n_orientations,dataPoints_labels.shape[1]),dtype=np.uint8)
	print(dataSet_data.shape)
	count=0
	#data_type 0->centered
	data_type=1
	aff_names=np.array(['Non','Filling','Hanging','Placing','Sitting'])
	#extract voxels and pointclouds for dataset
	fig = plt.figure()
	ax = fig.add_subplot(111, projection='3d')
	ax.hold(False)
	for aff in range(dataPoints.shape[0]):
		t_names=np.nonzero(dataPoints_labels[aff])[0]
		print('%d/%d Training example for %s'%(aff,dataPoints.shape[0],np.array_str(aff_names[t_names])) )
		point=dataPoints[aff,:]
		#print(point.shape)
		voxel=getVoxel(point,max_rad,pc_array)
		if voxel.shape[0]<n_points:
			# original referenced an undefined 'aVoxel'; resample the voxel with replacement
			# (an assumption) so the fixed-size arrays below still receive n_points rows
			sample=voxel[np.random.choice(voxel.shape[0],n_points),:]
		else:
			sample=sample_cloud(voxel,n_points)
		if data_type==0:
			centered_sample=sample-point
		else:
			centered_sample=sample
		#rotate this voxel n_orientations times around Z (up)
		for j in range(n_orientations):
			rotated_voxel=rotate_point_cloud_by_angle(np.expand_dims(centered_sample,axis=0),j*2*np.pi/n_orientations).squeeze()
			dataSet_data[count,...]=rotated_voxel
			dataSet_labels[count,...]=dataPoints_labels[aff,...]
			count+=1
			if j==0:
				ax.scatter(rotated_voxel[:,0],rotated_voxel[:,1],rotated_voxel[:,2],s=3)
				plt.pause(0.2)
				plt.draw()			
	
	if n_orientations>1:
		name='AffordancesDataset_augmented.h5'
	else:
		name='AffordancesDataset.h5'
	if os.path.exists(name):
		os.system('rm %s' % (name))
	save_h5(name,dataSet_data,dataSet_labels,'float32','uint8')

	
	return dataPoints,dataPoints_labels
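`rotate_point_cloud_by_angle` is one of the module-level helpers these examples assume. A minimal sketch of what it might look like, assuming (as the loop comment suggests) rotation of a batched (B, N, 3) cloud about the Z (up) axis; the repository's actual helper may differ:

import numpy as np

def rotate_point_cloud_by_angle(batch_data, rotation_angle):
    # Rotate every (N, 3) cloud in the batch by 'rotation_angle' radians about Z.
    rotated = np.zeros(batch_data.shape, dtype=batch_data.dtype)
    cosval, sinval = np.cos(rotation_angle), np.sin(rotation_angle)
    rotation_matrix = np.array([[cosval, -sinval, 0.0],
                                [sinval,  cosval, 0.0],
                                [0.0,     0.0,    1.0]])
    for k in range(batch_data.shape[0]):
        rotated[k, ...] = np.dot(batch_data[k, ...], rotation_matrix)
    return rotated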
Example #7
def createDataSet(file):
	path=os.path.abspath(file)
	pos=path.rfind('/')
	tokens=path[pos+1:].split('_')
	descriptor_id=tokens[6]
	scene_name=tokens[2]
	scene_name=path[:pos]+'/'+scene_name+'_d.pcd'
	file_descriptor=path[:pos]+'/tmp'+descriptor_id+'.csv'
	labels=np.genfromtxt(file_descriptor,dtype='str',skip_header=1,delimiter=',')
	print('Affordances in descriptor %d'%labels.shape[0])
	fileId=tokens[-1]
	tokens=fileId.split('.')
	fileId=tokens[0]
	print(fileId)
	res_data_file=path[:pos]+'/'+fileId+'_goodPointsX.pcd'
	res_points_file=path[:pos]+'/'+fileId+'_goodPoints.pcd'

	data=load_pcd_data(res_data_file,cols=None)
	#print(data.shape)
	points=load_pcd_data(res_points_file,cols=(0,1,2))
	real_c_data=load_pcd_data(res_points_file,cols=(3,),dataType=np.uint32)
	#real_c_data=np.array(colors[:,-1],dtype=np.int32)
	red=np.array((real_c_data>>16)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
	green=np.array((real_c_data>>8)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
	blue=np.array((real_c_data)& 0x0000ff,dtype=np.uint8).reshape(-1,1)

	real_c_data=np.concatenate((red,green,blue),axis=1)

	perPoint=np.sum(real_c_data,axis=1)
	bounds=np.cumsum(perPoint)
	#print(bounds)
	howMany=np.zeros((labels.shape[0],1),dtype=np.int32)
	all_data=np.zeros((data.shape[0],6))

	for i in range(all_data.shape[0]):
		point_id=np.nonzero(bounds>i)[0][0]
		all_data[i,:3]=points[point_id,:]
		all_data[i,3:]=data[i,:3]


	for i in range(labels.shape[0]):
		success=np.nonzero(all_data[:,3]==i)[0]
		success2=np.nonzero(all_data[success,2]>0.3)[0]
		howMany[i]=success2.size

	ids_target=np.nonzero(howMany>n_samples)[0]
	print('Real found: %d'%ids_target.size)
	if n_orientations>1:
		name='AffordancesDataset_augmented_names.txt'
	else:
		name='AffordancesDataset_names.txt'
	with open(name, "w") as text_file:
		for i in range(ids_target.shape[0]):
			text_file.write("%d:%s-%s\n" % (i,labels[ids_target[i],0],labels[ids_target[i],2]))
	#
	#print(labels[ids_target,1:])

	all_points=np.zeros((ids_target.size,n_samples,3))
	all_points_score=np.zeros((ids_target.size,n_samples))
	for i in range(ids_target.shape[0]):
		#get the 3D point for the response
		success=np.nonzero((all_data[:,3]==ids_target[i])&(all_data[:,2]>0.3))[0]
		sorted_ids=np.argsort(all_data[success,5])
		print('Sampling for %s %s in %d points(%f,%f)'%(labels[ids_target[i],0],labels[ids_target[i],2],success.size,np.max(all_data[success,5]),np.min(all_data[success,5])))
		sorted_ids=sorted_ids[::-1]
		for j in range(n_samples):
			all_points[i,j,:]=all_data[success[sorted_ids[j]],:3]
			all_points_score[i,j]=all_data[success[sorted_ids[j]],5]
		#print('Min %f max %f'%(all_points_score[i,0],all_points_score[i,-1]))
	labels_d=np.arange(ids_target.size)
	print('Sampled points maxZ %f minZ %f'%(np.max(all_points[:,:,2].reshape(1,-1)),np.min(all_points[:,:,2].reshape(1,-1))) )

	#sys.exit()

	if n_orientations>1:
		name='dataPointsAffordances_augmented.h5'
	else:
		name='dataPointsAffordances.h5'
	if os.path.exists(name):
		os.system('rm %s' % (name))
	save_h5(name,all_points,labels_d,'float32','uint8')


	#get dense cloud
	dense_sceneCloud=pypcd.PointCloud.from_path(scene_name).pc_data
	pc_array = np.array([[x, y, z] for x,y,z in dense_sceneCloud])

	#generate pointclouds that were not detected to test against single example training
	good_points_file=path[:pos]+'/'+fileId+'_goodPointsIds.pcd'
	sampled_points_file=path[:pos]+'/'+fileId+'_samplePointsIds.pcd'
	sampled_ids=np.sort(load_pcd_data(sampled_points_file,cols=(0,),dataType=np.int32))
	good_ids=np.sort(load_pcd_data(good_points_file,cols=(0,),dataType=np.int32))	
	non_affordance=np.setdiff1d(np.arange(sampled_ids.shape[0]),good_ids)
	sampled_points_file=path[:pos]+'/'+fileId+'_samplePoints.pcd'
	sampled_points=load_pcd_data(sampled_points_file,cols=(0,1,2))
	np.random.shuffle(non_affordance)
	print('Getting 1024 negative examples ')
	#shuffle negative examples ids
	bar = Bar('Processing', max=1024)
	negative_examples=np.zeros((1024,n_points,3),dtype=np.float32)
	for i in range(1024):
		point=pc_array[non_affordance[i],...]
		voxel=getVoxel(point,max_rad,pc_array)
		minP=np.min(voxel,0)
		maxP=np.max(voxel,0)
		dist=np.linalg.norm(maxP-minP,axis=0)/2
		print('RAD %f rad %f estimation %f'%(dist,max_rad,max_rad*np.sqrt(3)))
		sample=sample_cloud(voxel,n_points)
		negative_examples[i,...]=sample
		bar.next()
	bar.finish()
	negative_labels=100*np.ones((1024,1),dtype=np.uint8)
	print('Got %d negative examples'%(negative_examples.shape[0]))
	print(negative_examples[0,0,:])
	name='AffordancesDataset_negatives.h5'
	if os.path.exists(name):
		os.system('rm %s' % (name))
	save_h5(name,negative_examples,negative_labels,'float32','uint8')
	#sys.exit()


	print('Sampling actual voxels from %s of %d points'%(scene_name,pc_array.shape[0]))
	dataSet_data=np.zeros((all_points.shape[0]*all_points.shape[1]*n_orientations,n_points,3),dtype=np.float32)
	dataSet_labels=np.zeros((all_points.shape[0]*all_points.shape[1]*n_orientations,1),dtype=np.uint8)
	print(dataSet_data.shape)
	count=0
	#data_type 0->centered
	data_type=1
	#extract voxels and pointclouds for dataset
	fig = plt.figure()
	ax = fig.add_subplot(111, projection='3d')
	ax.hold(False)
	for aff in range(all_points.shape[0]):
		print('Training examples for %s %s'%(labels[ids_target[aff],0],labels[ids_target[aff],2]))
		bar = Bar('Processing', max=all_points.shape[1])
		for n_sample in range(all_points.shape[1]):
			point=all_points[aff,n_sample,:].reshape(3,-1)
			#print(point.shape)
			voxel=getVoxel(point,max_rad,pc_array)
			if voxel.shape[0]<n_points:
				# original referenced an undefined 'aVoxel'; resample the voxel with replacement
				# (an assumption) so the fixed-size arrays below still receive n_points rows
				sample=voxel[np.random.choice(voxel.shape[0],n_points),:]
			else:
				sample=sample_cloud(voxel,n_points)
			if data_type==0:
				centered_sample=sample-point
			else:
				centered_sample=sample
			#rotate this voxel n_orientations times around Z (up)
			for j in range(n_orientations):
				rotated_voxel=rotate_point_cloud_by_angle(np.expand_dims(centered_sample,axis=0),j*2*np.pi/n_orientations).squeeze()
				dataSet_data[count,...]=rotated_voxel
				dataSet_labels[count]=labels_d[aff]
				count+=1
			if n_sample==0:
				ax.scatter(rotated_voxel[:,0],rotated_voxel[:,1],rotated_voxel[:,2],s=3)
				plt.pause(0.2)
				plt.draw()
			bar.next()
		bar.finish()
	if n_orientations>1:
		name='AffordancesDataset_augmented.h5'
	else:
		name='AffordancesDataset.h5'
	if os.path.exists(name):
		os.system('rm %s' % (name))
	save_h5(name,dataSet_data,dataSet_labels,'float32','uint8')
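`getVoxel` and `sample_cloud` are also assumed rather than shown. A rough sketch of the array-based variants used in Examples #6 and #7 (Example #9 instead passes a BallTree and indexes the cloud with the result, so the real helpers are likely more general):

import numpy as np

def getVoxel(seed_point, rad, cloud):
    # Spherical crop: keep the points of 'cloud' lying within 'rad' of 'seed_point'.
    dists = np.linalg.norm(cloud - np.asarray(seed_point).reshape(1, 3), axis=1)
    return cloud[dists <= rad, :]

def sample_cloud(cloud, n):
    # Random subsample of n points (without replacement, assuming the cloud is large enough).
    ids = np.random.choice(cloud.shape[0], n, replace=False)
    return cloud[ids, :]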
Example #8
def extractSingleLabeledData(data_file):
    data, label = load_h5(data_file)
    print(label.shape)
    train_examples = 512
    test_examples = 128
    examples = train_examples + test_examples
    print(examples * label.shape[1], data.shape[1], 3)
    new_data_train = np.zeros(
        (train_examples * label.shape[1], data.shape[1], 3), dtype=np.float32)
    new_labels_train = np.zeros((train_examples * label.shape[1], 1),
                                dtype=np.int32)

    new_data_test = np.zeros(
        (test_examples * label.shape[1], data.shape[1], 3), dtype=np.float32)
    new_labels_test = np.zeros((test_examples * label.shape[1], 1),
                               dtype=np.int32)

    #for every affordance
    st = 0
    st2 = 0
    for i in range(label.shape[1]):
        #get the pointclouds of this affordance
        target_indices = np.nonzero(label[:, i])[0]
        #print('Aff %d %d'%(i,target_indices.size))
        to_sample_from = np.arange(target_indices.size)
        np.random.shuffle(to_sample_from)
        if to_sample_from.size < (train_examples + test_examples):
            real_train_examples = int(to_sample_from.size * .8 // 1)
            #print(real_train_examples)
            real_test_examples = to_sample_from.size - real_train_examples
            print('Less data from %d,%d' %
                  (real_train_examples, real_test_examples))
        else:
            real_train_examples = train_examples
            real_test_examples = test_examples

        ed = st + real_train_examples
        ed2 = st2 + real_test_examples

        real_sample = target_indices[to_sample_from[:real_train_examples]]
        real_sample_test = target_indices[
            to_sample_from[real_train_examples:real_train_examples +
                           real_test_examples]]

        new_data_train[st:ed, ...] = data[real_sample, ...]
        new_labels_train[st:ed, ...] = i

        new_data_test[st2:ed2, ...] = data[real_sample_test, ...]
        new_labels_test[st2:ed2, ...] = i

        st = ed
        st2 = ed2
    # get the real data in case some affordances had less examples than the target
    new_data_train = new_data_train[:ed, ...]
    new_labels_train = new_labels_train[:ed, ...]
    new_data_test = new_data_test[:ed2, ...]
    new_labels_test = new_labels_test[:ed2, ...]

    #shuffle things
    ids = np.arange(new_labels_train.shape[0])
    np.random.shuffle(ids)
    new_data_train = new_data_train[ids, ...]
    new_labels_train = new_labels_train[ids, ...]

    ids = np.arange(new_labels_test.shape[0])
    np.random.shuffle(ids)
    new_data_test = new_data_test[ids, ...]
    new_labels_test = new_labels_test[ids, ...]

    print('New binary train data %d' % new_labels_train.shape[0])
    print('New binary test data %d' % new_labels_test.shape[0])
    name = 'SinglelabelDataSet_train_' + data_file.split('.')[0].split(
        '_')[-1] + '.h5'
    print(name)
    save_h5(name, new_data_train, new_labels_train, 'float32', 'uint8')
    name = 'SinglelabelDataSet_test_' + data_file.split('.')[0].split(
        '_')[-1] + '.h5'
    print(name)
    save_h5(name, new_data_test, new_labels_test, 'float32', 'uint8')
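Given the naming scheme (the `split('_')[-1]` suffix ends up in the output names), a plausible input is one of the multilabel split files written by Example #1:

extractSingleLabeledData('MultilabelDataSet_splitTrain4.h5')
# writes SinglelabelDataSet_train_splitTrain4.h5 and SinglelabelDataSet_test_splitTrain4.h5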
Example #9
def getSingleTraining(file):
    path = os.path.abspath(file)
    pos = path.rfind('/')
    tokens = path[pos + 1:].split('_')
    descriptor_id = tokens[6]
    scene_name = tokens[2]
    scene_name = path[:pos] + '/' + scene_name + '_d.pcd'
    file_descriptor = path[:pos] + '/tmp' + descriptor_id + '.csv'
    labels = np.genfromtxt(file_descriptor,
                           dtype='str',
                           skip_header=1,
                           delimiter=',')
    print('Affordances in descriptor %d' % labels.shape[0])
    fileId = tokens[-1]
    tokens = fileId.split('.')
    fileId = tokens[0]
    # print(fileId)
    # # Need only those affordances that have
    # # over 128 good predictions in this result file

    # res_data_file=path[:pos]+'/'+fileId+'_goodPointsX.pcd'
    # res_points_file=path[:pos]+'/'+fileId+'_goodPoints.pcd'

    # data=load_pcd_data(res_data_file,cols=None)
    # #print(data.shape)
    # points,real_c_data=load_pcd_data_binary(res_points_file)
    # #real_c_data=load_pcd_data(res_points_file,cols=(3,),dataType=np.uint32)
    # #real_c_data=np.array(colors[:,-1],dtype=np.int32)
    # red=np.array((real_c_data>>16)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    # green=np.array((real_c_data>>8)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    # blue=np.array((real_c_data)& 0x0000ff,dtype=np.uint8).reshape(-1,1)

    # real_c_data=np.concatenate((red,green,blue),axis=1)

    # perPoint=np.sum(real_c_data,axis=1)
    # bounds=np.cumsum(perPoint)
    # #print(bounds)
    # howMany=np.zeros((labels.shape[0],1),dtype=np.int32)
    # all_data=np.zeros((data.shape[0],6))

    # for i in range(all_data.shape[0]):
    # 	point_id=np.nonzero(bounds>i)[0][0]
    # 	all_data[i,:3]=points[point_id,:]
    # 	all_data[i,3:]=data[i,:3]

    # for i in range(labels.shape[0]):
    # 	success=np.nonzero(all_data[:,3]==i)[0]
    # 	#success2=np.nonzero(all_data[success,2]>0.2)[0]
    # 	howMany[i]=success.size

    # ids_target=np.nonzero(howMany>n_samples)[0]
    # print('Real found: %d'%ids_target.size)
    # print(ids_target)
    #sys.exit()

    new_c = np.genfromtxt('filtered_counts2.csv', delimiter=',', dtype='int')
    with open('file_lists2.csv', 'r') as f:
        reader = csv.reader(f)
        new_n = list(reader)

    samples = 32
    points = 4096
    ids_target = np.nonzero(new_c >= samples)[0]
    print('Actually using %d affordances' % (ids_target.size))

    fig = plt.figure()
    plt.ion()
    ax = fig.add_subplot(121, projection='3d')
    ax2 = fig.add_subplot(122, projection='3d')
    unique_scenes = dict()
    k = 10
    #ax.hold(False)
    if k > 1:
        bar = Bar('Creating original single example training dataset',
                  max=ids_target.shape[0])
        for i in range(ids_target.shape[0]):
            interaction = ids_target[i]
            path_to_data = os.path.abspath('../data')
            name = path_to_data + '/affordances/binaryOc_AffordancesDataset_train' + str(
                interaction) + '_' + str(TRAIN_EXAMPLES) + '.h5'
            if os.path.exists(name):
                continue
            #find training data
            aff_dir = labels[interaction, 0]
            query_object = labels[interaction, 2]
            data_file = path[:pos] + "/" + aff_dir + "/ibs_full_" + labels[
                interaction, 1] + "_" + query_object + ".txt"
            with open(data_file) as f:
                content = f.readlines()
                # you may also want to remove whitespace characters like `\n` at the end of each line
            content = [x.strip() for x in content]
            scene_file = content[0].split(":")[1]
            tmp = content[8].split(":")[1]
            datapoint = tmp.split(',')
            test_point = np.expand_dims(np.asarray(
                [float(x) for x in datapoint]),
                                        axis=0)
            data_file = path[:pos] + "/" + aff_dir + "/" + scene_file
            # if the scene file has no point-cloud extension, try appending .ply and then .pcd
            if '.pcd' not in scene_file and '.ply' not in scene_file:
                try_data_file = data_file + '.ply'
                if os.path.exists(try_data_file):
                    data_file = try_data_file
                else:
                    # maybe the .pcd extension is the one missing
                    try_data_file = data_file + '.pcd'
                    if os.path.exists(try_data_file):
                        data_file = try_data_file
            # if scene_file not in unique_scenes:
            # 	unique_scenes[scene_file]=interaction
            # else:
            # 	continue
            if '.pcd' in data_file:
                cloud_training = load_pcd_data(data_file)
            else:
                cloud_training = load_ply_data(data_file)
            data = np.zeros((2, n_points, 3), dtype=np.float32)
            data_labels = np.zeros((2, 1), dtype=np.int32)
            boundingBoxDiag = np.linalg.norm(
                np.min(cloud_training, 0) - np.max(cloud_training, 0))
            #print('%s Diagonal %f Points %d'%(scene_file,boundingBoxDiag,cloud_training.shape[0]))
            #sample a voxel with rad from test-point
            kdt = BallTree(cloud_training, leaf_size=5, metric='euclidean')
            voxel_ids = getVoxel(test_point, max_rad, kdt)
            voxel = cloud_training[voxel_ids, :]
            sample = sample_cloud(voxel, n_points)
            sample_cloud_training = sample_cloud(cloud_training, n_points * 2)
            #generate a negative example with noise around test_point
            low = test_point[0, 0] - max_rad
            high = test_point[0, 0] + max_rad
            tmp1 = (high - low) * np.random.random_sample(
                (n_points, 1)) + (low)
            low = test_point[0, 1] - max_rad
            high = test_point[0, 1] + max_rad
            tmp2 = (high - low) * np.random.random_sample(
                (n_points, 1)) + (low)
            low = test_point[0, 2] - max_rad
            high = test_point[0, 2] + max_rad
            tmp3 = (high - low) * np.random.random_sample(
                (n_points, 1)) + (low)
            negative_cloud_training = np.concatenate((tmp1, tmp2, tmp3),
                                                     axis=1)
            data[0, ...] = sample - test_point
            data_labels[0, ...] = np.zeros((1, 1), dtype=np.int32)
            data[1, ...] = negative_cloud_training - test_point
            data_labels[1, ...] = np.ones((1, 1), dtype=np.int32)
            #name=path_to_data+'/affordances/binaryOc_AffordancesDataset_train'+str(interaction)+'_'+str(TRAIN_EXAMPLES)+'.h5'
            #print(name)
            save_h5(name, data, data_labels, 'float32', 'uint8')
            ax.scatter(sample_cloud_training[:, 0],
                       sample_cloud_training[:, 1],
                       sample_cloud_training[:, 2],
                       s=1,
                       c='b')
            ax.scatter(sample[:, 0], sample[:, 1], sample[:, 2], s=3, c='b')
            ax2.scatter(negative_cloud_training[:, 0],
                        negative_cloud_training[:, 1],
                        negative_cloud_training[:, 2],
                        s=3,
                        c='r')
            plt.pause(1)
            plt.draw()
            ax.clear()
            ax2.clear()
            bar.next()
        bar.finish()
    name = '../data/affordances/names.txt'
    with open(name, "w") as text_file:
        for i in range(ids_target.shape[0]):
            text_file.write(
                "%d:%s-%s\n" %
                (i, labels[ids_target[i], 0], labels[ids_target[i], 2]))
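In this example `getVoxel` receives a prebuilt scikit-learn BallTree and its result is used as an index array into `cloud_training`, so a BallTree-backed sketch (again an assumption about the missing helper) would be a radius query:

import numpy as np
from sklearn.neighbors import BallTree

def getVoxel(seed_point, rad, kdt):
    # Indices of all points within 'rad' of the seed point, via a BallTree radius query.
    ind = kdt.query_radius(np.asarray(seed_point).reshape(1, -1), r=rad)
    return ind[0]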