# Example #1
# 0
def getMultiAffordanceData(file):
    """Build a multi-label affordance dataset from one detection result file.

    The 7th, 3rd and last ``_``-separated tokens of the file name give the
    descriptor id, the scene name and the result file id.  The directory of
    *file* must also contain ``tmp<descriptor_id>.csv``, ``<scene>_d.pcd`` and
    the ``<fileId>_goodPointsX.pcd``, ``<fileId>_goodPoints.pcd``,
    ``<fileId>_goodPointsIds.pcd`` and ``<fileId>_samplePointsIds.pcd`` clouds.

    Relies on the module-level settings ``n_samples``, ``n_orientations``,
    ``n_points`` and ``max_rad`` and on the helpers ``load_pcd_data``,
    ``getVoxel``, ``sample_cloud``, ``rotate_point_cloud_by_angle`` and
    ``save_h5`` defined elsewhere.

    Side effects: writes ``AffordancesDataset[_augmented]_names.txt``,
    ``dataPointsAffordances[_augmented].h5``,
    ``AffordancesDataset_negatives.h5`` and
    ``AffordancesDataset[_augmented].h5`` into the current working directory
    (replacing existing ``.h5`` files) and opens a matplotlib figure that
    previews the first orientation of every training voxel.

    Args:
        file: Path to the result file whose name encodes the ids above.

    Returns:
        ``(dataPoints, dataPoints_labels)``: a ``(P, 3)`` float32 array of
        selected points and a ``(P, 5)`` uint8 multi-hot array with one column
        per entry of ``['Non', 'Filling', 'Hanging', 'Placing', 'Sitting']``.
    """
    path = os.path.abspath(file)
    pos = path.rfind('/')
    base_dir = path[:pos]
    tokens = path[pos + 1:].split('_')
    descriptor_id = tokens[6]
    scene_name = base_dir + '/' + tokens[2] + '_d.pcd'
    file_descriptor = base_dir + '/tmp' + descriptor_id + '.csv'
    # Kept under its own name: the per-point loop further down used to reuse
    # the name `labels` and silently overwrote this table.
    descriptor = np.genfromtxt(file_descriptor, dtype='str', skip_header=1, delimiter=',')
    n_affordances = descriptor.shape[0]
    print('Affordances in descriptor %d' % n_affordances)
    fileId = tokens[-1].split('.')[0]
    print(fileId)
    res_data_file = base_dir + '/' + fileId + '_goodPointsX.pcd'
    res_points_file = base_dir + '/' + fileId + '_goodPoints.pcd'

    data = load_pcd_data(res_data_file, cols=None)
    points = load_pcd_data(res_points_file, cols=(0, 1, 2))
    packed = load_pcd_data(res_points_file, cols=(3,), dataType=np.uint32)
    # The 4th column is unpacked into three 8-bit channels; their sum is used
    # as the number of rows of `data` that belong to each point.
    red = np.array((packed >> 16) & 0x0000ff, dtype=np.uint8).reshape(-1, 1)
    green = np.array((packed >> 8) & 0x0000ff, dtype=np.uint8).reshape(-1, 1)
    blue = np.array(packed & 0x0000ff, dtype=np.uint8).reshape(-1, 1)
    channels = np.concatenate((red, green, blue), axis=1)

    # int64 keeps the slice arithmetic below in signed integers.
    perPoint = np.sum(channels, axis=1, dtype=np.int64)
    bounds = np.cumsum(perPoint)
    # Point i owns data[starts[i]:bounds[i]].  The earlier code used
    # data[bounds[i]:bounds[i+1]] for i > 0, i.e. the rows of the *next*
    # point, and never visited the last point.
    starts = bounds - perPoint

    howMany = np.array(
        [np.count_nonzero(data[:, 0] == i) for i in range(n_affordances)],
        dtype=np.int32)
    ids_target = np.nonzero(howMany > n_samples)[0]
    print('Real found: %d' % ids_target.size)

    print('Getting single affordance-instance per point')
    kept_rows = []
    bar = Bar('Processing', max=bounds.shape[0])
    for i in range(bounds.shape[0]):
        bar.next()
        # Only points whose z coordinate exceeds 0.3 are considered.
        if not points[i, -1] > 0.3:
            continue
        someData = data[starts[i]:bounds[i], ...]
        aff_ids = np.intersect1d(np.unique(someData[:, 0]), ids_target)
        if aff_ids.shape[0] == 0:
            continue
        # Row layout: point xyz | first three response columns | point index.
        toKeep = np.zeros((aff_ids.shape[0], 7))
        for j, aff_id in enumerate(aff_ids):
            affData = np.nonzero(someData[:, 0] == aff_id)[0]
            # Keep the response with the largest third column for this id.
            keep = np.argmax(someData[affData, 2])
            toKeep[j, :3] = points[i, ...]
            toKeep[j, 3:6] = someData[affData[keep], :3]
            toKeep[j, 6] = i
        kept_rows.append(toKeep)
    bar.finish()
    # Collect once at the end: `newData` used to exist only if point 0 passed
    # the filters (NameError otherwise) and was grown by repeated concatenation.
    if kept_rows:
        newData = np.concatenate(kept_rows, axis=0)
    else:
        newData = np.zeros((0, 7))

    print('Recompute real targets')
    howMany = np.array(
        [np.count_nonzero(newData[:, 3] == i) for i in range(n_affordances)],
        dtype=np.int32)
    ids_target = np.nonzero(howMany > n_samples)[0]
    print('Real found: %d' % ids_target.size)

    if n_orientations > 1:
        name = 'AffordancesDataset_augmented_names.txt'
    else:
        name = 'AffordancesDataset_names.txt'
    with open(name, "w") as text_file:
        for i, target in enumerate(ids_target):
            text_file.write("%d:%s-%s\n" % (i, descriptor[target, 0], descriptor[target, 2]))

    # Test 4 affordances case, where all instances of interaction account for
    # a single affordance class: ids in [0,8) -> 1, [8,17) -> 2, [17,91) -> 3,
    # [91,92) -> 4.
    aff_lims = np.array([0, 8, 17, 91, 92])
    # Take, for every target affordance, the n_samples points with the
    # largest value in column 5 and remember their point index.
    sampled_ids = np.zeros((ids_target.size, n_samples))
    for i, target in enumerate(ids_target):
        interesting_ids = np.nonzero(newData[:, 3] == target)[0]
        order = np.argsort(newData[interesting_ids, 5])[::-1]
        sampled_ids[i, ...] = newData[interesting_ids[order[:n_samples]], -1]

    unique_point_ids = np.unique(sampled_ids)
    dataPoints = np.zeros((unique_point_ids.size, 3), dtype=np.float32)
    dataPoints_labels = np.zeros((unique_point_ids.size, 5), dtype=np.uint8)
    for i, point_id in enumerate(unique_point_ids):
        # All affordances detected at this point.
        rows = np.nonzero(newData[:, -1] == point_id)[0]
        # First index of aff_lims that is greater than the affordance id.
        classes = np.unique(np.searchsorted(aff_lims, newData[rows, 3], side='right'))
        dataPoints[i] = newData[rows[0], :3]
        dataPoints_labels[i, classes] = 1
    if n_orientations > 1:
        name = 'dataPointsAffordances_augmented.h5'
    else:
        name = 'dataPointsAffordances.h5'
    if os.path.exists(name):
        os.remove(name)
    save_h5(name, dataPoints, dataPoints_labels, 'float32', 'uint8')

    # Dense scene cloud as an (N, 3) array.
    dense_sceneCloud = pypcd.PointCloud.from_path(scene_name).pc_data
    pc_array = np.array([[x, y, z] for x, y, z in dense_sceneCloud])

    # Generate point clouds that were not detected, to test against single
    # example training.
    good_points_file = base_dir + '/' + fileId + '_goodPointsIds.pcd'
    sampled_ids_file = base_dir + '/' + fileId + '_samplePointsIds.pcd'
    sampled_point_ids = np.sort(load_pcd_data(sampled_ids_file, cols=(0,), dataType=np.int32))
    good_ids = np.sort(load_pcd_data(good_points_file, cols=(0,), dataType=np.int32))
    non_affordance = np.setdiff1d(np.arange(sampled_point_ids.shape[0]), good_ids)
    np.random.shuffle(non_affordance)
    print('Getting 1024 negative examples ')
    bar = Bar('Processing', max=1024)
    negative_examples = np.zeros((1024, n_points, 3), dtype=np.float32)
    for i in range(1024):
        # NOTE(review): these indices come from the sample-point id space but
        # are used to index the dense scene cloud (the _samplePoints.pcd cloud
        # used to be loaded here and was never used) - confirm this is intended.
        point = pc_array[non_affordance[i], ...]
        voxel = getVoxel(point, max_rad, pc_array)
        negative_examples[i, ...] = sample_cloud(voxel, n_points)
        bar.next()
    bar.finish()
    negative_labels = 100 * np.ones((1024, 1), dtype=np.uint8)
    print('Got %d negative examples' % (negative_examples.shape[0]))
    print(negative_examples[0, 0, :])
    name = 'AffordancesDataset_negatives.h5'
    if os.path.exists(name):
        os.remove(name)
    save_h5(name, negative_examples, negative_labels, 'float32', 'uint8')

    print('Sampling actual voxels from %s of %d points' % (scene_name, pc_array.shape[0]))
    dataSet_data = np.zeros((dataPoints.shape[0] * n_orientations, n_points, 3), dtype=np.float32)
    dataSet_labels = np.zeros((dataPoints_labels.shape[0] * n_orientations, dataPoints_labels.shape[1]), dtype=np.uint8)
    print(dataSet_data.shape)
    count = 0
    # data_type 0 -> clouds are centred on their query point.
    data_type = 1
    aff_names = np.array(['Non', 'Filling', 'Hanging', 'Placing', 'Sitting'])
    # Extract voxels and point clouds for the dataset.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    for aff in range(dataPoints.shape[0]):
        t_names = np.nonzero(dataPoints_labels[aff])[0]
        print('%d/%d Training example for %s' % (aff, dataPoints.shape[0], np.array_str(aff_names[t_names])))
        point = dataPoints[aff, :]
        voxel = getVoxel(point, max_rad, pc_array)
        if voxel.shape[0] < n_points:
            # NOTE(review): `aVoxel` is not defined in this function; unless it
            # is a module-level global this branch raises NameError - confirm.
            sample = aVoxel
        else:
            sample = sample_cloud(voxel, n_points)
        if data_type == 0:
            centered_sample = sample - point
        else:
            centered_sample = sample
        # Rotate this voxel n_orientations times around Z (up).
        for j in range(n_orientations):
            rotated_voxel = rotate_point_cloud_by_angle(
                np.expand_dims(centered_sample, axis=0),
                j * 2 * np.pi / n_orientations).squeeze()
            dataSet_data[count, ...] = rotated_voxel
            dataSet_labels[count, ...] = dataPoints_labels[aff, ...]
            count += 1
            if j == 0:
                # Axes.hold(False) no longer exists in Matplotlib >= 3.0;
                # clearing before each scatter gives the same preview.
                ax.clear()
                ax.scatter(rotated_voxel[:, 0], rotated_voxel[:, 1], rotated_voxel[:, 2], s=3)
                plt.pause(0.2)
                plt.draw()

    if n_orientations > 1:
        name = 'AffordancesDataset_augmented.h5'
    else:
        name = 'AffordancesDataset.h5'
    if os.path.exists(name):
        os.remove(name)
    save_h5(name, dataSet_data, dataSet_labels, 'float32', 'uint8')

    return dataPoints, dataPoints_labels
def getSingleTraining(file):
    """Create one two-example training file per sufficiently frequent affordance.

    The descriptor table ``tmp<descriptor_id>.csv`` is read from the directory
    of *file* (the descriptor id is the 7th ``_``-separated token of the file
    name).  Affordances whose entry in ``filtered_counts2.csv`` (read from the
    current working directory) is at least 32 are used.  For each of them the
    ``ibs_full_*.txt`` description is parsed for the training scene name
    (line 1) and the query point (line 9); a voxel of radius ``max_rad``
    around that point is sampled as example 0 (label 0) and uniform noise in
    the surrounding cube as example 1 (label 1), both stored relative to the
    query point.

    Relies on the module-level names ``TRAIN_EXAMPLES``, ``n_points``,
    ``max_rad``, ``load_pcd_data``, ``load_ply_data``, ``getVoxel``,
    ``sample_cloud`` and ``save_h5`` defined elsewhere.

    Side effects: writes
    ``../data/affordances/binaryOc_AffordancesDataset_train<id>_<TRAIN_EXAMPLES>.h5``
    for every affordance that does not have one yet, writes
    ``../data/affordances/names.txt`` and shows an interactive matplotlib
    preview of each example pair.

    Args:
        file: Path to the result file whose name encodes the descriptor id.
    """
    path = os.path.abspath(file)
    pos = path.rfind('/')
    base_dir = path[:pos]
    tokens = path[pos + 1:].split('_')
    descriptor_id = tokens[6]
    file_descriptor = base_dir + '/tmp' + descriptor_id + '.csv'
    labels = np.genfromtxt(file_descriptor,
                           dtype='str',
                           skip_header=1,
                           delimiter=',')
    print('Affordances in descriptor %d' % labels.shape[0])

    # Only affordances with at least `samples` filtered detections are used.
    new_c = np.genfromtxt('filtered_counts2.csv', delimiter=',', dtype='int')
    samples = 32
    ids_target = np.nonzero(new_c >= samples)[0]
    print('Actually using %d affordances' % (ids_target.size))

    fig = plt.figure()
    plt.ion()
    ax = fig.add_subplot(121, projection='3d')
    ax2 = fig.add_subplot(122, projection='3d')

    path_to_data = os.path.abspath('../data')
    bar = Bar('Creating original single example training dataset',
              max=ids_target.shape[0])
    for interaction in ids_target:
        name = (path_to_data + '/affordances/binaryOc_AffordancesDataset_train'
                + str(interaction) + '_' + str(TRAIN_EXAMPLES) + '.h5')
        if os.path.exists(name):
            # Already generated; still advance the bar so it reaches 100%.
            bar.next()
            continue

        # Locate and parse the description of the training example.
        aff_dir = labels[interaction, 0]
        query_object = labels[interaction, 2]
        description_file = (base_dir + "/" + aff_dir + "/ibs_full_"
                            + labels[interaction, 1] + "_" + query_object + ".txt")
        with open(description_file) as f:
            content = [line.strip() for line in f]
        scene_file = content[0].split(":")[1]
        datapoint = content[8].split(":")[1].split(',')
        test_point = np.expand_dims(np.asarray([float(x) for x in datapoint]),
                                    axis=0)

        # The scene name may lack its extension; try .ply first, then .pcd.
        data_file = base_dir + "/" + aff_dir + "/" + scene_file
        if '.pcd' not in scene_file and '.ply' not in scene_file:
            for extension in ('.ply', '.pcd'):
                if os.path.exists(data_file + extension):
                    data_file = data_file + extension
                    break
        if '.pcd' in data_file:
            cloud_training = load_pcd_data(data_file)
        else:
            cloud_training = load_ply_data(data_file)

        # Positive example: a voxel of radius max_rad around the query point.
        kdt = BallTree(cloud_training, leaf_size=5, metric='euclidean')
        voxel_ids = getVoxel(test_point, max_rad, kdt)
        voxel = cloud_training[voxel_ids, :]
        sample = sample_cloud(voxel, n_points)
        sample_cloud_training = sample_cloud(cloud_training, n_points * 2)

        # Negative example: uniform noise in the cube of half-side max_rad
        # centred on the query point, drawn one axis at a time.
        noise_columns = []
        for axis in range(3):
            low = test_point[0, axis] - max_rad
            high = test_point[0, axis] + max_rad
            noise_columns.append(
                (high - low) * np.random.random_sample((n_points, 1)) + low)
        negative_cloud_training = np.concatenate(noise_columns, axis=1)

        data = np.zeros((2, n_points, 3), dtype=np.float32)
        data_labels = np.zeros((2, 1), dtype=np.int32)
        data[0, ...] = sample - test_point
        data_labels[0, ...] = 0
        data[1, ...] = negative_cloud_training - test_point
        data_labels[1, ...] = 1
        save_h5(name, data, data_labels, 'float32', 'uint8')

        # Preview: training scene plus sampled voxel (left), noise (right).
        ax.scatter(sample_cloud_training[:, 0],
                   sample_cloud_training[:, 1],
                   sample_cloud_training[:, 2],
                   s=1,
                   c='b')
        ax.scatter(sample[:, 0], sample[:, 1], sample[:, 2], s=3, c='b')
        ax2.scatter(negative_cloud_training[:, 0],
                    negative_cloud_training[:, 1],
                    negative_cloud_training[:, 2],
                    s=3,
                    c='r')
        plt.pause(1)
        plt.draw()
        ax.clear()
        ax2.clear()
        bar.next()
    bar.finish()

    name = '../data/affordances/names.txt'
    with open(name, "w") as text_file:
        for i, target in enumerate(ids_target):
            text_file.write("%d:%s-%s\n" % (i, labels[target, 0], labels[target, 2]))
# Example #3
# 0
def createDataSet(file):
    """Build the single-label affordance dataset from one detection result file.

    The 7th, 3rd and last ``_``-separated tokens of the file name give the
    descriptor id, the scene name and the result file id.  The directory of
    *file* must also contain ``tmp<descriptor_id>.csv``, ``<scene>_d.pcd`` and
    the ``<fileId>_goodPointsX.pcd``, ``<fileId>_goodPoints.pcd``,
    ``<fileId>_goodPointsIds.pcd`` and ``<fileId>_samplePointsIds.pcd`` clouds.

    For every affordance with more than ``n_samples`` responses at points
    whose z coordinate exceeds 0.3, the ``n_samples`` responses with the
    largest value in the third response column are kept; a voxel of radius
    ``max_rad`` is extracted around each of them and stored in
    ``n_orientations`` rotations.

    Relies on the module-level settings ``n_samples``, ``n_orientations``,
    ``n_points`` and ``max_rad`` and on the helpers ``load_pcd_data``,
    ``getVoxel``, ``sample_cloud``, ``rotate_point_cloud_by_angle`` and
    ``save_h5`` defined elsewhere.

    Side effects: writes ``AffordancesDataset[_augmented]_names.txt``,
    ``dataPointsAffordances[_augmented].h5``,
    ``AffordancesDataset_negatives.h5`` and
    ``AffordancesDataset[_augmented].h5`` into the current working directory
    (replacing existing ``.h5`` files) and opens a matplotlib preview figure.

    Args:
        file: Path to the result file whose name encodes the ids above.
    """
    path = os.path.abspath(file)
    pos = path.rfind('/')
    base_dir = path[:pos]
    tokens = path[pos + 1:].split('_')
    descriptor_id = tokens[6]
    scene_name = base_dir + '/' + tokens[2] + '_d.pcd'
    file_descriptor = base_dir + '/tmp' + descriptor_id + '.csv'
    labels = np.genfromtxt(file_descriptor, dtype='str', skip_header=1, delimiter=',')
    print('Affordances in descriptor %d' % labels.shape[0])
    fileId = tokens[-1].split('.')[0]
    print(fileId)
    res_data_file = base_dir + '/' + fileId + '_goodPointsX.pcd'
    res_points_file = base_dir + '/' + fileId + '_goodPoints.pcd'

    data = load_pcd_data(res_data_file, cols=None)
    points = load_pcd_data(res_points_file, cols=(0, 1, 2))
    packed = load_pcd_data(res_points_file, cols=(3,), dataType=np.uint32)
    # The 4th column is unpacked into three 8-bit channels; their sum is used
    # as the number of rows of `data` that belong to each point.
    red = np.array((packed >> 16) & 0x0000ff, dtype=np.uint8).reshape(-1, 1)
    green = np.array((packed >> 8) & 0x0000ff, dtype=np.uint8).reshape(-1, 1)
    blue = np.array(packed & 0x0000ff, dtype=np.uint8).reshape(-1, 1)
    channels = np.concatenate((red, green, blue), axis=1)

    # int64 so that searchsorted compares like with like.
    perPoint = np.sum(channels, axis=1, dtype=np.int64)
    bounds = np.cumsum(perPoint)

    # Row r of `data` belongs to the first point whose cumulative count
    # exceeds r.  searchsorted(side='right') yields exactly that index for all
    # rows at once, replacing one np.nonzero scan of `bounds` per row.
    # Row layout of all_data: point xyz | first three response columns.
    all_data = np.zeros((data.shape[0], 6))
    point_ids = np.searchsorted(bounds, np.arange(data.shape[0]), side='right')
    all_data[:, :3] = points[point_ids, :]
    all_data[:, 3:] = data[:, :3]

    # Count, per affordance id, the responses at points with z above 0.3.
    high_enough = all_data[:, 2] > 0.3
    howMany = np.array(
        [np.count_nonzero((all_data[:, 3] == i) & high_enough)
         for i in range(labels.shape[0])],
        dtype=np.int32)
    ids_target = np.nonzero(howMany > n_samples)[0]
    print('Real found: %d' % ids_target.size)
    if n_orientations > 1:
        name = 'AffordancesDataset_augmented_names.txt'
    else:
        name = 'AffordancesDataset_names.txt'
    with open(name, "w") as text_file:
        for i, target in enumerate(ids_target):
            text_file.write("%d:%s-%s\n" % (i, labels[target, 0], labels[target, 2]))

    all_points = np.zeros((ids_target.size, n_samples, 3))
    all_points_score = np.zeros((ids_target.size, n_samples))
    for i, target in enumerate(ids_target):
        # Responses of this affordance, best (largest column 5) first.
        success = np.nonzero((all_data[:, 3] == target) & high_enough)[0]
        scores = all_data[success, 5]
        print('Sampling for %s %s in %d points(%f,%f)' % (labels[target, 0], labels[target, 2], success.size, np.max(scores), np.min(scores)))
        best = success[np.argsort(scores)[::-1][:n_samples]]
        all_points[i, :, :] = all_data[best, :3]
        all_points_score[i, :] = all_data[best, 5]
    # NOTE(review): labels are stored as uint8 below, so more than 256 target
    # affordances would wrap around - confirm the expected class count.
    labels_d = np.arange(ids_target.size)
    print('Sampled points maxZ %f minZ %f' % (np.max(all_points[:, :, 2].reshape(1, -1)), np.min(all_points[:, :, 2].reshape(1, -1))))

    if n_orientations > 1:
        name = 'dataPointsAffordances_augmented.h5'
    else:
        name = 'dataPointsAffordances.h5'
    if os.path.exists(name):
        os.remove(name)
    save_h5(name, all_points, labels_d, 'float32', 'uint8')

    # Dense scene cloud as an (N, 3) array.
    dense_sceneCloud = pypcd.PointCloud.from_path(scene_name).pc_data
    pc_array = np.array([[x, y, z] for x, y, z in dense_sceneCloud])

    # Generate point clouds that were not detected, to test against single
    # example training.
    good_points_file = base_dir + '/' + fileId + '_goodPointsIds.pcd'
    sampled_ids_file = base_dir + '/' + fileId + '_samplePointsIds.pcd'
    sampled_ids = np.sort(load_pcd_data(sampled_ids_file, cols=(0,), dataType=np.int32))
    good_ids = np.sort(load_pcd_data(good_points_file, cols=(0,), dataType=np.int32))
    non_affordance = np.setdiff1d(np.arange(sampled_ids.shape[0]), good_ids)
    np.random.shuffle(non_affordance)
    print('Getting 1024 negative examples ')
    bar = Bar('Processing', max=1024)
    negative_examples = np.zeros((1024, n_points, 3), dtype=np.float32)
    for i in range(1024):
        # NOTE(review): these indices come from the sample-point id space but
        # are used to index the dense scene cloud (the _samplePoints.pcd cloud
        # used to be loaded here and was never used) - confirm this is intended.
        point = pc_array[non_affordance[i], ...]
        voxel = getVoxel(point, max_rad, pc_array)
        # Half of the bounding-box diagonal, printed next to max_rad.
        minP = np.min(voxel, 0)
        maxP = np.max(voxel, 0)
        dist = np.linalg.norm(maxP - minP, axis=0) / 2
        print('RAD %f rad %f estimation %f' % (dist, max_rad, max_rad * np.sqrt(3)))
        negative_examples[i, ...] = sample_cloud(voxel, n_points)
        bar.next()
    bar.finish()
    negative_labels = 100 * np.ones((1024, 1), dtype=np.uint8)
    print('Got %d negative examples' % (negative_examples.shape[0]))
    print(negative_examples[0, 0, :])
    name = 'AffordancesDataset_negatives.h5'
    if os.path.exists(name):
        os.remove(name)
    save_h5(name, negative_examples, negative_labels, 'float32', 'uint8')

    print('Sampling actual voxels from %s of %d points' % (scene_name, pc_array.shape[0]))
    n_examples = all_points.shape[0] * all_points.shape[1] * n_orientations
    dataSet_data = np.zeros((n_examples, n_points, 3), dtype=np.float32)
    dataSet_labels = np.zeros((n_examples, 1), dtype=np.uint8)
    print(dataSet_data.shape)
    count = 0
    # data_type 0 -> clouds are centred on their query point.
    data_type = 1
    # Extract voxels and point clouds for the dataset.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    for aff in range(all_points.shape[0]):
        print('Training examples for %s %s' % (labels[ids_target[aff], 0], labels[ids_target[aff], 2]))
        bar = Bar('Processing', max=all_points.shape[1])
        for n_sample in range(all_points.shape[1]):
            point = all_points[aff, n_sample, :].reshape(3, -1)
            voxel = getVoxel(point, max_rad, pc_array)
            if voxel.shape[0] < n_points:
                # NOTE(review): `aVoxel` is not defined in this function;
                # unless it is a module-level global this branch raises
                # NameError - confirm.
                sample = aVoxel
            else:
                sample = sample_cloud(voxel, n_points)
            if data_type == 0:
                centered_sample = sample - point
            else:
                centered_sample = sample
            # Rotate this voxel n_orientations times around Z (up).
            for j in range(n_orientations):
                rotated_voxel = rotate_point_cloud_by_angle(
                    np.expand_dims(centered_sample, axis=0),
                    j * 2 * np.pi / n_orientations).squeeze()
                dataSet_data[count, ...] = rotated_voxel
                dataSet_labels[count] = labels_d[aff]
                count += 1
            if n_sample == 0:
                # Axes.hold(False) no longer exists in Matplotlib >= 3.0;
                # clearing before each scatter gives the same preview.
                ax.clear()
                ax.scatter(rotated_voxel[:, 0], rotated_voxel[:, 1], rotated_voxel[:, 2], s=3)
                plt.pause(0.2)
                plt.draw()
            bar.next()
        bar.finish()
    if n_orientations > 1:
        name = 'AffordancesDataset_augmented.h5'
    else:
        name = 'AffordancesDataset.h5'
    if os.path.exists(name):
        os.remove(name)
    save_h5(name, dataSet_data, dataSet_labels, 'float32', 'uint8')
def _find_cloud_file(base_name):
    """Return the scene cloud path that belongs to *base_name*, or None.

    Three layouts are handled: names containing ``space/`` and all other
    non-``DATA`` directories are resolved through the matching
    ``All_affordances_*_<id>.pcd`` file, whose third ``_``-separated token
    names the scene; directories containing ``DATA`` use the first
    ``*_clean.pcd`` file.  None is returned when the glob finds nothing.
    """
    pos_id = base_name.rfind('/') + 1
    directory = base_name[:pos_id]
    target_file_id = base_name[pos_id:]
    in_space = "space/" in base_name
    if not in_space and "DATA" in directory:
        matches = glob.glob(directory + '*_clean.pcd')
        return matches[0] if matches else None
    matches = glob.glob(directory + 'All_affordances_*_' + target_file_id + '.pcd')
    if not matches:
        return None
    # The token is taken from the full matched path, as before.
    scene_token = matches[0].split('_')[2]
    if in_space and "real" not in scene_token:
        return directory + scene_token + "_d.pcd"
    return directory + scene_token + ".pcd"


def sampleFromFile(affordance,
                   list_of_files,
                   number_of_samples,
                   pointsPerCloud=4096):
    """Sample training points and centred voxels for one affordance.

    Files are drawn uniformly with replacement from *list_of_files*; from
    each drawn file as many detections of *affordance* are taken as the file
    was drawn, best score first.  A detection is skipped when the voxel of
    radius ``max_rad`` around it holds fewer than ``pointsPerCloud / 4``
    points.  Voxels with fewer than *pointsPerCloud* points are padded with
    copies of the query point, larger ones are reduced with ``sample_cloud``.

    Relies on the module-level names ``A_ID``, ``SCORE``, ``max_rad``,
    ``getVoxel``, ``sample_cloud`` and ``load_pcd_data_binary`` defined
    elsewhere.

    Args:
        affordance: Integer affordance id matched against column ``A_ID``.
        list_of_files: Path prefixes; ``<prefix>_newData.csv`` holds the
            detections (rows are stored whole, so six columns are expected).
        number_of_samples: Total number of samples to return.
        pointsPerCloud: Number of points in every returned cloud.

    Returns:
        ``(dataPoints, dataClouds)`` with shapes ``(number_of_samples, 6)``
        and ``(number_of_samples, pointsPerCloud, 3)``; the clouds are
        expressed relative to their query point.  If a scene cloud is missing
        or a file runs out of usable detections, the pair
        ``(np.empty((0, 6)), np.empty((0, 0, 0)))`` is returned instead.
    """
    num_files = len(list_of_files)
    files_to_sample = np.random.randint(num_files,
                                        size=(1, number_of_samples))
    repeated = np.bincount(files_to_sample[0, :], minlength=num_files)
    actually_sample_files = np.nonzero(repeated)[0]
    # np.float was removed in NumPy 1.24; float64 is the type it aliased.
    dataPoints = np.empty((number_of_samples, 6), dtype=np.float64)
    dataClouds = np.empty((number_of_samples, pointsPerCloud, 3),
                          dtype=np.float32)
    start_id = 0
    outOfPoints = False
    bar = Bar('Sampling ', max=number_of_samples)
    for file_idx in actually_sample_files:
        base_name = list_of_files[file_idx]
        cloud_file = _find_cloud_file(base_name)
        sample_from_file = repeated[file_idx]
        # atleast_2d keeps a single-row CSV indexable as data[:, col].
        data = np.atleast_2d(
            np.genfromtxt(base_name + "_newData.csv",
                          delimiter=",",
                          dtype='float32'))
        target_ids = np.nonzero(data[:, A_ID].astype(int) == affordance)[0]
        # Candidate detections of this affordance, best score first.
        sorted_subset = np.argsort(data[target_ids, SCORE])[::-1]
        if cloud_file is None:
            # The glob found no scene file; this used to raise IndexError.
            print('No scene cloud found for %s' % (base_name))
            return np.empty((0, 6)), np.empty((0, 0, 0))
        if not os.path.exists(cloud_file):
            print('No input cloud %s' % (cloud_file))
            return np.empty((0, 6)), np.empty((0, 0, 0))
        cloud, _ = load_pcd_data_binary(cloud_file)
        kdt = BallTree(cloud, leaf_size=5, metric='euclidean')

        j = 0  # samples accepted from this file
        k = 0  # next candidate in sorted_subset
        while j < sample_from_file:
            if k >= sorted_subset.shape[0]:
                # Also covers files without any detection of the affordance,
                # which used to raise IndexError.
                outOfPoints = True
                print('Exhausted File')
                break
            dataPoints[start_id + j, :] = data[target_ids[sorted_subset[k]], :]
            point = dataPoints[start_id + j, :3]
            voxel_ids = getVoxel(point, max_rad, kdt)
            voxel = cloud[voxel_ids, :]
            actual_voxel_size = voxel.shape[0]
            if actual_voxel_size < (pointsPerCloud / 4):
                # Too sparse: report it and try the next candidate.
                if k == 0:
                    print("\n File %s" % (cloud_file))
                outputText = "Voxel " + str(actual_voxel_size) + " " + str(
                    k) + "/" + str(sorted_subset.shape[0])
                print(outputText, end='\r')
                k += 1
                continue
            if actual_voxel_size >= pointsPerCloud:
                sample = sample_cloud(voxel, pointsPerCloud)
            else:
                print('padding')
                # Fill up with copies of the query point.
                padding = point + np.zeros(
                    (pointsPerCloud - actual_voxel_size, 3),
                    dtype=np.float32)
                sample = np.concatenate((padding, voxel), axis=0)
            # Centre the cloud on the query point.
            dataClouds[start_id + j, ...] = sample - point
            j += 1
            # Advance to the next candidate; without this every further
            # sample from the file reused the same top-scoring detection.
            k += 1
        if outOfPoints:
            break
        start_id += sample_from_file
        bar.next(sample_from_file)
    bar.finish()
    if outOfPoints or start_id != number_of_samples:
        return np.empty((0, 6)), np.empty((0, 0, 0))
    return dataPoints, dataClouds