# NOTE (assumption): these helpers rely on module-level imports and globals defined elsewhere in
# this file (numpy as np, os, sys, csv, matplotlib.pyplot as plt, pypcd, progress.bar.Bar,
# sklearn.neighbors.BallTree, load_h5/save_h5, the pcd/ply loaders, getVoxel, sample_cloud,
# rotate_point_cloud_by_angle, and the n_points/n_samples/n_orientations/max_rad/TRAIN_EXAMPLES constants).
def split_data(list_of_files, train_size=0.8):
    # merge several .h5 files and re-split them into train/test partitions
    for i in range(len(list_of_files)):
        tmp_data, tmp_labels = load_h5(list_of_files[i])
        if i > 0:
            data = np.concatenate((data, tmp_data), axis=0)
            labels = np.concatenate((labels, tmp_labels), axis=0)
        else:
            data = tmp_data
            labels = tmp_labels
        print(tmp_data.shape)
    print('All data %d' % (data.shape[0]))
    all_ids = np.arange(data.shape[0])
    np.random.shuffle(all_ids)
    train_ids_size = int(all_ids.size * train_size)
    print(train_ids_size)
    train_ids = all_ids[:train_ids_size]
    new_train_data = data[train_ids, ...]
    new_train_labels = labels[train_ids, ...]
    test_ids = all_ids[train_ids_size:]
    new_test_data = data[test_ids, ...]
    new_test_labels = labels[test_ids, ...]
    print('Train data %d' % new_train_labels.shape[0])
    print('Test data %d' % new_test_labels.shape[0])
    save_h5('MultilabelDataSet_splitTrain4.h5', new_train_data, new_train_labels, 'float32', 'uint8')
    save_h5('MultilabelDataSet_splitTest4.h5', new_test_data, new_test_labels, 'float32', 'uint8')
    np.save('MultilabelDataSet_splitTest4.npy', test_ids)
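# Usage sketch (assumption, not part of the original code): merge a list of .h5 chunks and
# re-split them 80/20, e.g.
#   split_data(['AffordancesDataset_file0.h5', 'AffordancesDataset_file1.h5'], train_size=0.8)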
def createMiniDatasetMulti(train_size,test_size,t_affordances=[0,1,2,3,4],positives_file='AffordancesDataset_augmented.h5',negatives_file='AffordancesDataset_negatives.h5',info_file='AffordancesDataset_augmented_names.txt'):
    # sample train_size random examples for each class
    # check repeated
    the_affordances=np.expand_dims(np.asarray(t_affordances),0)
    names=np.genfromtxt(info_file,dtype='str',skip_header=0,delimiter=':')
    names=names[:,1]
    aff_initials=sorted(list(set([x[0] for x in names])))
    actual_initials=[]
    positive_data,positive_labels=load_h5(positives_file)
    negative_data,negative_labels=load_h5(negatives_file)
    for i in range(1,the_affordances.size):
        id_=the_affordances[0,i]
        thisIds=np.nonzero(positive_labels[:,id_])[0]
        print(thisIds.size)
        #select train and test
        np.random.shuffle(thisIds)
        train_ids=thisIds[:train_size]
        test_ids=thisIds[train_size:train_size+test_size]
        if i>1:
            #check for repeated
            new_=np.setdiff1d(train_ids,all_train_ids)
            all_train_ids=np.concatenate((all_train_ids,new_),axis=0)
            new_=np.setdiff1d(test_ids,all_test_ids)
            all_test_ids=np.concatenate((all_test_ids,new_),axis=0)
        else:
            all_train_ids=train_ids
            all_test_ids=test_ids
        actual_initials.append(aff_initials[id_-1])
    negative_ids_train=np.arange(train_size)
    negative_ids_test=np.arange(train_size,train_size+test_size)
    negative_labels_train=np.zeros((train_size,the_affordances.size))
    negative_labels_train[:,0]=1
    negative_labels_test=np.zeros((test_size,the_affordances.size))
    negative_labels_test[:,0]=1
    all_train_ids=all_train_ids.reshape(-1,1)
    all_test_ids=all_test_ids.reshape(-1,1)
    #print(all_train_ids.shape)
    train_data=np.concatenate((positive_data[all_train_ids.squeeze(),...],negative_data[negative_ids_train,...]),axis=0)
    train_labels=np.concatenate((positive_labels[all_train_ids,the_affordances],negative_labels_train),axis=0)
    #train_ids=np.arange(train_data.shape[0])
    #np.random.shuffle(train_ids)
    test_data=np.concatenate((positive_data[all_test_ids.squeeze(),...],negative_data[negative_ids_test,...]),axis=0)
    test_labels=np.concatenate((positive_labels[all_test_ids,the_affordances],negative_labels_test),axis=0)
    name='mini3_AffordancesDataset_train_'+''.join(actual_initials)+'_'+str(train_size)+'.h5'
    if os.path.exists(name):
        os.system('rm %s' % (name))
    save_h5(name,train_data,train_labels,'float32','uint8')
    name='mini3_AffordancesDataset_test_'+''.join(actual_initials)+'_'+str(train_size)+'.h5'
    if os.path.exists(name):
        os.system('rm %s' % (name))
    save_h5(name,test_data,test_labels,'float32','uint8')
    return train_data,train_labels,test_data,test_labels
def getDataset(file):
    #split dataset into smaller batches/files
    all_data,all_labels=load_h5(file)
    #shuffle them to add 'randomness'
    all_ids=np.arange(all_data.shape[0])
    np.random.shuffle(all_ids)
    all_data=all_data[all_ids,...]
    all_labels=all_labels[all_ids]
    print(all_data.shape)
    print(all_labels.shape)
    n_splits=all_labels.shape[0]//(496*4)  #integer division so range() below works in Python 3
    print(n_splits)
    for i in range(n_splits):
        name='AffordancesDataset_file'+str(i)+'.h5'
        start_id=i*(496*4)
        end_id=(i+1)*(496*4)
        toSaveData=all_data[start_id:end_id,...]
        toSaveLabels=all_labels[start_id:end_id]
        print('%s %d %d'%(name,start_id,end_id))
        if os.path.exists(name):
            os.system('rm %s' % (name))
        save_h5(name,toSaveData,toSaveLabels,'float32','uint8')
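# Usage sketch (assumption, not part of the original code): shuffle a full dataset and write it
# back out in fixed-size chunks, e.g.
#   getDataset('AffordancesDataset_augmented.h5')  # writes AffordancesDataset_file<i>.h5 chunks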
def createMiniDatasets(train_size,test_size,positives_file='AffordancesDataset_augmented.h5',negatives_file='AffordancesDataset_negatives.h5',info_file='AffordancesDataset_augmented_names.txt',target_affordance='Filling'):
    # This function creates binary datasets for every affordance in the csv file
    # train_size and test_size are per class
    positive_data,_=load_h5(positives_file)
    print(positive_data.shape)
    negative_data,negative_labels=load_h5(negatives_file)
    if train_size>negative_data.shape[0] or test_size>negative_data.shape[0]:
        print('Number of examples exceeded')
        sys.exit()
    info=np.genfromtxt(info_file,dtype='str',skip_header=0,delimiter=':')
    real_ids=np.array([int(x) for x in info[:,0]])
    bar = Bar('Processing', max=real_ids.shape[0])
    # if all binary datasets are needed, make target_affordance an empty string
    #target_affordance=''
    count=1
    if target_affordance:
        print('Getting data for %s'%(target_affordance))
    else:
        print('Getting all data ')
    data_train=np.array([],dtype=np.float32).reshape(0,n_points,3)
    data_test=np.array([],dtype=np.float32).reshape(0,n_points,3)
    labels_train=np.array([],dtype=np.uint8).reshape(0,1)
    labels_test=np.array([],dtype=np.uint8).reshape(0,1)
    for j in range(real_ids.shape[0]):
        current_aff=info[j,1]
        if target_affordance:
            if target_affordance not in current_aff:
                continue
        # this file is supposed to have 128 examples per affordance x 8 orientations
        start_i=j*(128*8)
        end_i=(j+1)*(128*8)
        thisAffordance_data=positive_data[start_i:end_i,...]
        train_ids=np.random.randint(thisAffordance_data.shape[0],size=train_size)
        test_ids=np.setdiff1d(np.arange(thisAffordance_data.shape[0]),train_ids)
        test_ids=test_ids[:test_size]
        #save training data
        sample_negative=np.arange(negative_data.shape[0])
        np.random.shuffle(sample_negative)
        data=np.concatenate((thisAffordance_data[train_ids,...],negative_data[sample_negative[:train_size],...]),axis=0)
        labels=np.concatenate((np.ones((train_size,1)),np.zeros((train_size,1))),axis=0)
        if target_affordance:
            #concat tmp data with training data
            data_train=np.concatenate((data,data_train),axis=0)
            labels_train=np.concatenate((count*labels,labels_train),axis=0)
        else:
            data_train=data
            labels_train=labels
        #shuffle the data
        shuffle_ids=np.arange(labels_train.shape[0])
        np.random.shuffle(shuffle_ids)
        data_train=data_train[shuffle_ids,...]
        labels_train=labels_train[shuffle_ids]
        if not target_affordance:
            name='binary_AffordancesDataset_train'+str(j)+'_'+str(train_size)+'.h5'
            if os.path.exists(name):
                os.system('rm %s'%(name))
            save_h5(name,data_train,labels_train,'float32','uint8')
        # save test data
        data=np.concatenate((thisAffordance_data[test_ids,...],negative_data[sample_negative[train_size:train_size+test_size],...]),axis=0)
        #print(thisAffordance_data[test_ids,...].shape[0])
        labels=np.concatenate((np.ones((test_size,1)),np.zeros((test_size,1))),axis=0)
        if target_affordance:
            data_test=np.concatenate((data,data_test),axis=0)
            labels_test=np.concatenate((count*labels,labels_test),axis=0)
            #count+=1
        else:
            data_test=data
            labels_test=labels
        shuffle_ids=np.arange(labels_test.shape[0])
        np.random.shuffle(shuffle_ids)
        data_test=data_test[shuffle_ids,...]
        labels_test=labels_test[shuffle_ids]
        if not target_affordance:
            name='binary_AffordancesDataset_test'+str(j)+'_'+str(train_size)+'.h5'
            if os.path.exists(name):
                os.system('rm %s'%(name))
            save_h5(name,data_test,labels_test,'float32','uint8')
        bar.next()
    bar.finish()
    if target_affordance:
        print('Saving test data for %s '%(target_affordance))
        # before saving, remove unbalance in negatives
        # since there will be X (affordances) times more negatives
        '''ids_to_remove=np.nonzero(labels_test==0)[0]
        ids_to_remove=ids_to_remove[test_size:]
        ids_to_keep=np.setdiff1d(np.arange(labels_test.shape[0]),ids_to_remove)
        data_test=data_test[ids_to_keep,...]
        labels_test=labels_test[ids_to_keep]'''
        #Same for positives
        print(data_test.shape)
        print(labels_test.shape)
        name='miniAffordancesDataset_test_'+target_affordance+'_'+str(train_size)+'.h5'
        if os.path.exists(name):
            os.system('rm %s'%(name))
        save_h5(name,data_test,labels_test,'float32','uint8')
        name='miniAffordancesDataset_train_'+target_affordance+'_'+str(train_size)+'.h5'
        print('Saving train data for %s '%(target_affordance))
        '''ids_to_remove=np.nonzero(labels_train==0)[0]
        ids_to_remove=ids_to_remove[train_size:]
        ids_to_keep=np.setdiff1d(np.arange(labels_train.shape[0]),ids_to_remove)
        data_train=data_train[ids_to_keep,...]
        labels_train=labels_train[ids_to_keep]'''
        print(data_train.shape)
        print(labels_train.shape)
        if os.path.exists(name):
            os.system('rm %s'%(name))
        save_h5(name,data_train,labels_train,'float32','uint8')
def getMiniDataset(class_ids,train_size,test_size,file='AffordancesDataset_augmented.h5',negatives_file='AffordancesDataset_negatives.h5',return_data=False,info_file='AffordancesDataset_augmented_names.txt'):
    #if return_data is true then no data is saved
    #and data/labels are returned to caller
    names=np.genfromtxt(info_file,dtype='str',skip_header=0,delimiter=':')
    #print(names)
    real_ids=np.array([int(x) for x in names[:,0]])
    #print(real_ids)
    all_data,all_labels=load_h5(file)
    #print(np.unique(all_labels))
    if (test_size+train_size)>all_labels.shape[0]:
        print('Max data size is %d'%all_labels.shape[0])
        sys.exit()
    if test_size<0:
        test_size=all_labels.shape[0]-train_size
    #print(all_data.shape)
    train_ids=np.zeros((class_ids.shape[0]*train_size,1),dtype=np.int32)
    test_ids=np.zeros((class_ids.shape[0]*test_size,1),dtype=np.int32)
    #some_ids_new=np.zeros((class_ids.shape[0],1),dtype=np.uint8)
    new_labels_train=np.zeros((class_ids.shape[0]*train_size,1),dtype=np.uint8)
    new_labels_test=np.zeros((class_ids.shape[0]*test_size,1),dtype=np.uint8)
    aff_initial=[]
    for i in range(class_ids.shape[0]):
        ids=np.nonzero(all_labels==class_ids[i])[0]
        #print(all_labels[ids])
        #take a fixed number from each class to test
        test=np.arange(ids.shape[0],dtype=np.int32)
        np.random.shuffle(test)
        start_id=i*train_size
        end_id=(i+1)*train_size
        train_ids[start_id:end_id,0]=ids[test[:train_size]]
        new_labels_train[start_id:end_id,0]=i+1
        start_id=i*test_size
        end_id=(i+1)*test_size
        test_ids[start_id:end_id,0]=ids[test[train_size:train_size+test_size]]
        new_labels_test[start_id:end_id,0]=i+1
        aff_initial.append(names[class_ids[i],1][0])
    print(aff_initial)
    #print(ids_train)
    #train_ids=np.asarray(ids_train,dtype=np.uint8).reshape(-1,1)
    train_ids=np.squeeze(train_ids)
    test_ids=np.squeeze(test_ids)
    #print(train_ids.T)
    #print(test_ids.T)
    #sys.exit()
    #test_ids=np.squeeze(np.asarray(ids_test,dtype=np.uint8).reshape(-1,1))
    print('Training set %d'%train_ids.shape[0])
    print('Testing set %d'%test_ids.shape[0])
    new_data_train=all_data[train_ids,...]
    new_data_test=all_data[test_ids,...]
    #concatenate here the negatives
    negative_data,negative_labels=load_h5(negatives_file)
    new_data_train=np.concatenate((new_data_train,negative_data[:train_size]),axis=0)
    new_labels_train=np.concatenate((new_labels_train,np.zeros((train_size,1))),axis=0)
    train_shuffle=np.arange(new_data_train.shape[0])
    np.random.shuffle(train_shuffle)
    new_data_train=new_data_train[train_shuffle,...]
    new_labels_train=new_labels_train[train_shuffle]
    name='mini_AffordancesDataset_train_'+''.join(aff_initial)+'_'+str(train_size)+'.h5'
    if not return_data:
        if os.path.exists(name):
            os.system('rm %s' % (name))
        save_h5(name,new_data_train,new_labels_train,'float32','uint8')
    new_data_test=np.concatenate((new_data_test,negative_data[train_size:train_size+test_size]),axis=0)
    new_labels_test=np.concatenate((new_labels_test,np.zeros((test_size,1))),axis=0)
    train_shuffle=np.arange(new_data_test.shape[0])
    np.random.shuffle(train_shuffle)
    new_data_test=new_data_test[train_shuffle,...]
    new_labels_test=new_labels_test[train_shuffle]
    print('Training data ')
    print(new_data_train.shape)
    print(new_labels_train)
    print('Test data ')
    print(new_data_test.shape)
    print(new_labels_test.shape)
    name='mini_AffordancesDataset_test_'+''.join(aff_initial)+'_'+str(train_size)+'.h5'
    if not return_data:
        if os.path.exists(name):
            os.system('rm %s' % (name))
        save_h5(name,new_data_test,new_labels_test,'float32','uint8')
        # save the original class ids to keep track of the affordances involved in this dataset
        name='mini_AffordancesDataset_names_'+''.join(aff_initial)+'_'+str(train_size)+'.txt'
        with open(name, "w") as text_file:
            for i in range(class_ids.shape[0]):
                print('%d:%s' % (i+1,names[class_ids[i],1]))
                text_file.write("%d:%s\n" % (i+1,names[class_ids[i],1]))
    else:
        for i in range(class_ids.shape[0]):
            print('%d:%s' % (i+1,names[class_ids[i],1]))
    '''fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.hold(False)
    for i in range(new_labels_test.shape[0]):
        ax.scatter(new_data_test[i,:,0],new_data_test[i,:,1],new_data_test[i,:,2],s=10)
        #print(names[class_ids[new_labels_test[i,0]],1])
        ax.set_title(names[class_ids[new_labels_test[i,0]],1]+' '+str(new_labels_test[i,0]))
        plt.pause(5)
        plt.draw()'''
    if return_data:
        return new_data_train,new_labels_train,new_data_test,new_labels_test
    else:
        return 0,0,0,0
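# Usage sketch (assumption, not part of the original code): build a small multi-class dataset
# with negatives for three affordance ids, e.g.
#   getMiniDataset(np.array([1, 2, 3]), train_size=128, test_size=32)
# which writes mini_AffordancesDataset_{train,test}_<initials>_128.h5 unless return_data=True.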
def getMultiAffordanceData(file):
    path=os.path.abspath(file)
    pos=path.rfind('/')
    tokens=path[pos+1:].split('_')
    descriptor_id=tokens[6]
    scene_name=tokens[2]
    scene_name=path[:pos]+'/'+scene_name+'_d.pcd'
    file_descriptor=path[:pos]+'/tmp'+descriptor_id+'.csv'
    labels=np.genfromtxt(file_descriptor,dtype='str',skip_header=1,delimiter=',')
    print('Affordances in descriptor %d'%labels.shape[0])
    fileId=tokens[-1]
    tokens=fileId.split('.')
    fileId=tokens[0]
    print(fileId)
    res_data_file=path[:pos]+'/'+fileId+'_goodPointsX.pcd'
    res_points_file=path[:pos]+'/'+fileId+'_goodPoints.pcd'
    data=load_pcd_data(res_data_file,cols=None)
    #print(data.shape)
    points=load_pcd_data(res_points_file,cols=(0,1,2))
    real_c_data=load_pcd_data(res_points_file,cols=(3,),dataType=np.uint32)
    #real_c_data=np.array(colors[:,-1],dtype=np.int32)
    red=np.array((real_c_data>>16)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    green=np.array((real_c_data>>8)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    blue=np.array((real_c_data)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    real_c_data=np.concatenate((red,green,blue),axis=1)
    perPoint=np.sum(real_c_data,axis=1)
    bounds=np.cumsum(perPoint)
    howMany=np.zeros((labels.shape[0],1),dtype=np.int32)
    for i in range(labels.shape[0]):
        success=np.nonzero(data[:,0]==i)[0]
        howMany[i]=success.size
    ids_target=np.nonzero(howMany>n_samples)[0]
    print('Real found: %d'%ids_target.size)
    st_i=0
    end_i=bounds[0]
    print('Getting single affordance-instance per point')
    bar = Bar('Processing', max=bounds.shape[0])
    for i in range(bounds.shape[0]-1):
        if points[i,-1]>0.3:
            if i>0:
                st_i=bounds[i]
                end_i=bounds[i+1]
            someData=data[st_i:end_i,...]
            #get unique aff_ids
            ids=np.unique(someData[:,0])
            aff_ids=np.intersect1d(ids,ids_target)
            if aff_ids.shape[0]==0:
                continue
            toKeep=np.zeros((aff_ids.shape[0],7))
            for j in range(aff_ids.shape[0]):
                affData=np.nonzero(someData[:,0]==aff_ids[j])[0]
                keep=np.argmax(someData[affData,2])
                toKeep[j,:3]=points[i,...]
                toKeep[j,3:6]=someData[affData[keep],:3]
                toKeep[j,6]=i
            if i>0:
                newData=np.concatenate((newData,toKeep),axis=0)
            else:
                newData=toKeep
        bar.next()
    bar.finish()
    print('Recompute real targets')
    for i in range(labels.shape[0]):
        success=np.nonzero(newData[:,3]==i)[0]
        howMany[i]=success.size
    ids_target=np.nonzero(howMany>n_samples)[0]
    print('Real found: %d'%ids_target.size)
    if n_orientations>1:
        name='AffordancesDataset_augmented_names.txt'
    else:
        name='AffordancesDataset_names.txt'
    aff_initials=[]
    with open(name, "w") as text_file:
        for i in range(ids_target.shape[0]):
            text_file.write("%d:%s-%s\n" % (i,labels[ids_target[i],0],labels[ids_target[i],2]))
            #aff_initials.append(labels[ids_target[i],0][0])
    #aff_initials=set(aff_initials)
    #print(aff_initials)
    #sys.exit()
    #Test 4-affordance case, where all instances of an interaction account for a single affordance class
    aff_lims=np.array([0,8,17,91,92])
    #sample 128 points for every affordance, regardless of their id
    sampled_ids=np.zeros((ids_target.size,n_samples))
    for i in range(ids_target.shape[0]):
        interesting_ids=np.nonzero(newData[:,3]==ids_target[i])[0]
        sorted_ids=np.argsort(newData[interesting_ids,5])
        sorted_ids=interesting_ids[sorted_ids[::-1]]
        sampled_ids[i,...]=newData[sorted_ids[:n_samples],-1]
    t=np.unique(sampled_ids.reshape(1,-1))
    dataPoints=np.zeros((t.size,3),dtype=np.float32)
    dataPoints_labels=np.zeros((t.size,5),dtype=np.uint8)
    initials=[]
    for i in range(t.size):
        #get all affordances for this point
        ids=np.nonzero(newData[:,-1]==t[i])[0]
        #note: this reuses the name `labels`, overwriting the descriptor table loaded above
        labels=np.zeros(ids.shape[0],dtype=np.uint8)
        for j in range(ids.shape[0]):
            labels[j]=np.nonzero(aff_lims>newData[ids[j],3])[0][0]
        labels=np.unique(labels)
        dataPoints[i]=newData[ids[0],:3]
        dataPoints_labels[i,labels]=1
    #extract voxel
    if n_orientations>1:
        name='dataPointsAffordances_augmented.h5'
    else:
        name='dataPointsAffordances.h5'
    if os.path.exists(name):
        os.system('rm %s' % (name))
    save_h5(name,dataPoints,dataPoints_labels,'float32','uint8')
    #get dense cloud
    dense_sceneCloud=pypcd.PointCloud.from_path(scene_name).pc_data
    pc_array = np.array([[x, y, z] for x,y,z in dense_sceneCloud])
    #generate pointclouds that were not detected to test against single example training
    good_points_file=path[:pos]+'/'+fileId+'_goodPointsIds.pcd'
    sampled_points_file=path[:pos]+'/'+fileId+'_samplePointsIds.pcd'
    sampled_ids=np.sort(load_pcd_data(sampled_points_file,cols=(0,),dataType=np.int32))
    good_ids=np.sort(load_pcd_data(good_points_file,cols=(0,),dataType=np.int32))
    non_affordance=np.setdiff1d(np.arange(sampled_ids.shape[0]),good_ids)
    sampled_points_file=path[:pos]+'/'+fileId+'_samplePoints.pcd'
    sampled_points=load_pcd_data(sampled_points_file,cols=(0,1,2))
    np.random.shuffle(non_affordance)
    print('Getting 1024 negative examples ')
    #shuffle negative examples ids
    bar = Bar('Processing', max=1024)
    negative_examples=np.zeros((1024,n_points,3),dtype=np.float32)
    for i in range(1024):
        point=pc_array[non_affordance[i],...]
        voxel=getVoxel(point,max_rad,pc_array)
        sample=sample_cloud(voxel,n_points)
        negative_examples[i,...]=sample
        bar.next()
    bar.finish()
    negative_labels=100*np.ones((1024,1),dtype=np.uint8)
    print('Got %d negative examples'%(negative_examples.shape[0]))
    print(negative_examples[0,0,:])
    name='AffordancesDataset_negatives.h5'
    if os.path.exists(name):
        os.system('rm %s' % (name))
    save_h5(name,negative_examples,negative_labels,'float32','uint8')
    print('Sampling actual voxels from %s of %d points'%(scene_name,pc_array.shape[0]))
    dataSet_data=np.zeros((dataPoints.shape[0]*n_orientations,n_points,3),dtype=np.float32)
    dataSet_labels=np.zeros((dataPoints_labels.shape[0]*n_orientations,dataPoints_labels.shape[1]),dtype=np.uint8)
    print(dataSet_data.shape)
    count=0
    #data_type 0->centered
    data_type=1
    aff_names=np.array(['Non','Filling','Hanging','Placing','Sitting'])
    #extract voxels and pointclouds for dataset
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.hold(False)
    for aff in range(dataPoints.shape[0]):
        t_names=np.nonzero(dataPoints_labels[aff])[0]
        print('%d/%d Training example for %s'%(aff,dataPoints.shape[0],np.array_str(aff_names[t_names])))
        point=dataPoints[aff,:]
        #print(point.shape)
        voxel=getVoxel(point,max_rad,pc_array)
        if voxel.shape[0]<n_points:
            sample=voxel  #assumption: use the undersized voxel directly (original referenced an undefined 'aVoxel')
        else:
            sample=sample_cloud(voxel,n_points)
        if data_type==0:
            centered_sample=sample-point
        else:
            centered_sample=sample
        #rotate this voxel n_orientations times around Z (up)
        for j in range(n_orientations):
            rotated_voxel=rotate_point_cloud_by_angle(np.expand_dims(centered_sample,axis=0),j*2*np.pi/n_orientations).squeeze()
            dataSet_data[count,...]=rotated_voxel
            dataSet_labels[count,...]=dataPoints_labels[aff,...]
            count+=1
            if j==0:
                ax.scatter(rotated_voxel[:,0],rotated_voxel[:,1],rotated_voxel[:,2],s=3)
                plt.pause(0.2)
                plt.draw()
    if n_orientations>1:
        name='AffordancesDataset_augmented.h5'
    else:
        name='AffordancesDataset.h5'
    if os.path.exists(name):
        os.system('rm %s' % (name))
    save_h5(name,dataSet_data,dataSet_labels,'float32','uint8')
    return dataPoints,dataPoints_labels
def createDataSet(file):
    path=os.path.abspath(file)
    pos=path.rfind('/')
    tokens=path[pos+1:].split('_')
    descriptor_id=tokens[6]
    scene_name=tokens[2]
    scene_name=path[:pos]+'/'+scene_name+'_d.pcd'
    file_descriptor=path[:pos]+'/tmp'+descriptor_id+'.csv'
    labels=np.genfromtxt(file_descriptor,dtype='str',skip_header=1,delimiter=',')
    print('Affordances in descriptor %d'%labels.shape[0])
    fileId=tokens[-1]
    tokens=fileId.split('.')
    fileId=tokens[0]
    print(fileId)
    res_data_file=path[:pos]+'/'+fileId+'_goodPointsX.pcd'
    res_points_file=path[:pos]+'/'+fileId+'_goodPoints.pcd'
    data=load_pcd_data(res_data_file,cols=None)
    #print(data.shape)
    points=load_pcd_data(res_points_file,cols=(0,1,2))
    real_c_data=load_pcd_data(res_points_file,cols=(3,),dataType=np.uint32)
    #real_c_data=np.array(colors[:,-1],dtype=np.int32)
    red=np.array((real_c_data>>16)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    green=np.array((real_c_data>>8)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    blue=np.array((real_c_data)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    real_c_data=np.concatenate((red,green,blue),axis=1)
    perPoint=np.sum(real_c_data,axis=1)
    bounds=np.cumsum(perPoint)
    #print(bounds)
    howMany=np.zeros((labels.shape[0],1),dtype=np.int32)
    all_data=np.zeros((data.shape[0],6))
    for i in range(all_data.shape[0]):
        point_id=np.nonzero(bounds>i)[0][0]
        all_data[i,:3]=points[point_id,:]
        all_data[i,3:]=data[i,:3]
    for i in range(labels.shape[0]):
        success=np.nonzero(all_data[:,3]==i)[0]
        success2=np.nonzero(all_data[success,2]>0.3)[0]
        howMany[i]=success2.size
    ids_target=np.nonzero(howMany>n_samples)[0]
    print('Real found: %d'%ids_target.size)
    if n_orientations>1:
        name='AffordancesDataset_augmented_names.txt'
    else:
        name='AffordancesDataset_names.txt'
    with open(name, "w") as text_file:
        for i in range(ids_target.shape[0]):
            text_file.write("%d:%s-%s\n" % (i,labels[ids_target[i],0],labels[ids_target[i],2]))
    #print(labels[ids_target,1:])
    all_points=np.zeros((ids_target.size,n_samples,3))
    all_points_score=np.zeros((ids_target.size,n_samples))
    for i in range(ids_target.shape[0]):
        #get the 3D point for the response
        success=np.nonzero((all_data[:,3]==ids_target[i])&(all_data[:,2]>0.3))[0]
        sorted_ids=np.argsort(all_data[success,5])
        print('Sampling for %s %s in %d points(%f,%f)'%(labels[ids_target[i],0],labels[ids_target[i],2],success.size,np.max(all_data[success,5]),np.min(all_data[success,5])))
        sorted_ids=sorted_ids[::-1]
        for j in range(n_samples):
            all_points[i,j,:]=all_data[success[sorted_ids[j]],:3]
            all_points_score[i,j]=all_data[success[sorted_ids[j]],5]
        #print('Min %f max %f'%(all_points_score[i,0],all_points_score[i,-1]))
    labels_d=np.arange(ids_target.size)
    print('Sampled points maxZ %f minZ %f'%(np.max(all_points[:,:,2].reshape(1,-1)),np.min(all_points[:,:,2].reshape(1,-1))))
    #sys.exit()
    if n_orientations>1:
        name='dataPointsAffordances_augmented.h5'
    else:
        name='dataPointsAffordances.h5'
    if os.path.exists(name):
        os.system('rm %s' % (name))
    save_h5(name,all_points,labels_d,'float32','uint8')
    #get dense cloud
    dense_sceneCloud=pypcd.PointCloud.from_path(scene_name).pc_data
    pc_array = np.array([[x, y, z] for x,y,z in dense_sceneCloud])
    #generate pointclouds that were not detected to test against single example training
    good_points_file=path[:pos]+'/'+fileId+'_goodPointsIds.pcd'
    sampled_points_file=path[:pos]+'/'+fileId+'_samplePointsIds.pcd'
    sampled_ids=np.sort(load_pcd_data(sampled_points_file,cols=(0,),dataType=np.int32))
    good_ids=np.sort(load_pcd_data(good_points_file,cols=(0,),dataType=np.int32))
    non_affordance=np.setdiff1d(np.arange(sampled_ids.shape[0]),good_ids)
    sampled_points_file=path[:pos]+'/'+fileId+'_samplePoints.pcd'
    sampled_points=load_pcd_data(sampled_points_file,cols=(0,1,2))
    np.random.shuffle(non_affordance)
    print('Getting 1024 negative examples ')
    #shuffle negative examples ids
    bar = Bar('Processing', max=1024)
    negative_examples=np.zeros((1024,n_points,3),dtype=np.float32)
    for i in range(1024):
        point=pc_array[non_affordance[i],...]
        voxel=getVoxel(point,max_rad,pc_array)
        minP=np.min(voxel,0)
        maxP=np.max(voxel,0)
        dist=np.linalg.norm(maxP-minP,axis=0)/2
        print('RAD %f rad %f estimation %f'%(dist,max_rad,max_rad*np.sqrt(3)))
        sample=sample_cloud(voxel,n_points)
        negative_examples[i,...]=sample
        bar.next()
    bar.finish()
    negative_labels=100*np.ones((1024,1),dtype=np.uint8)
    print('Got %d negative examples'%(negative_examples.shape[0]))
    print(negative_examples[0,0,:])
    name='AffordancesDataset_negatives.h5'
    if os.path.exists(name):
        os.system('rm %s' % (name))
    save_h5(name,negative_examples,negative_labels,'float32','uint8')
    #sys.exit()
    print('Sampling actual voxels from %s of %d points'%(scene_name,pc_array.shape[0]))
    dataSet_data=np.zeros((all_points.shape[0]*all_points.shape[1]*n_orientations,n_points,3),dtype=np.float32)
    dataSet_labels=np.zeros((all_points.shape[0]*all_points.shape[1]*n_orientations,1),dtype=np.uint8)
    print(dataSet_data.shape)
    count=0
    #data_type 0->centered
    data_type=1
    #extract voxels and pointclouds for dataset
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.hold(False)
    for aff in range(all_points.shape[0]):
        print('Training examples for %s %s'%(labels[ids_target[aff],0],labels[ids_target[aff],2]))
        bar = Bar('Processing', max=all_points.shape[1])
        for n_sample in range(all_points.shape[1]):
            point=all_points[aff,n_sample,:].reshape(3,-1)
            #print(point.shape)
            voxel=getVoxel(point,max_rad,pc_array)
            if voxel.shape[0]<n_points:
                sample=voxel  #assumption: use the undersized voxel directly (original referenced an undefined 'aVoxel')
            else:
                sample=sample_cloud(voxel,n_points)
            if data_type==0:
                centered_sample=sample-point
            else:
                centered_sample=sample
            #rotate this voxel n_orientations times around Z (up)
            for j in range(n_orientations):
                rotated_voxel=rotate_point_cloud_by_angle(np.expand_dims(centered_sample,axis=0),j*2*np.pi/n_orientations).squeeze()
                dataSet_data[count,...]=rotated_voxel
                dataSet_labels[count]=labels_d[aff]
                count+=1
                if n_sample==0:
                    ax.scatter(rotated_voxel[:,0],rotated_voxel[:,1],rotated_voxel[:,2],s=3)
                    plt.pause(0.2)
                    plt.draw()
            bar.next()
        bar.finish()
    if n_orientations>1:
        name='AffordancesDataset_augmented.h5'
    else:
        name='AffordancesDataset.h5'
    if os.path.exists(name):
        os.system('rm %s' % (name))
    save_h5(name,dataSet_data,dataSet_labels,'float32','uint8')
def extractSingleLabeledData(data_file):
    data, label = load_h5(data_file)
    print(label.shape)
    train_examples = 512
    test_examples = 128
    examples = train_examples + test_examples
    print(examples * label.shape[1], data.shape[1], 3)
    new_data_train = np.zeros((train_examples * label.shape[1], data.shape[1], 3), dtype=np.float32)
    new_labels_train = np.zeros((train_examples * label.shape[1], 1), dtype=np.int32)
    new_data_test = np.zeros((test_examples * label.shape[1], data.shape[1], 3), dtype=np.float32)
    new_labels_test = np.zeros((test_examples * label.shape[1], 1), dtype=np.int32)
    #for every affordance
    st = 0
    st2 = 0
    for i in range(label.shape[1]):
        #get the pointclouds of this affordance
        target_indices = np.nonzero(label[:, i])[0]
        #print('Aff %d %d'%(i,target_indices.size))
        to_sample_from = np.arange(target_indices.size)
        np.random.shuffle(to_sample_from)
        if to_sample_from.size < (train_examples + test_examples):
            real_train_examples = int(to_sample_from.size * .8 // 1)
            #print(real_train_examples)
            real_test_examples = to_sample_from.size - real_train_examples
            print('Less data from %d,%d' % (real_train_examples, real_test_examples))
        else:
            real_train_examples = train_examples
            real_test_examples = test_examples
        ed = st + real_train_examples
        ed2 = st2 + real_test_examples
        real_sample = target_indices[to_sample_from[:real_train_examples]]
        real_sample_test = target_indices[to_sample_from[real_train_examples:real_train_examples + real_test_examples]]
        new_data_train[st:ed, ...] = data[real_sample, ...]
        new_labels_train[st:ed, ...] = i
        new_data_test[st2:ed2, ...] = data[real_sample_test, ...]
        new_labels_test[st2:ed2, ...] = i
        st = ed
        st2 = ed2
    # get the real data in case some affordances had less examples than the target
    new_data_train = new_data_train[:ed, ...]
    new_labels_train = new_labels_train[:ed, ...]
    new_data_test = new_data_test[:ed2, ...]
    new_labels_test = new_labels_test[:ed2, ...]
    #shuffle things
    ids = np.arange(new_labels_train.shape[0])
    np.random.shuffle(ids)
    new_data_train = new_data_train[ids, ...]
    new_labels_train = new_labels_train[ids, ...]
    ids = np.arange(new_labels_test.shape[0])
    np.random.shuffle(ids)
    new_data_test = new_data_test[ids, ...]
    new_labels_test = new_labels_test[ids, ...]
    print('New binary train data %d' % new_labels_train.shape[0])
    print('New binary test data %d' % new_labels_test.shape[0])
    name = 'SinglelabelDataSet_train_' + data_file.split('.')[0].split('_')[-1] + '.h5'
    print(name)
    save_h5(name, new_data_train, new_labels_train, 'float32', 'uint8')
    name = 'SinglelabelDataSet_test_' + data_file.split('.')[0].split('_')[-1] + '.h5'
    print(name)
    save_h5(name, new_data_test, new_labels_test, 'float32', 'uint8')
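# Usage sketch (assumption, not part of the original code): convert a multi-label .h5 file, such
# as the one written by split_data(), into single-label train/test splits, e.g.
#   extractSingleLabeledData('MultilabelDataSet_splitTrain4.h5')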
def getSingleTraining(file):
    path = os.path.abspath(file)
    pos = path.rfind('/')
    tokens = path[pos + 1:].split('_')
    descriptor_id = tokens[6]
    scene_name = tokens[2]
    scene_name = path[:pos] + '/' + scene_name + '_d.pcd'
    file_descriptor = path[:pos] + '/tmp' + descriptor_id + '.csv'
    labels = np.genfromtxt(file_descriptor, dtype='str', skip_header=1, delimiter=',')
    print('Affordances in descriptor %d' % labels.shape[0])
    fileId = tokens[-1]
    tokens = fileId.split('.')
    fileId = tokens[0]
    # print(fileId)
    # # Need only those affordances that have
    # # over 128 good predictions in this result file
    # res_data_file=path[:pos]+'/'+fileId+'_goodPointsX.pcd'
    # res_points_file=path[:pos]+'/'+fileId+'_goodPoints.pcd'
    # data=load_pcd_data(res_data_file,cols=None)
    # #print(data.shape)
    # points,real_c_data=load_pcd_data_binary(res_points_file)
    # #real_c_data=load_pcd_data(res_points_file,cols=(3,),dataType=np.uint32)
    # #real_c_data=np.array(colors[:,-1],dtype=np.int32)
    # red=np.array((real_c_data>>16)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    # green=np.array((real_c_data>>8)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    # blue=np.array((real_c_data)& 0x0000ff,dtype=np.uint8).reshape(-1,1)
    # real_c_data=np.concatenate((red,green,blue),axis=1)
    # perPoint=np.sum(real_c_data,axis=1)
    # bounds=np.cumsum(perPoint)
    # #print(bounds)
    # howMany=np.zeros((labels.shape[0],1),dtype=np.int32)
    # all_data=np.zeros((data.shape[0],6))
    # for i in range(all_data.shape[0]):
    #     point_id=np.nonzero(bounds>i)[0][0]
    #     all_data[i,:3]=points[point_id,:]
    #     all_data[i,3:]=data[i,:3]
    # for i in range(labels.shape[0]):
    #     success=np.nonzero(all_data[:,3]==i)[0]
    #     #success2=np.nonzero(all_data[success,2]>0.2)[0]
    #     howMany[i]=success.size
    # ids_target=np.nonzero(howMany>n_samples)[0]
    # print('Real found: %d'%ids_target.size)
    # print(ids_target)
    #sys.exit()
    new_c = np.genfromtxt('filtered_counts2.csv', delimiter=',', dtype='int')
    with open('file_lists2.csv', 'r') as f:
        reader = csv.reader(f)
        new_n = list(reader)
    samples = 32
    points = 4096
    ids_target = np.nonzero(new_c >= samples)[0]
    print('Actually using %d affordances' % (ids_target.size))
    fig = plt.figure()
    plt.ion()
    ax = fig.add_subplot(121, projection='3d')
    ax2 = fig.add_subplot(122, projection='3d')
    unique_scenes = dict()
    k = 10
    #ax.hold(False)
    if k > 1:
        bar = Bar('Creating original single example training dataset', max=ids_target.shape[0])
    for i in range(ids_target.shape[0]):
        interaction = ids_target[i]
        path_to_data = os.path.abspath('../data')
        name = path_to_data + '/affordances/binaryOc_AffordancesDataset_train' + str(interaction) + '_' + str(TRAIN_EXAMPLES) + '.h5'
        if os.path.exists(name):
            continue
        #find training data
        aff_dir = labels[interaction, 0]
        query_object = labels[interaction, 2]
        data_file = path[:pos] + "/" + aff_dir + "/ibs_full_" + labels[interaction, 1] + "_" + query_object + ".txt"
        with open(data_file) as f:
            content = f.readlines()
        # you may also want to remove whitespace characters like `\n` at the end of each line
        content = [x.strip() for x in content]
        scene_file = content[0].split(":")[1]
        tmp = content[8].split(":")[1]
        datapoint = tmp.split(',')
        test_point = np.expand_dims(np.asarray([float(x) for x in datapoint]), axis=0)
        data_file = path[:pos] + "/" + aff_dir + "/" + scene_file
        if '.pcd' in scene_file or '.ply' in scene_file:
            if os.path.exists(data_file):
                data_file = data_file
            else:
                try_data_file = data_file + '.ply'
                if os.path.exists(try_data_file):
                    #print(try_data_file)
                    data_file = try_data_file
                else:
                    #maybe pcd extension missing
                    try_data_file = data_file + '.pcd'
                    if os.path.exists(try_data_file):
                        data_file = try_data_file
        # if scene_file not in unique_scenes:
        #     unique_scenes[scene_file]=interaction
        # else:
        #     continue
        if '.pcd' in data_file:
            cloud_training = load_pcd_data(data_file)
        else:
            cloud_training = load_ply_data(data_file)
        data = np.zeros((2, n_points, 3), dtype=np.float32)
        data_labels = np.zeros((2, 1), dtype=np.int32)
        boundingBoxDiag = np.linalg.norm(np.min(cloud_training, 0) - np.max(cloud_training, 0))
        #print('%s Diagonal %f Points %d'%(scene_file,boundingBoxDiag,cloud_training.shape[0]))
        #sample a voxel with rad from test-point
        kdt = BallTree(cloud_training, leaf_size=5, metric='euclidean')
        voxel_ids = getVoxel(test_point, max_rad, kdt)
        voxel = cloud_training[voxel_ids, :]
        sample = sample_cloud(voxel, n_points)
        sample_cloud_training = sample_cloud(cloud_training, n_points * 2)
        #generate a negative example with noise around test_point
        low = test_point[0, 0] - max_rad
        high = test_point[0, 0] + max_rad
        tmp1 = (high - low) * np.random.random_sample((n_points, 1)) + (low)
        low = test_point[0, 1] - max_rad
        high = test_point[0, 1] + max_rad
        tmp2 = (high - low) * np.random.random_sample((n_points, 1)) + (low)
        low = test_point[0, 2] - max_rad
        high = test_point[0, 2] + max_rad
        tmp3 = (high - low) * np.random.random_sample((n_points, 1)) + (low)
        negative_cloud_training = np.concatenate((tmp1, tmp2, tmp3), axis=1)
        data[0, ...] = sample - test_point
        data_labels[0, ...] = np.zeros((1, 1), dtype=np.int32)
        data[1, ...] = negative_cloud_training - test_point
        data_labels[1, ...] = np.ones((1, 1), dtype=np.int32)
        #name=path_to_data+'/affordances/binaryOc_AffordancesDataset_train'+str(interaction)+'_'+str(TRAIN_EXAMPLES)+'.h5'
        #print(name)
        save_h5(name, data, data_labels, 'float32', 'uint8')
        ax.scatter(sample_cloud_training[:, 0], sample_cloud_training[:, 1], sample_cloud_training[:, 2], s=1, c='b')
        ax.scatter(sample[:, 0], sample[:, 1], sample[:, 2], s=3, c='b')
        ax2.scatter(negative_cloud_training[:, 0], negative_cloud_training[:, 1], negative_cloud_training[:, 2], s=3, c='r')
        plt.pause(1)
        plt.draw()
        ax.clear()
        ax2.clear()
        bar.next()
    bar.finish()
    name = '../data/affordances/names.txt'
    with open(name, "w") as text_file:
        for i in range(ids_target.shape[0]):
            text_file.write("%d:%s-%s\n" % (i, labels[ids_target[i], 0], labels[ids_target[i], 2]))