def __init__(self, app, app_args):
    """Initialise the base class, then install the default settings.

    The defaults are stored in ``self._current``:

    * ``config`` -- absolute path of ``.dotrc`` inside ``$HOME``
    * ``local``  -- absolute path of ``.dot`` inside ``$HOME``
    * ``remote`` -- empty string

    Raises:
        KeyError: If the ``HOME`` environment variable is not set.
    """
    super(Config, self).__init__(app, app_args)

    home_dir = environ['HOME']
    defaults = {}
    defaults['config'] = abspath(op_join(home_dir, '.dotrc'))
    defaults['local'] = abspath(op_join(home_dir, '.dot'))
    defaults['remote'] = ''
    self._current = defaults
def kmeans_centers_plot(clus_center_dir):
    """Write one PNG per cluster center for visual inspection.

    Loads ``kmeans.pickle`` and ``ccents.pickle`` from ``clus_center_dir``,
    recreates the ``fig`` sub-directory from scratch and saves one image per
    entry of ``ccents``, named ``<cluster id>--<cluster size>.png`` where the
    size is ``len(kmeans_clus[cluster id])``.

    Args:
        clus_center_dir: Directory containing ``kmeans.pickle`` and
            ``ccents.pickle``; the ``fig`` directory is created inside it.

    Raises:
        AssertionError: If the global maximum over all cluster centers is not
            strictly greater than the global minimum.
    """
    kmeans_clus = AIF.pickle_load(op_join(clus_center_dir, 'kmeans.pickle'))
    ccents = AIF.pickle_load(op_join(clus_center_dir, 'ccents.pickle'))

    # Always start from an empty figure directory so that images left over
    # from a previous run cannot be mistaken for current results.
    clus_center_figure_dir = op_join(clus_center_dir, 'fig')
    if os.path.isdir(clus_center_figure_dir):
        shutil.rmtree(clus_center_figure_dir)
    os.makedirs(clus_center_figure_dir)

    # Sanity check across all images: the centers must not be one constant.
    min_t = N.min([ccents[_].min() for _ in ccents])
    max_t = N.max([ccents[_].max() for _ in ccents])
    assert max_t > min_t

    # NOTE: the previous revision computed a min/max-normalised copy of the
    # centers and then immediately discarded it, so the images have always
    # been rendered from the raw centers with normalize=False.  That behaviour
    # is kept; only the discarded computation and the unreachable
    # interactive-display branch were removed.
    for i in ccents:
        clus_siz = len(kmeans_clus[i])
        t = AIVU.cub_img(ccents[i])['im']
        AIIO.save_png(t, op_join(clus_center_figure_dir, '%003d--%d.png' % (i, clus_siz)), normalize=False)
def decode_all_images(d, data_dir):
    """Run the saved autoencoder over ``d['vs']`` and pickle the output.

    The model is read from ``<data_dir>/model/model-autoencoder.h5`` and the
    value returned by ``decode_images`` is written to
    ``<data_dir>/decoded/decoded.pickle`` (the directory is created when it
    does not exist yet).

    Args:
        d: Mapping whose ``'vs'`` entry is handed to ``decode_images``.
        data_dir: Root directory holding ``model`` and receiving ``decoded``.
    """
    import keras.models as KM
    from os.path import join as op_join

    model_path = op_join(data_dir, 'model', 'model-autoencoder.h5')
    decoded = decode_images(KM.load_model(model_path), d['vs'])

    target_dir = op_join(data_dir, 'decoded')
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    target_file = op_join(target_dir, 'decoded.pickle')
    with open(target_file, 'wb') as handle:
        pickle.dump(decoded, handle, protocol=-1)
def load_all():
    '''
    Load every data-description file and return them as a dictionary.

    The tree rooted at ``CONFIG['data_description_file_path']`` is walked;
    every ``.py`` file is turned into an object with ``file2object`` and
    registered under ``obj.name``.  Duplicate names are rejected.

    Return
    ------
    out: dict
        Data dictionary of the form
        {name: {'data_description': dd, 'rel_path': rel_path}}

    Raises
    ------
    IndexError
        If two files produce objects with the same ``name``.
    '''
    root_path = CONFIG['data_description_file_path']
    pending = deque([(root_path, '')])  # entries are (abs_path, rel_path)
    out = {}
    while pending:
        abs_path, rel_path = pending.pop()

        if op_isfile(abs_path) and abs_path.endswith('.py'):  # must be a Python source file
            obj = file2object(abs_path)
            # rel_path was built by joining onto '', so it starts with the
            # separator; [1:] drops it (assumes REL_PATH_SEP is a single
            # character -- TODO confirm).
            clean_rel_path = rel_path[1:]
            if obj.name in out:
                raise IndexError(
                    'Duplicate data name!(duplication={n}, relative_path={rp})'
                    .format(n=obj.name, rp=clean_rel_path))
            out[obj.name] = {'data_description': obj, 'rel_path': clean_rel_path}
            continue

        # Ignore Python cache folders and files that are not Python scripts.
        if abs_path.endswith('__pycache__') or op_isfile(abs_path):
            continue

        for f in listdir(abs_path):
            # Strip the suffix only from names that really end in '.py'.  The
            # former substring test ('.py' in f) also matched names such as
            # 'my.pydata' and chopped three unrelated characters off them,
            # corrupting the relative path of everything below.
            frp = f[:-3] if f.endswith('.py') else f
            pending.append((op_join(abs_path, f),
                            REL_PATH_SEP.join([rel_path, frp])))
    return out
def run_auto_classifier(d, option, out_dir):
    """Train the auto-classifier model, or load its best weights and evaluate it.

    Args:
        d: Raw dataset handed to ``preprocess`` together with the class count.
        option: ``'train'`` runs the training branch; any other value takes
            the branch that loads the checkpoint and predicts on the test set.
        out_dir: Output root; weights are stored in ``<out_dir>/model``.

    NOTE(review): this file uses Python 2 ``print`` statements.
    NOTE(review): the original indentation was lost; the evaluation code is
    assumed to belong to the ``else`` branch -- confirm against the repository.
    """
    num_of_class = 2
    all_data, all_labels, all_masks = preprocess(d, num_of_class)
    x_train, x_test, data_validation = all_data # training, testing and validation data
    labels, test_labels, labels_validation = all_labels # training, testing and validation labels
    masks, masks_auto = all_masks # masks

    model_dir = op_join(out_dir, 'model')
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    # Single checkpoint file: written during training, read back for evaluation.
    model_autoclassifier_checkpoint_file = op_join(model_dir, 'model-autoclassifier--weights--best.h5')

    if option == 'train':
        model = auto_classifier_model(img_shape=x_train[0].shape, num_of_class=num_of_class)
        adam = Adam(lr=0.0003, beta_1=0.9, decay = 0.001/500) # choose a proper lr to control convergence speed, and val_loss
        masks_auto_training = masks_auto
        # Keys are output-layer names of the model; they must match the names
        # produced by auto_classifier_model.
        losses = {'sequential_1':"mean_squared_error", 'dense_4': "categorical_crossentropy"} # sequential_1: autoencoder output. dense_4: classifier output
        lossWeights = {'sequential_1':1.0, 'dense_4':1.0}
        model.compile(optimizer=adam, loss=losses, loss_weights=lossWeights, metrics={'sequential_1':"mean_squared_error",'dense_4':"accuracy"})
        # Resume from the best weights of an earlier run when they exist.
        if os.path.isfile(model_autoclassifier_checkpoint_file):
            print 'loading previous best weights', model_autoclassifier_checkpoint_file
            model.load_weights(model_autoclassifier_checkpoint_file)
        earlyStopping = EarlyStopping(monitor='val_loss', patience=100, verbose=0, mode='auto')
        # Only the weights with the best monitored value are kept on disk.
        checkpoint = ModelCheckpoint(model_autoclassifier_checkpoint_file, monitor='val_dense_4_acc', verbose=1, save_best_only=True, mode='auto')
        # The input doubles as the reconstruction target; the masks are passed
        # as per-output sample weights.
        model.fit(x_train, [x_train, labels], epochs=1, batch_size=16, shuffle=True, sample_weight ={'dense_4': masks, 'sequential_1':masks_auto_training},\
                  validation_data = (data_validation,[data_validation, labels_validation] ) ,callbacks=[checkpoint, earlyStopping])
    else:
        model = auto_classifier_model(img_shape=x_train[0].shape, num_of_class=num_of_class)
        model.load_weights(model_autoclassifier_checkpoint_file)
        # The model has two outputs: the reconstruction and the class scores.
        x_rec, classification_testing = model.predict([x_test])
        test_prediction = np.argmax(classification_testing,axis=1)
        test_real_class = np.argmax(test_labels, axis = 1)
        # Count how many predicted classes agree with the true classes.
        true = 0.
        all_sample = float( len(test_prediction))
        for i in range(len(test_prediction)):
            if test_prediction[i]==test_real_class[i]:
                true += 1.
        testing_accuracy = true / all_sample
        print "Classification Accuracy: %f"%testing_accuracy
        mse_error = mean_squared_error(x_rec.flatten(), x_test.flatten())
        print "Reconstruction Error: %f"%mse_error
def main(args=None):
    """Print every directory below the input folder that directly holds a file.

    Args:
        args: Optional list of command-line arguments.  ``None`` lets
            ``argparse`` read ``sys.argv``.

    The ``--dpmdir``, ``--xroot`` and ``--xrootd-only`` options are parsed for
    command-line compatibility but are not used by this function.
    """
    parser = ArgumentParser(usage="Usage: %(prog)s [options]", description="create directories to be created on DPM")
    parser.add_argument("-i", "--input", dest="input", type=str, default=".", help="this is the root folder")
    parser.add_argument("-d", "--dpmdir", dest="rootdir", type=str, default="/dpm/unige.ch/home/dampe", help="home of dpm")
    parser.add_argument("-x", "--xroot", dest="xroot", type=str, default="root://grid05.unige.ch:1094/", help="xrootd server")
    parser.add_argument("--xrootd-only", dest="xrootd_only", action='store_true', default=False,
                        help='if used, show xrootd links instead of DPM')
    opts = parser.parse_args(args)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("finding files in %s" % opts.input)

    # Walk through the folders.  The accumulator must NOT be called ``dirs``:
    # the loop target used to rebind that name to os.walk's own sub-directory
    # list, so paths were appended to the list that steers the traversal and
    # only the last iteration's list was printed.
    found_dirs = []
    for subdir, _subdirs, files in walk(opts.input):
        if files:
            # Every file in ``subdir`` has the same parent, so one is enough;
            # dirname() keeps the path normalised as before (for example a
            # trailing slash on the input folder is dropped).
            found_dirs.append(dirname(op_join(subdir, files[0])))

    # Print the output.
    for d in found_dirs:
        print(d)
v_l[max_l != l] = 0 vm[:,:,:,l] = v_l vs_m[k] = vm return vs_m if __name__ == "__main__": sel_clus = {1: [3, 21, 28, 34, 38, 39, 43, 62, 63, 81, 86, 88], 2: [15, 25, 29, 33, 35, 66, 79, 90, 92, 98]} # an example of selected clusters for segmentation # sel_clus is the selected clusters for segmentation, which can be multiple classes. import os from os.path import join as op_join data_dir = os.getcwd() data_file = op_join(data_dir, 'data.pickle')#here's the name of pickle data file of CECT small subvolumes with open(data_file, 'rb') as f: d = pickle.load(f, encoding='iso-8859-1') decode_all_images(d, data_dir) # The following files come from the previous Autoencoder3D results with open(op_join(data_dir, 'clus-center', 'kmeans.pickle'), 'rb') as f: km = pickle.load(f, encoding='iso-8859-1') with open(op_join(data_dir, 'clus-center', 'ccents.pickle'), 'rb') as f: cc = pickle.load(f, encoding='iso-8859-1') with open(op_join(data_dir, 'decoded', 'decoded.pickle'), 'rb') as f: vs_dec = pickle.load(f, encoding='iso-8859-1') # km = pickle.load(op_join(data_dir, 'clus-center', 'kmeans.pickle'))
def main(args):
    '''
    Run the Autoencoder3D tutorial: train the autoencoder and cluster the
    subvolumes, then train a segmentation network on the selected clusters and
    store its predictions.

    ``args`` must provide ``output_dir``, ``data`` and ``cluster_number``
    (the latter two may be falsy, in which case the built-in defaults are used).

    Step 1: Prepare the input dataset

    You can download the example dataset from
    https://cmu.app.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp/file/509296892992
    into your present working directory.
    Note: This is not the dataset used in the paper, which will be added in the future.

    There are four parameters; the dataset format is as follows.

    1. A python pickle data file of CECT small subvolumes. This data file should be prepared as follows:
       d is the small subvolume data file.
       d is a dictionary consisting of 'v_siz' and 'vs'.
       d['v_siz'] is a numpy.ndarray specifying the shape of the small subvolume.
       For example, d['v_siz'] = array([32,32,32]).
       d['vs'] is a dictionary with keys of uuids specifying each small subvolume.
       d['vs'][an example uuid] is a dictionary consisting of 'center', 'id', and 'v'.
       d['vs'][an example uuid]['center'] is the center of the small subvolume in the tomogram.
       For example, d['vs'][an example uuid]['center'] = [110,407,200].
       d['vs'][an example uuid]['id'] is the specific uuid.
       d['vs'][an example uuid]['v'] are the voxel values of the small subvolume,
       which is a numpy.ndarray of shape d['v_siz'].

    2. A tomogram file in .rec format, which is only required when performing pose normalization.

    3. Whether the optional pose normalization step should be applied. Input should be True or False.

    4. The number of clusters. This should be a positive integer such as 100.
    '''
    '''
    Step2 Train the auto encoder. Given the example dataset, you can use parameters1 or parameters2.
    '''
    import aitom.classify.deep.unsupervised.autoencoder.autoencoder as AE
    import time
    s_time = time.time()  # start of the run; the elapsed time is printed at the end
    # Note: these two datasets are missing for now
    # parameters1 = ["example/subvolumes_example_2.pickle", "None", "False", "4"]
    # parameters2 = ["example/subvolumes_example_1.pickle", "example/tomogram.rec", "True", "100"]

    # This example dataset is generated in the particle picking tutorial
    # Layout: [data pickle, tomogram file, pose-normalization flag, number of clusters]
    single_particle_param = [
        'data/demo_single_particle_subvolumes.pickle', 'None', "False", 4
    ]
    out_dir = args.output_dir
    # Command-line values, when given, override the defaults above.
    if args.data:
        single_particle_param[0] = args.data
    if args.cluster_number:
        single_particle_param[3] = args.cluster_number
    # demo dataset format is different, which has 2 kinds of particle and their templates
    multiple_particles_params = [
        'data/aitom_demo_subtomograms.pickle', 'None', "False", 4
    ]
    parameters_demo = single_particle_param  # choose one of the above

    import aitom.io.file as AIF
    d = AIF.pickle_load(parameters_demo[0])  # pickle data file of CECT small subvolumes
    if parameters_demo == multiple_particles_params:
        # TODO add multiparticle tutorial
        # choose one particle and convert the data format
        # d = d['5T2C_data']
        # from aitom.classify.deep.unsupervised.autoencoder.autoencoder_util import subtomograms_to_subvolumes
        # d = subtomograms_to_subvolumes(d)
        pass
    # A tomogram file in .rec format, which can be None when pose normalization is not required.
    # NOTE(review): the default here is the string 'None', not the None object.
    img_org_file = parameters_demo[1]
    # Whether the optional pose normalization step should be applied: True or False.
    # NOTE(review): eval() is applied to the hard-coded "False" entry, which is
    # never overwritten from args; do not route external input through it.
    pose = eval(parameters_demo[2])
    clus_num = int(parameters_demo[3])  # The number of clusters

    AE.encoder_simple_conv_test(d=d, pose=pose, img_org_file=img_org_file, out_dir=out_dir, clus_num=clus_num)
    AE.kmeans_centers_plot(AE.op_join(out_dir, 'clus-center'))
    '''
    Step 3. Manual selection of small subvolume clusters

    Autoencoder3D training step will have two output folders.
    'model' directory saved the trained models
    'clus-center' directory for the resulting clusters.

    There should be two pickle files in 'clus-center'.
    'kmeans.pickle' stores the uuids for each cluster.
    'ccents.pickle' stores the decoded cluster centers.
    The 'fig' folder under 'clus-center' directory contains the 2D slices of decoded cluster center.
    User can use the figures as guide for manual selection.

    Manual selection clues are provided in the folder 'fig' under 'clus-center'.
    Each picture is a 2D slices presentation of a decoded small subvolume cluster center.
    The picture name such as '035--47.png' refers to cluster 35 which consists 47 small subvolumes.
    '''
    '''
    Step 4. Optional Encoder-decoder Semantic Segmentation 3D network training.

    Based on the manual selection results, Encoder-decoder Semantic Segmentation 3D (EDSS3D) network
    can be trained and applied for another tomogram dataset.
    '''
    import aitom.classify.deep.unsupervised.autoencoder.seg_src as SEG
    import os
    from os.path import join as op_join
    # sel_clus = {1: [3, 21, 28, 34, 38, 39, 43, 62, 63, 81, 86, 88],
    #             2: [15, 25, 29, 33, 35, 66, 79, 90, 92, 98]}  # an example of selected clusters for segmentation
    # sel_clus is the selected clusters for segmentation, which can be multiple classes.
    sel_clus = {0: [3, 21, 28, 34, 38, 39, 43, 62, 63, 81, 86, 88]}
    import numpy as np
    # sel_clus = {0: np.arange(100)}

    data_dir = out_dir
    # Name of the pickle data file of CECT small subvolumes.
    # NOTE(review): this joins out_dir with the same path that was loaded
    # directly via AIF.pickle_load above -- confirm the pickle is expected
    # to live under out_dir as well.
    data_file = op_join(data_dir, parameters_demo[0])
    with open(data_file, 'rb') as f:
        d = SEG.pickle.load(f, encoding='iso-8859-1')
    SEG.decode_all_images(d, data_dir)

    # The following files come from the previous Autoencoder3D results
    with open(op_join(data_dir, 'clus-center', 'kmeans.pickle'), 'rb') as f:
        km = SEG.pickle.load(f, encoding='iso-8859-1')
    with open(op_join(data_dir, 'clus-center', 'ccents.pickle'), 'rb') as f:
        cc = SEG.pickle.load(f, encoding='iso-8859-1')
    with open(op_join(data_dir, 'decoded', 'decoded.pickle'), 'rb') as f:
        vs_dec = SEG.pickle.load(f, encoding='iso-8859-1')

    print('sel_clus', len(sel_clus))
    print('km', len(km))
    vs_lbl = SEG.image_label_prepare(sel_clus, km)
    print('vs_lbl', len(vs_lbl))
    # print(vs_lbl)
    # Every prepared label is overwritten with 1 before segmentation masks are built.
    for key in vs_lbl:
        vs_lbl[key] = 1
    print('vs_lbl', len(vs_lbl))
    # print(vs_lbl)
    vs_seg = SEG.train_label_prepare(
        vs_lbl=vs_lbl, vs_dec=vs_dec, iso_value=0.5)  # iso_value is the mask threshold for segmentation
    print('vs_seg', len(vs_seg))

    model_dir = op_join(data_dir, 'model-seg')
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    model_checkpoint_file = op_join(model_dir, 'model-seg--weights--best.h5')
    model_file = op_join(model_dir, 'model-seg.h5')

    # Reuse a previously saved segmentation model; train and save one otherwise.
    if os.path.isfile(model_file):
        print('use existing', model_file)
        import keras.models as KM
        model = KM.load_model(model_file)
    else:
        model = SEG.train_validate__reshape(
            vs_lbl=vs_lbl, vs=d['vs'], vs_seg=vs_seg, model_file=model_file,
            model_checkpoint_file=model_checkpoint_file)
        model.save(model_file)

    # Segmentation prediction on new data
    data_dir = out_dir  # This should be the new data for prediction
    data_file = op_join(data_dir, parameters_demo[0])
    with open(data_file, 'rb') as f:
        d = SEG.pickle.load(f, encoding='iso-8859-1')

    prediction_dir = op_join(data_dir, 'prediction')
    if not os.path.isdir(prediction_dir):
        os.makedirs(prediction_dir)
    # Predict only for the subvolumes that have a segmentation entry in vs_seg.
    vs_p = SEG.predict__reshape(model, vs={_: d['vs'][_]['v'] for _ in vs_seg})
    with open(op_join(prediction_dir, 'vs_p.pickle'), 'wb') as f:
        SEG.pickle.dump(vs_p, f, protocol=-1)
    print(time.time() - s_time, 's')
def encoder_simple_conv_test(d, pose, img_org_file, out_dir, clus_num):
    """Train (or reload) the autoencoder, encode the subvolumes and cluster them.

    Args:
        d: Dataset dictionary.  ``d['vs']`` maps a key to a dict with a ``'v'``
            volume (entries whose ``'v'`` is None are skipped) and, when pose
            normalization is used, a ``'center'``; ``d['v_siz']`` is the
            subvolume shape.
        pose: When equal to ``True`` every subvolume is pose-normalized against
            the tomogram before training.
        img_org_file: Tomogram file read with ``auto.read_mrc_numpy_vol``; must
            not be None when ``pose`` is True.
        out_dir: Output root.  Models go to ``<out_dir>/model``; cluster data
            go to ``<out_dir>/clus-center``.
        clus_num: Number of k-means clusters.

    Files written:
        ``model/model-autoencoder.h5``, ``model/model-encoder.h5``,
        ``model/model-decoder.h5`` (only when no saved autoencoder exists),
        ``clus-center/kmeans.pickle`` (cluster label -> list of keys),
        ``clus-center/ccents.pickle`` (cluster index -> decoded center) and
        ``clus-center/ccents_d.pickle`` (encoded centers).
    """
    # ``== True`` is kept on purpose so that only True-equal values enable the
    # pose branch, exactly as before.
    if pose == True:  # noqa: E712
        assert img_org_file is not None
        tom0 = auto.read_mrc_numpy_vol(img_org_file)
        tom = auto.smooth(tom0, 2.0)

        x_keys = [_ for _ in d['vs'] if d['vs'][_]['v'] is not None]
        x_train_no_pose = N.array([N.expand_dims(d['vs'][_]['v'], -1) for _ in x_keys])
        x_center = [d['vs'][_]['center'] for _ in x_keys]
        default_val = tom.mean()

        # Flip the intensities: after these two steps the former maximum is 0
        # and every other voxel holds its distance to that maximum.
        x_train_no_pose -= x_train_no_pose.max()
        x_train_no_pose = N.abs(x_train_no_pose)

        print('pose normalizing')
        x_train = []
        for center, vol in zip(x_center, x_train_no_pose):
            v = vol[:, :, :, 0]
            c = auto.center_mass(v)
            rm = auto.pca(v=v, c=c)['v']  # calculate principal directions
            mid_co = (N.array(v.shape) - 1) / 2.0
            loc_r__pn = rm.T.dot(mid_co - c)
            # pose normalize so that the major axis is along x-axis
            vr = auto.rotate_retrieve(v, tom=tom, rm=rm, center=center,
                                      loc_r=loc_r__pn, default_val=default_val)
            x_train.append(vr)
        x_train = N.expand_dims(N.array(x_train), axis=4)
        print('pose normalization finished')
    else:
        x_keys = [_ for _ in d['vs'] if d['vs'][_]['v'] is not None]
        x_train = N.array([N.expand_dims(d['vs'][_]['v'], -1) for _ in x_keys])

    # WARNING: the input is deliberately NOT normalized here.  If you normalize
    # here, you also need to normalize when decoding, so it is better not to;
    # use batch normalization in the network instead.  (A permanently disabled
    # ``if False:`` normalization block used to live here.)

    model_dir = op_join(out_dir, 'model')
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    model_autoencoder_checkpoint_file = op_join(model_dir, 'model-autoencoder--weights--best.h5')
    model_autoencoder_file = op_join(model_dir, 'model-autoencoder.h5')
    model_encoder_file = op_join(model_dir, 'model-encoder.h5')
    model_decoder_file = op_join(model_dir, 'model-decoder.h5')

    if not os.path.isfile(model_autoencoder_file):
        from keras.callbacks import EarlyStopping, ModelCheckpoint
        from keras.optimizers import Adam

        enc = encoder_simple_conv(img_shape=d['v_siz'])
        autoencoder = enc['autoencoder']

        adam = Adam(lr=0.001, beta_1=0.9, decay=0.001 / 500)  # choose a proper lr to control convergence speed, and val_loss
        autoencoder.compile(optimizer=adam, loss='mean_squared_error')

        # Resume from the best weights of an interrupted run when available.
        if os.path.isfile(model_autoencoder_checkpoint_file):
            print('loading previous best weights', model_autoencoder_checkpoint_file)
            autoencoder.load_weights(model_autoencoder_checkpoint_file)

        earlyStopping = EarlyStopping(monitor='val_loss', patience=20, verbose=0, mode='auto')
        checkpoint = ModelCheckpoint(model_autoencoder_checkpoint_file, monitor='val_loss',
                                     verbose=1, save_best_only=True, mode='auto')
        # use a large batch size when batch normalization is used
        autoencoder.fit(x_train, x_train, nb_epoch=100, batch_size=128, shuffle=True,
                        validation_split=0.1, callbacks=[checkpoint, earlyStopping])

        # we use the best weights for subsequent analysis
        autoencoder.load_weights(model_autoencoder_checkpoint_file)

        enc['autoencoder'].save(model_autoencoder_file)
        enc['encoder'].save(model_encoder_file)
        enc['decoder'].save(model_decoder_file)
    else:
        import keras.models as KM
        enc = {
            'autoencoder': KM.load_model(model_autoencoder_file),
            'encoder': KM.load_model(model_encoder_file),
            'decoder': KM.load_model(model_decoder_file),
        }

    x_enc = enc['encoder'].predict(x_train)

    # Use k-means to separate x_enc into the requested number of clusters,
    # then decode the cluster centers.
    from sklearn.cluster import KMeans
    kmeans = KMeans(n_clusters=clus_num, n_jobs=-1, n_init=100).fit(x_enc)
    x_km_cent = N.array([_.reshape(x_enc[0].shape) for _ in kmeans.cluster_centers_])
    x_km_cent_pred = enc['decoder'].predict(x_km_cent)

    # save cluster info and cluster centers
    clus_center_dir = op_join(out_dir, 'clus-center')
    if not os.path.isdir(clus_center_dir):
        os.makedirs(clus_center_dir)

    # kmeans.labels_ is aligned with the rows of x_enc, i.e. with x_keys.
    kmeans_clus = defaultdict(list)
    for key, label in zip(x_keys, kmeans.labels_):
        kmeans_clus[label].append(key)
    auto.pickle_dump(kmeans_clus, op_join(clus_center_dir, 'kmeans.pickle'))

    ccents = {i: cent.reshape(d['v_siz']) for i, cent in enumerate(x_km_cent_pred)}
    auto.pickle_dump(ccents, op_join(clus_center_dir, 'ccents.pickle'))
    auto.pickle_dump(x_km_cent, op_join(clus_center_dir, 'ccents_d.pickle'))
max_t = N.max([ccents[_].max() for _ in ccents]) assert max_t > min_t ccents_t = {_:((ccents[_] - min_t) / (max_t - min_t) ) for _ in ccents} ccents_t = ccents for i in ccents_t: clus_siz = len(kmeans_clus[i]) t = auto.cub_img(ccents_t[i])['im'] auto.save_png(t, op_join(clus_center_figure_dir, '%003d--%d.png'%(i,clus_siz)), normalize=False) if __name__ == "__main__": d = auto.pickle_load(sys.argv[1]) img_org_file = sys.argv[2] pose = eval(sys.argv[3]) clus_num = int(sys.argv[4]) encoder_simple_conv_test(d = d, pose = pose , img_org_file = img_org_file, out_dir=os.getcwd(), clus_num = clus_num) kmeans_centers_plot(op_join(os.getcwd(), 'clus-center'))
#!/bin/env python
'''
Created on Jun 29, 2016

@author: zimmer

For every sub-folder of the folder given as first command-line argument, take
the first run of digits in the sub-folder name as its release and report how
many of the files inside do not contain that release in their name.
'''
from os import listdir
from os.path import isdir
from os.path import join as op_join
from re import findall
from sys import argv

input_folder = argv[1]
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("working on folder %s" % input_folder)
for folder in listdir(input_folder):
    fullPath = op_join(input_folder, folder)
    # Plain files next to the release folders cannot be listed; skip them.
    if not isdir(fullPath):
        continue
    res = findall(r"\d+", folder)
    # Without digits there is no release to compare against.  The previous
    # code left ``release`` as None here and crashed with a TypeError on the
    # first file name it tested.
    if not res:
        continue
    release = res[0]
    bad_files = [f for f in listdir(fullPath) if release not in f]
    if bad_files:
        print('found %i bad files in %s' % (len(bad_files), fullPath))
# Maps each per-subject file name to the metadata column holding its path.
FNAMES_MAP = {
    'FLAIR.nii.gz': 'flair_flairspace',
    'T1.nii.gz': 't1_flairspace',
    'lesion_mask_flair.nii.gz': 'target_flairspace',
    'brainmask_T1_mask.nii.gz': 'brainmask_flairspace',
    'FLAIR_warped.nii.gz': 'flair',
    'T1w_base.nii.gz': 't1',
    'lesion_mask_t1.nii.gz': 'target',
    'T1w_brain_mask.nii.gz': 'brainmask',
    'FLAIR_warped_bias_corrected_fs_nucorrect.nii.gz': 'flair_fsn3',
    'FLAIR_warped_bias_corrected_ants_n4.nii.gz': 'flair_antsn3',
    'T1w_bias_corrected_fs_nucorrect.nii.gz': 't1_fsn3',
    'T1w_bias_corrected_ants_n4.nii.gz': 't1_antsn3',
    'T1w_bias_corrected_ants_n4_labeled.nii.gz': 't1_wm_mask',
}

# Directory containing one sub-directory per subject; metadata.csv is written here.
ROOT_DIR = '/home/mikhail/nhw/data/derivatives/datasink/preproc'

if __name__ == "__main__":
    # Every directory directly below ROOT_DIR is treated as one subject.
    subjects = []
    for entry in os.listdir(ROOT_DIR):
        if os.path.isdir(op_join(ROOT_DIR, entry)):
            subjects.append(entry)

    column_names = list(FNAMES_MAP.values()) + ['cite']
    df = pd.DataFrame(data=None, index=subjects, columns=column_names)

    for subj in subjects:
        # Paths are stored relative to ROOT_DIR: <subject>/<file name>.
        for fname, column in FNAMES_MAP.items():
            df.loc[subj, column] = op_join(subj, fname)
        # 'cite' is the part after the first '-' with all digits removed.
        subj_id = subj.split('-')[1]
        non_digits = [ch for ch in subj_id if not ch.isdigit()]
        df.loc[subj, 'cite'] = ''.join(non_digits)

    df.to_csv(op_join(ROOT_DIR, 'metadata.csv'), index_label='id')
import os
from os.path import join as op_join
from shutil import copyfile

# Source tree, laid out as <site>/<first sub-folder>/<subject>/...
WMH_ORIG = '/home/mikhail/nhw/data/wmh'
# Destination tree with one 'sub-<site><subject>' folder per subject.
WMH_PREPROC = '/home/mikhail/nhw/data/derivatives/datasink/preproc'

if __name__ == "__main__":
    fnames_to_copy = ['brainmask_T1_mask.nii.gz', 'FLAIR.nii.gz', 'T1.nii.gz']

    for cite in os.listdir(WMH_ORIG):
        cite_root = op_join(WMH_ORIG, cite)
        # Only the first entry listed inside the site folder is used.
        tom_root = op_join(cite_root, os.listdir(cite_root)[0])

        for subj in os.listdir(tom_root):
            src_root = op_join(tom_root, subj)
            dest_root = op_join(WMH_PREPROC, 'sub-{}{}'.format(cite, subj))

            # The lesion mask is renamed on copy; the 'pre' files keep their names.
            transfers = [(op_join(src_root, 'wmh.nii.gz'),
                          op_join(dest_root, 'lesion_mask_flair.nii.gz'))]
            transfers.extend(
                (op_join(src_root, 'pre', fname), op_join(dest_root, fname))
                for fname in fnames_to_copy
            )
            for source_path, target_path in transfers:
                copyfile(source_path, target_path)
def favicon():
    """Serve ``faviconSQ.ico`` from the application's ``static`` directory.

    Returns:
        The response built by ``send_from_directory``.
    """
    static_dir = op_join(app.root_path, 'static')
    # The file is an .ico, so it is announced with the icon MIME type (as in
    # the Flask favicon recipe) instead of the former, mismatching 'image/png'.
    return send_from_directory(static_dir, 'faviconSQ.ico',
                               mimetype='image/vnd.microsoft.icon')
#!/bin/env python
'''
Created on Jun 29, 2016

@author: zimmer

For every sub-folder of the folder given as first command-line argument, take
the first run of digits in the sub-folder name as its release and report how
many of the files inside do not contain that release in their name.
'''
from os import listdir
from os.path import isdir
from os.path import join as op_join
from re import findall
from sys import argv

input_folder = argv[1]
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("working on folder %s" % input_folder)
for folder in listdir(input_folder):
    fullPath = op_join(input_folder, folder)
    # Plain files next to the release folders cannot be listed; skip them.
    if not isdir(fullPath):
        continue
    res = findall(r"\d+", folder)
    # Without digits there is no release to compare against.  The previous
    # code left ``release`` as None here and crashed with a TypeError on the
    # first file name it tested.
    if not res:
        continue
    release = res[0]
    bad_files = [f for f in listdir(fullPath) if release not in f]
    if bad_files:
        print('found %i bad files in %s' % (len(bad_files), fullPath))