def test():
    # test_x, test_y, class_all = getdata(args.mode)
    test_handle = getgen(args.mode)
    model = keras.models.load_model(
        os.path.join(args.save_path, 'baseline.h5'),
        custom_objects={'LEAM': LEAM})
    result = model.evaluate_generator(
        datagen(test_handle, opt),
        steps=np.ceil(test_handle['x'].shape[0] / args.batch_size))
    print("Result on the held-out set: ", result)
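# `getgen` and `datagen` are defined elsewhere in this repo. For reference,
# here is a minimal sketch of the batch generator that the evaluate_generator
# call above assumes -- the handle layout ('x', 'y' arrays) and the opt fields
# are assumptions for illustration, not the repo's actual implementation.
def datagen_sketch(handle, opt):
    """Yield ([x_batch, class_ids], y_batch) tuples indefinitely, matching
    the two-input LEAM model built in train() below."""
    n = handle['x'].shape[0]
    class_ids = np.tile(np.arange(opt.class_num), (opt.batch_size, 1))
    while True:
        for start in range(0, n, opt.batch_size):
            x = handle['x'][start:start + opt.batch_size]
            y = handle['y'][start:start + opt.batch_size]
            yield [x, class_ids[:len(x)]], y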
train_encoder_batch = batch(train_encoder, maxlen, input_ctable,
                            train_batch_size, reverse)
train_decoder_batch = batch(train_decoder, maxlen, target_ctable,
                            train_batch_size)
train_target_batch = batch(train_target, maxlen, target_ctable,
                           train_batch_size)

val_encoder_batch = batch(val_encoder, maxlen, input_ctable,
                          val_batch_size, reverse)
val_decoder_batch = batch(val_decoder, maxlen, target_ctable,
                          val_batch_size)
val_target_batch = batch(val_target, maxlen, target_ctable,
                         val_batch_size)

train_loader = datagen(train_encoder_batch,
                       train_decoder_batch, train_target_batch)
val_loader = datagen(val_encoder_batch,
                     val_decoder_batch, val_target_batch)

model.fit_generator(train_loader,
                    steps_per_epoch=train_steps,
                    epochs=1, verbose=1,
                    validation_data=val_loader,
                    validation_steps=val_steps)

# On epoch end - decode a batch of misspelled tokens from the
# validation set to visualize speller performance.
nb_tokens = 5
input_tokens, target_tokens, decoded_tokens = decode_sequences(
    val_encoder, val_target, input_ctable, target_ctable,
    maxlen, reverse, encoder_model, decoder_model, nb_tokens)
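# `batch` and `datagen` come from the surrounding speller module. A minimal
# sketch of the combiner, assuming each batch(...) call above returns an
# endless generator of one-hot-encoded arrays (a hypothetical stand-in, not
# the module's actual code):
def speller_datagen_sketch(encoder_batches, decoder_batches, target_batches):
    """Zip the three endless batch streams into the
    ([encoder_input, decoder_input], target) tuples that fit_generator
    expects for teacher-forced seq2seq training."""
    while True:
        yield ([next(encoder_batches), next(decoder_batches)],
               next(target_batches))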
def open_sesemi():
    args = parse_args()
    network = args.network
    dataset = args.dataset
    nb_labels = args.nb_labels
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    arg2var = {'convnet': convnet,
               'wrn': wrn,
               'nin': nin,
               'svhn': svhn,
               'cifar10': cifar10,
               'cifar100': cifar100,}

    # Experiment- and dataset-dependent parameters.
    zca = True
    hflip = True
    epochs = 50
    if dataset in {'svhn', 'cifar10'}:
        if dataset == 'svhn':
            zca = False
            hflip = False
            epochs = 30
        nb_classes = 10
    elif dataset == 'cifar100':
        nb_classes = 100
    else:
        raise ValueError('`dataset` must be "svhn", "cifar10", "cifar100".')

    super_dropout = 0.2
    in_network_dropout = 0.0
    if network == 'convnet' and dataset == 'svhn':
        super_dropout = 0.5
        in_network_dropout = 0.5
    elif network == 'wrn' and dataset == 'svhn':
        super_dropout = 0.5

    # Prepare the dataset.
    (x_train, y_train), (x_test, y_test) = arg2var[dataset].load_data()
    x_test = global_contrast_normalize(x_test)
    x_train = global_contrast_normalize(x_train)
    if zca:
        zca_whiten = zca_whitener(x_train)
        x_train = zca_whiten(x_train)
        x_test = zca_whiten(x_test)
    x_test = x_test.reshape((len(x_test), 32, 32, 3))
    x_train = x_train.reshape((len(x_train), 32, 32, 3))

    if nb_labels in {50000, 73257}:
        x_labeled = x_train
        y_labeled = y_train
    else:
        labels_per_class = nb_labels // nb_classes
        sample_inds = stratified_sample(y_train, labels_per_class)
        x_labeled = x_train[sample_inds]
        y_labeled = y_train[sample_inds]
    y_labeled = to_categorical(y_labeled)

    # Shared training parameters.
    base_lr = 0.05
    batch_size = 16
    lr_decay_power = 0.5
    input_shape = (32, 32, 3)
    max_iter = (len(x_train) // batch_size) * epochs

    # Compile the SESEMI model.
    sesemi_model, inference_model = compile_sesemi(
        arg2var[network], input_shape, nb_classes,
        base_lr, in_network_dropout, super_dropout)
    print(sesemi_model.summary())

    lr_poly_decay = LRScheduler(base_lr, max_iter, lr_decay_power)
    evaluate = DenseEvaluator(inference_model, (x_test, y_test),
                              hflip, oversample=True)

    super_datagen = ImageDataGenerator(
        width_shift_range=[-2, -1, 0, 1, 2],
        height_shift_range=[-2, -1, 0, 1, 2],
        horizontal_flip=hflip,
        preprocessing_function=gaussian_noise,
        fill_mode='reflect',
    )
    self_datagen = ImageDataGenerator(
        width_shift_range=[-2, -1, 0, 1, 2],
        height_shift_range=[-2, -1, 0, 1, 2],
        horizontal_flip=False,
        preprocessing_function=gaussian_noise,
        fill_mode='reflect',
    )
    super_data = super_datagen.flow(
        x_labeled, y_labeled, shuffle=True, batch_size=1, seed=None)
    self_data = self_datagen.flow(
        x_train, shuffle=True, batch_size=1, seed=None)
    train_data_loader = datagen(super_data, self_data, batch_size)

    # Fit the SESEMI model on mini-batches with data augmentation.
    print('Run configuration:')
    print('network=%s,' % network, 'dataset=%s,' % dataset,
          'horizontal_flip=%s,' % hflip, 'ZCA=%s,' % zca,
          'nb_epochs=%d,' % epochs, 'batch_size=%d,' % batch_size,
          'nb_labels=%d,' % len(y_labeled), 'gpu_id=%s' % args.gpu_id)
    sesemi_model.fit_generator(
        train_data_loader,
        epochs=epochs, verbose=1,
        steps_per_epoch=len(x_train) // batch_size,
        callbacks=[lr_poly_decay, evaluate],
    )
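# `datagen` here joins the supervised and self-supervised image flows into
# multi-task mini-batches. Below is a sketch of that joining logic, assuming
# the self-supervised proxy task is predicting one of four image rotations --
# the actual repo's proxy-label scheme and output ordering may differ.
def sesemi_datagen_sketch(super_data, self_data, batch_size):
    """Pair a supervised (x, y) batch with a rotation-labeled batch drawn
    from the two single-image Keras flows."""
    while True:
        x_super, y_super, x_self, y_self = [], [], [], []
        while len(x_super) < batch_size:
            x, y = next(super_data)       # flow with labels yields (x, y)
            x_super.append(x[0])
            y_super.append(y[0])
        while len(x_self) < batch_size:
            x = next(self_data)[0]        # flow without labels yields x only
            k = np.random.randint(4)      # proxy label: rotation index
            x_self.append(np.rot90(x, k))
            y_self.append(k)
        yield ([np.stack(x_super), np.stack(x_self)],
               [np.stack(y_super), np.eye(4)[y_self]])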
def main():
    args = parse_args()
    network = args.network
    dataset = args.dataset
    nb_labels = args.nb_labels
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)

    arg2var = {'convnet': convnet,
               'wrn': wrn,
               'resnet50v2': resnet50v2,
               'svhn': svhn,
               'cifar10': cifar10,
               'cifar100': cifar100,}

    # Dataset-specific parameters.
    hflip = True
    zca = True
    epochs = 10
    if dataset in ['svhn', 'cifar10']:
        if dataset == 'svhn':
            hflip = False
            zca = False
            epochs = 30
        nb_classes = 10
    elif dataset == 'cifar100':
        nb_classes = 100
    else:
        raise ValueError('`dataset` must be "svhn", "cifar10", "cifar100".')

    (x_train, y_train), (x_test, y_test) = arg2var[dataset].load_data()
    x_train = global_contrast_normalize(x_train)
    x_test = global_contrast_normalize(x_test)
    if zca:
        zca_whiten = zca_whitener(x_train)
        x_train = zca_whiten(x_train)
        x_test = zca_whiten(x_test)
    x_train = x_train.reshape((len(x_train), 32, 32, 3))
    x_test = x_test.reshape((len(x_test), 32, 32, 3))

    labels_per_class = nb_labels // nb_classes
    if nb_labels == 73257:
        # Use the full SVHN training set as the labeled subset.
        labels_per_class = 1000000
    sample_inds = stratified_sample(y_train, labels_per_class)
    x_labeled = x_train[sample_inds]
    y_labeled = y_train[sample_inds]
    y_labeled = to_categorical(y_labeled)

    # Training parameters.
    input_shape = (32, 32, 3)
    batch_size = 32
    base_lr = 0.05
    lr_decay_power = 0.5
    dropout_rate = 0.2
    max_iter = (len(x_train) // batch_size) * epochs

    sesemi_model, inference_model = open_sesemi(
        arg2var[network], input_shape, nb_classes, base_lr, dropout_rate)
    print(sesemi_model.summary())

    super_datagen = ImageDataGenerator(
        width_shift_range=3,
        height_shift_range=3,
        horizontal_flip=hflip,
        preprocessing_function=gaussian_noise,
        fill_mode='reflect',
    )
    self_datagen = ImageDataGenerator(
        width_shift_range=3,
        height_shift_range=3,
        horizontal_flip=False,
        preprocessing_function=gaussian_noise,
        fill_mode='reflect',
    )
    super_data = super_datagen.flow(
        x_labeled, y_labeled, shuffle=True, batch_size=1, seed=None)
    self_data = self_datagen.flow(
        x_train, shuffle=True, batch_size=1, seed=None)
    train_data_loader = datagen(super_data, self_data, batch_size)

    lr_poly_decay = LRScheduler(base_lr, max_iter, lr_decay_power)
    evaluate = DenseEvaluator(inference_model, (x_test, y_test), hflip)

    # Fit the SESEMI model on mini-batches with data augmentation.
    print('Run configuration:')
    print('network=%s,' % network, 'dataset=%s,' % dataset,
          'horizontal_flip=%s,' % hflip, 'ZCA=%s,' % zca,
          'nb_epochs=%d,' % epochs, 'batch_size=%d,' % batch_size,
          'nb_labels=%d,' % len(x_labeled), 'gpu_id=%d' % args.gpu_id)
    sesemi_model.fit_generator(
        train_data_loader,
        epochs=epochs, verbose=1,
        steps_per_epoch=len(x_train) // batch_size,
        callbacks=[lr_poly_decay, evaluate],
    )
    return
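# `stratified_sample` draws an equal number of labeled indices per class;
# below is a minimal sketch of that behavior (hypothetical -- the repo's
# version may shuffle or order differently). Passing a labels_per_class
# larger than any class count, as in the 73257-label SVHN case above,
# simply selects the whole training set.
def stratified_sample_sketch(y, labels_per_class):
    """Return indices covering up to labels_per_class examples per class,
    keeping the labeled subset class-balanced."""
    y = np.asarray(y).ravel()
    inds = []
    for c in np.unique(y):
        class_inds = np.flatnonzero(y == c)
        np.random.shuffle(class_inds)
        inds.extend(class_inds[:labels_per_class])
    return np.array(sorted(inds))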
def train():
    # Read word embeddings from VECTOR_DIR.
    with open(args.emb_path, 'rb') as f:
        word_vector = np.array(pickle.load(f))

    # f0 - convert the text sequence into the respective word embeddings.
    sentence_inputs = Input(shape=(args.maxlen,), dtype='int32')
    print("sentence_inputs, each of size max_len: ",
          K.int_shape(sentence_inputs))
    sentence_embeddings = Embedding(args.token_size + 1,
                                    args.embedding_size,
                                    mask_zero=False,
                                    weights=[word_vector],
                                    trainable=False)(sentence_inputs)
    print("sentence_embeddings, each of shape (max_len, embedding_size): ",
          K.int_shape(sentence_embeddings))

    # Calculates the attention values \beta and then the sentence encoding z.
    # sentence_attn = AttentionLayer()(sentence_embeddings)
    # sentence_encoder = Model(sentence_inputs, sentence_attn)

    # Obtain the class embedding C (K x P) = (20 x 300).
    class_all_inputs = Input((args.class_num,), dtype='int32')
    class_all_embeddings = Embedding(args.class_num,
                                     args.embedding_size,
                                     mask_zero=False)(class_all_inputs)

    # token_inputs = Input((args.sentence_size, args.maxlen,), dtype='int32')
    # label_inputs = Input((args.class_num,), dtype='int32')
    # token_encoder = TimeDistributed(sentence_encoder)(token_inputs)

    # f1 layer, which outputs z (the average of the word embeddings
    # weighted by the attention scores).
    # doc_leam = LEAM()([token_encoder, label_inputs, token_inputs, class_all_embeddings])
    doc_leam = LEAM()([sentence_embeddings, class_all_embeddings])

    # f2 (output) layer, which yields the class probabilities from the
    # sentence embedding z (doc_leam here).
    output = Dense(args.class_num, activation='softmax')(doc_leam)

    # model = Model(inputs=[token_inputs, label_inputs, class_all_inputs], outputs=[output])
    model = Model(inputs=[sentence_inputs, class_all_inputs], outputs=[output])
    # plot_model(model, to_file=os.path.join(args.save_path, 'model_plot.png'),
    #            show_shapes=True, show_layer_names=True)

    optimizer = keras.optimizers.Adam(lr=args.lr)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['acc'])
    print(model.summary())

    # train_x, train_y, test_x, test_y, class_all = getdata(args.mode)
    train_handle, valid_handle = getgen(args.mode)
    history = model.fit_generator(
        datagen(train_handle, opt),
        epochs=args.epochs,
        steps_per_epoch=np.ceil(train_handle['x'].shape[0] / args.batch_size),
        validation_data=datagen(valid_handle, opt),
        validation_steps=np.ceil(valid_handle['x'].shape[0] / args.batch_size))

    # Save the cross-validation results.
    if args.fold != '':
        args.save_path = os.path.join(
            args.save_path, 'e_{}_lr{}'.format(args.epochs, args.lr))
        args.save_path = os.path.join(args.save_path,
                                      'fold{}'.format(args.fold))
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    with open(os.path.join(args.save_path, 'accuracy.pkl'), 'wb') as f:
        pickle.dump(history.history['acc'], f)
    model.save(os.path.join(args.save_path, 'baseline.h5'))
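# The custom LEAM layer is defined elsewhere in the repo. Conceptually it
# scores each word against every class embedding and pools the sentence by
# those attention weights. Below is a sketch of that computation with Keras
# backend ops, assuming cosine-similarity scoring as in the LEAM paper
# (Wang et al., 2018); the repo's layer may add a learned phrase-level filter.
from keras import backend as K
from keras.layers import Layer

class LEAMSketch(Layer):
    """Hypothetical stand-in for LEAM: attend each word to the class
    embeddings and return the attention-weighted average z."""
    def call(self, inputs):
        embeddings, classes = inputs                 # (B, L, P), (B, K, P)
        # Cosine similarity G between every word and every class.
        e = K.l2_normalize(embeddings, axis=-1)
        c = K.l2_normalize(classes, axis=-1)
        g = K.batch_dot(e, c, axes=[2, 2])           # (B, L, K)
        # Per-word attention beta from its best class-match score.
        beta = K.softmax(K.max(g, axis=-1))          # (B, L)
        # Sentence encoding z: attention-weighted word average.
        return K.batch_dot(beta, embeddings, axes=[1, 1])  # (B, P)

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][2])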
def main():
    # Load the trained model.
    batch_size = 32
    model = keras.models.load_model('cifar10-1000-1.h5')
    print(model.summary())

    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train = global_contrast_normalize(x_train)
    x_test = global_contrast_normalize(x_test)
    print(x_train.shape)
    x_train = x_train.reshape((len(x_train), 32, 32, 3))
    x_test = x_test.reshape((len(x_test), 32, 32, 3))
    print(x_test.shape)
    # print(x_test[1])

    nb_classes = 10
    nb_labels = 1000
    labels_per_class = nb_labels // nb_classes
    if nb_labels == 73257:
        labels_per_class = 1000000
    sample_inds = stratified_sample(y_test, labels_per_class)
    x_labeled = x_test[sample_inds]
    y_labeled = y_test[sample_inds]
    y_labeled = to_categorical(y_labeled)
    # print(x_labeled)
    # print(y_labeled)
    print(x_labeled.shape)
    print(y_labeled.shape)

    super_datagen = ImageDataGenerator(
        width_shift_range=3,
        height_shift_range=3,
        # horizontal_flip=hflip,
        preprocessing_function=gaussian_noise,
        fill_mode='reflect',
    )
    self_datagen = ImageDataGenerator(
        width_shift_range=3,
        height_shift_range=3,
        horizontal_flip=False,
        preprocessing_function=gaussian_noise,
        fill_mode='reflect',
    )
    super_data = super_datagen.flow(
        x_labeled, y_labeled, shuffle=True, batch_size=1, seed=None)
    self_data = self_datagen.flow(
        x_test, shuffle=True, batch_size=1, seed=None)
    # super_data = x_labeled
    # self_data = x_test
    train_data_loader = datagen(super_data, self_data, batch_size)
    print('self - data')
    print(self_data)
    print('supervised - data')
    print(super_data)
    print('train_data_loader')
    print(train_data_loader)

    print(len(model.layers))
    print(model.layers)

    # Extract the activations of the shared 'convnet_trunk' layer.
    layer_name = 'convnet_trunk'
    layer_outputs = [model.get_layer(layer_name).get_output_at(2)]
    activation_model = Model(inputs=model.input, outputs=layer_outputs)
    steps = len(x_test) // batch_size
    activations = activation_model.predict_generator(
        train_data_loader, steps=steps, verbose=0)
    print(activations)
    print(activations.shape)

    k = 6666
    first_layer_activation = activations[k]
    print(first_layer_activation.shape)
    print(first_layer_activation)
    # plt.imshow(first_layer_activation)
    # plt.show()
    # imsave('first_layer_activation.jpg', first_layer_activation)
    # plt.figure(1)
    plt.matshow(first_layer_activation[:, :, 0])
    plt.show()
    # imsave('first_layer_activation' + str(k) + '[:,:,0].jpg',
    #        first_layer_activation[:, :, 0])

    # Extract the final representation from the 'self_clf' head.
    layer_name_2 = 'self_clf'
    layer_outputs_2 = [model.get_layer(layer_name_2).get_output_at(0)]
    representation_model = Model(inputs=model.input, outputs=layer_outputs_2)
    final_representation = np.array(representation_model.predict_generator(
        train_data_loader, steps=steps, verbose=0))
    print('The shape of final_representation')
    print(final_representation.shape)
    # print(final_representation)

    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    representation = scaler.fit_transform(final_representation)
    print(representation.shape)

    plot_representation = representation[:, 0:2]  # take the first two columns
    print(plot_representation.shape)
    plot_Y = plot_representation
    plt.scatter(plot_Y[:, 0], plot_Y[:, 1], c="green", marker='o', label='two')
    # plt.show()
    plt.savefig('The_scatter_of_first_2_columns.jpg', dpi=None)

    '''kmeans = KMeans(n_clusters=10, init='k-means++')
    kmeans.fit(plot_Y)
    print(kmeans.inertia_)
    centroids = kmeans.cluster_centers_
    print(centroids)
    print(centroids.shape)'''
    # plt.scatter(centroids[:, 0], centroids[:, 1],
    #             marker='x', s=169, linewidths=3,
    #             color='w', zorder=10)
    # plt.savefig('centers-cifar10-sesemi-features-1.jpg')

    '''tsne = manifold.TSNE(n_components=2, init='pca', random_state=501)
    X_tsne = tsne.fit_transform(X)
    print("Org data dimension is {}. Embedded data dimension is {}".format(
        X.shape[-1], X_tsne.shape[-1]))
    x_min, x_max = X_tsne.min(0), X_tsne.max(0)
    X_norm = (X_tsne - x_min) / (x_max - x_min)  # normalize into [0, 1]
    plt.figure(figsize=(8, 8))
    for i in range(X_norm.shape[0]):
        plt.text(X_norm[i, 0], X_norm[i, 1], str(y[i]),
                 color=plt.cm.Set1(y[i]),
                 fontdict={'weight': 'bold', 'size': 9})
    plt.xticks([])
    plt.yticks([])
    plt.show()
    plt.savefig('t-SNE-cifar10.jpg')'''

    '''superd = next(super_data)
    selfd = next(self_data)
    print('super - d')
    print(superd)
    print(superd.shape())
    print('self - d')
    print(selfd)
    print('Try to extract the representation of the sesemi model')
    fig = plt.figure(figsize=(14, 10))
    for n in range(1, 29):
        fig.add_subplot(4, 7, n)
        img_tensor = [self_data[n], super_data[n]]
        # img_tensor = np.expand_dims(img_tensor, axis=0)
        # img_tensor /= 255.
        print('image tensor to be shown')
        print(img_tensor)
        print(len(img_tensor))
        # plt.imshow(self_data)
        # plt.show()
        # print(img_tensor2.shape)
        # img = expand_dims(img, axis=0)
        # img = preprocess_input(img)
        img_tensor = list(itertools.chain.from_iterable(img_tensor))
        print(img_tensor.shape())
        img_tensor.flatten()
        print(img_tensor)
        feature_maps = model.predict(img_tensor)
        print(feature_maps)
        draw_features(feature_maps)
        plt.axis('off')
    plt.show()
    return
    print('Try to visualize the representation!')'''
    return
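# The commented-out t-SNE block above never ran to completion. A minimal
# working version of that visualization, assuming `final_representation`
# can be flattened to (n_samples, n_features) and y_test supplies the
# colors (function name and arguments are illustrative):
from sklearn import manifold

def plot_tsne_sketch(features, labels, out_path='t-SNE-cifar10.jpg'):
    """Embed the learned features in 2-D with t-SNE and scatter-plot them
    colored by class label."""
    X = features.reshape(len(features), -1)
    X_tsne = manifold.TSNE(n_components=2, init='pca',
                           random_state=501).fit_transform(X)
    # Rescale into [0, 1] for a tidy plot.
    x_min, x_max = X_tsne.min(0), X_tsne.max(0)
    X_norm = (X_tsne - x_min) / (x_max - x_min)
    plt.figure(figsize=(8, 8))
    plt.scatter(X_norm[:, 0], X_norm[:, 1],
                c=np.asarray(labels).ravel(), cmap='tab10', s=5)
    plt.xticks([])
    plt.yticks([])
    plt.savefig(out_path, dpi=None)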