def get_land_mask(data_config: DataConfig):
    # Mask of land pixels within the satellite image.
    land_mask = util.get_mask(data_config.radar_range, RAW_SIZE, SAT_PATH, SAT_RANGE)
    # Additionally mask everything beyond 95% of the radar range.
    range_mask = util.create_range_mask(data_config.radar_range * 0.95, SAT_RANGE, RAW_SIZE)
    land_mask[range_mask] = True
    return land_mask
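# `util.create_range_mask` is not shown in this section. Below is a minimal
# sketch of what it plausibly computes, assuming a square image of `size`
# pixels spanning `sat_range` km from the center, with pixels farther than
# `radar_range` km marked True. This is a hypothetical reimplementation, not
# the project's actual helper.
import numpy as np

def create_range_mask_sketch(radar_range, sat_range, size):
    # Pixel coordinates relative to the image center.
    ys, xs = np.mgrid[0:size, 0:size]
    center = (size - 1) / 2.0
    # Assumed scale: the image spans 2 * sat_range km over `size` pixels.
    km_per_pixel = 2.0 * sat_range / size
    dist = np.hypot(ys - center, xs - center) * km_per_pixel
    return dist > radar_range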
def forward(self, keys, values, pair_length, query, id=None):
    """Forward.

    Args:
        keys: Keys (batch_size, num_keys).
        values: Values (batch_size, num_keys).
        pair_length: Pair length (batch_size, ).
        query: Query (batch_size, hidden_size).

    Returns:
        (batch_size, value_size)

    """
    num_keys = keys.size(1)
    keys = self.key_embedding(keys)  # (batch_size, num_keys, key_size)
    values = self.value_embedding(values)  # (batch_size, num_keys, value_size)
    probability = torch.matmul(query.unsqueeze(1), keys.transpose(1, 2))
    probability = probability.squeeze(1)  # (batch_size, num_keys)
    mask = get_mask(num_keys, pair_length)  # (batch_size, num_keys)
    probability = masked_softmax(probability, mask, 1)  # (batch_size, num_keys)
    knowledge = torch.matmul(probability.unsqueeze(1), values)
    knowledge = knowledge.squeeze(1)  # (batch_size, value_size)
    return knowledge
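# Neither `get_mask` nor `masked_softmax` is defined in this section. Minimal
# sketches follow, assuming `get_mask(max_len, lengths)` returns a
# (batch_size, max_len) bool tensor that is True at valid positions, and that
# `masked_softmax` suppresses masked positions before normalizing. These are
# assumed semantics, not the project's actual helpers.
import torch

def get_mask_sketch(max_len, lengths):
    # lengths: (batch_size,) number of valid entries per example.
    positions = torch.arange(max_len, device=lengths.device)
    return positions.unsqueeze(0) < lengths.unsqueeze(1)

def masked_softmax_sketch(logits, mask, dim):
    # Set padded positions to -inf so they receive zero probability mass.
    logits = logits.masked_fill(~mask, float('-inf'))
    return torch.softmax(logits, dim=dim)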
def init_query(self, query_tokens):
    # query_tokens: (batch_size, max_query_length)
    # query_token_embed: (batch_size, max_query_length, query_token_embed_dim)
    self.query_token_embed = self.encoder.embedding(query_tokens)
    self.query_token_embed_mask = get_mask(query_tokens)
    self.query_embed = self.encoder(self.query_token_embed,
                                    mask=self.query_token_embed_mask)
    # self.query_embed = self.encoder(query_tokens, self.query_token_embed,
    #                                 mask=self.query_token_embed_mask,
    #                                 dropout=self.dropout, srng=self.srng)
    return self.query_embed, self.query_token_embed_mask
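# In this snippet `get_mask` is called on the raw token ids rather than on
# lengths, so it presumably derives the padding mask directly from a PAD id.
# A one-line sketch under that assumption (`pad_id` is hypothetical):
def get_token_mask_sketch(query_tokens, pad_id=0):
    # True for real tokens, False for padding.
    return query_tokens != pad_id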
def __init__(self, fimage=None, location="LaSilla"):
    # TODO: load ALL PARAMS
    self.location = location
    self.params = util.get_params(location)
    if fimage is None:
        fimage = "current.JPG"
    self.fimage = fimage
    self.retrieve_image()
    self.im_masked, self.im_original = util.loadallsky(fimage, return_complete=True)
    self.mask = util.get_mask(self.im_original)
    self.observability_map = None
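# Hedged usage sketch for the all-sky monitor above; the enclosing class name
# (here `AllSkyImage`) is an assumption, since only __init__ is shown:
#
#     monitor = AllSkyImage(fimage="current.JPG", location="LaSilla")
#     masked, original = monitor.im_masked, monitor.im_original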
def text_loss(to_hidden: ToHidden, text_decoder: TextDecoder, text_length: int,
              context, target, target_length, hiddens,
              encode_knowledge_func=None, teacher_forcing_ratio=1.0):
    """Text loss.

    Args:
        to_hidden (ToHidden): Context to hidden.
        text_decoder (TextDecoder): Text decoder.
        text_length (int): Text length.
        context: Context (batch_size, ContextEncoderConfig.output_size).
        target: Target (batch_size, dialog_text_max_len).
        target_length: Target length (batch_size, ).
        hiddens: (seq_len, batch, num_directions * hidden_size).
        encode_knowledge_func (optional): Knowledge encoding function.

    Returns:
        loss: Loss.
        n_totals: Number of words which produce loss.

    """
    batch_size = context.size(0)
    loss = 0
    n_totals = 0
    mask = get_mask(text_length, target_length)
    mask = mask.transpose(0, 1)  # (text_length, batch_size)
    hidden = to_hidden(context).to(GlobalConfig.device)
    word = SOS_ID * torch.ones(batch_size, dtype=torch.long)
    word = word.to(GlobalConfig.device)
    target = target.transpose(0, 1)  # (text_length, batch_size)
    use_teacher_forcing = random.random() < teacher_forcing_ratio
    for i in range(text_length):
        output, hidden = text_decoder(word, hidden, hiddens, encode_knowledge_func)
        mask_loss, n_total = mask_nll_loss(output, target[i], mask[i])
        if use_teacher_forcing:
            # Feed the ground-truth word as the next input.
            word = target[i]
        else:
            # Feed the model's own greedy prediction as the next input.
            topv, topi = output.topk(1)
            word = topi.squeeze(1).detach()
        loss += mask_loss
        n_totals += n_total
    return loss, n_totals
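# `mask_nll_loss` is not defined in this section. A minimal sketch, assuming
# `output` holds per-token probabilities (not logits) of shape
# (batch_size, vocab_size) and `mask` is a bool vector marking real
# (non-padding) targets; this mirrors the common masked-NLL pattern but is
# an assumption, not the project's actual helper.
import torch

def mask_nll_loss_sketch(output, target, mask):
    n_total = mask.sum()
    # Probability assigned to the gold word at each position.
    gathered = torch.gather(output, 1, target.unsqueeze(1)).squeeze(1)
    cross_entropy = -torch.log(gathered)
    # Average only over unmasked (real) positions.
    loss = cross_entropy.masked_select(mask).mean()
    return loss, n_total.item()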
def main():
    input_dir = args.input_dir
    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for f_path in os.listdir(input_dir):
        parts = f_path.split('.')
        suffix = parts[-1].lower()
        prefix = os.path.split(parts[-2])[-1]
        filename = prefix + '.' + suffix
        img = cv2.imread(os.path.join(input_dir, f_path))
        centers, landmarks = detect(img)
        if landmarks is None:
            with open(os.path.join(output_dir, 'fails.log'), 'a') as log_file:
                log_file.write(filename + '\n')
            print(filename, 'failed')
            continue
        print(filename)
        if suffix in ['jpg', 'png']:
            for ft_list in ft_lists:
                dirname = '-'.join(ft_list)
                ft_dir = os.path.join(output_dir, dirname)
                # img = auto_mask_single_img(f_path, ft_list, disturb_ellipse=0)
                mask = get_mask(img, ft_list, centers, landmarks,
                                disturb_ellipse=2, randrange=(10, 50))
                if not os.path.exists(ft_dir):
                    os.makedirs(ft_dir)
                imsave(os.path.join(ft_dir, filename), mask)
def objective(trial):
    model = define_model(trial).to(device)
    print(f"Trial Id: {trial.number} | "
          f"Model params: {sum(p.numel() for p in model.parameters() if p.requires_grad)} | "
          f"Timestamp: {trial.datetime_start}")
    print()

    lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    optimiser = optim.Adam(model.parameters(), lr=lr)
    criterion_name = trial.suggest_categorical("criterion", ["MSELoss", "L1Loss"])
    criterion = getattr(nn, criterion_name)()
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 128])
    drop_last = len(valdata.input) > batch_size
    # TODO: replace the fixed epoch-count search with early stopping.
    no_epochs = trial.suggest_int("no_epochs", 30, 300)

    trainloader = torch.utils.data.DataLoader(traindata, batch_size=batch_size,
                                              shuffle=True, num_workers=num_workers,
                                              drop_last=drop_last)
    valloader = torch.utils.data.DataLoader(valdata, batch_size=batch_size,
                                            shuffle=False, drop_last=False)

    mse, r2 = 0, 0
    for epoch in range(no_epochs):
        model.train()
        for i, (X1, X2, labels, set_sizes) in enumerate(trainloader):
            # Extract inputs and associated labels from the dataloader batch.
            X1 = X1.to(device)
            X2 = X2.to(device)
            labels = labels.to(device)
            set_sizes = set_sizes.to(device)
            mask = get_mask(set_sizes, X1.shape[2])
            # Predict outputs (forward pass).
            predictions = model(X1, X2, mask=mask)
            # Zero out the gradients before the backward pass (PyTorch accumulates them).
            optimiser.zero_grad()
            # Compute the loss.
            loss = criterion(predictions, labels)
            # Backpropagation.
            loss.backward()
            # Perform one step of gradient descent.
            optimiser.step()

        model.eval()
        y_pred = np.array([])
        y_gold = np.array([])
        with torch.no_grad():
            for i, (X1, X2, labels, set_sizes) in enumerate(valloader):
                # Extract inputs and associated labels from the dataloader batch.
                X1 = X1.to(device)
                X2 = X2.to(device)
                labels = labels.to(device)
                set_sizes = set_sizes.to(device)
                mask = get_mask(set_sizes, X1.shape[2])
                # Predict outputs (forward pass).
                predictions = model(X1, X2, mask=mask)
                # Append predictions and gold labels for the epoch-level metrics.
                y_pred = np.append(y_pred, predictions.cpu().detach().numpy())
                y_gold = np.append(y_gold, labels.cpu().detach().numpy())

        try:
            r2 = metrics.r2_score(y_gold, y_pred)
            mse = metrics.mean_squared_error(y_gold, y_pred)
        except ValueError:
            # sklearn raises ValueError on NaN predictions; report and prune.
            print("NaN in predictions")
            trial.report(-100, epoch)
            raise optuna.exceptions.TrialPruned()

        trial.report(r2, epoch)
        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    torch.save(model, "trained_models/{}.pt".format(trial.number))
    return r2
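# Hedged usage sketch for the objective above, using Optuna's standard study
# API. Direction "maximize" matches returning r2; the pruner choice and trial
# count are assumptions.
import optuna

if __name__ == "__main__":
    study = optuna.create_study(direction="maximize",
                                pruner=optuna.pruners.MedianPruner())
    study.optimize(objective, n_trials=100)
    print("Best trial:", study.best_trial.number, study.best_trial.params)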
def main():
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('--units', metavar='units', type=str,
                        help='a unit to visualize e.g. [0, 999]')
    parser.add_argument('--n_iters', metavar='iter', type=int, default=10,
                        help='Number of sampling steps per each unit')
    parser.add_argument('--threshold', metavar='w', type=float, default=-1.0, nargs='?',
                        help='The probability threshold to decide whether to keep an image')
    parser.add_argument('--save_every', metavar='save_iter', type=int, default=1,
                        help='Save a sample every N iterations. 0 to disable saving')
    parser.add_argument('--reset_every', metavar='reset_iter', type=int, default=0,
                        help='Reset the code every N iterations')
    parser.add_argument('--lr', metavar='lr', type=float, default=2.0, nargs='?',
                        help='Learning rate')
    parser.add_argument('--lr_end', metavar='lr', type=float, default=-1.0, nargs='?',
                        help='Ending learning rate')
    parser.add_argument('--epsilon1', metavar='lr', type=float, default=1.0, nargs='?',
                        help='Prior')
    parser.add_argument('--epsilon2', metavar='lr', type=float, default=1.0, nargs='?',
                        help='Condition')
    parser.add_argument('--epsilon3', metavar='lr', type=float, default=1.0, nargs='?',
                        help='Noise')
    parser.add_argument('--epsilon4', metavar='lr', type=float, default=0.0, nargs='?',
                        help='Context')
    parser.add_argument('--seed', metavar='n', type=int, default=0, nargs='?',
                        help='Random seed')
    parser.add_argument('--xy', metavar='n', type=int, default=0, nargs='?',
                        help='Spatial position for conv units')
    parser.add_argument('--opt_layer', metavar='s', type=str,
                        help='Layer at which we optimize a code')
    parser.add_argument('--act_layer', metavar='s', type=str, default="fc8",
                        help='Layer at which we activate a neuron')
    parser.add_argument('--init_file', metavar='s', type=str, default="None",
                        help='Init image')
    parser.add_argument('--write_labels', action='store_true', default=False,
                        help='Write class labels to images')
    parser.add_argument('--output_dir', metavar='b', type=str, default=".",
                        help='Output directory for saving results')
    parser.add_argument('--net_weights', metavar='b', type=str,
                        default=settings.encoder_weights,
                        help='Weights of the net being visualized')
    parser.add_argument('--net_definition', metavar='b', type=str,
                        default=settings.encoder_definition,
                        help='Definition of the net being visualized')

    args = parser.parse_args()

    # Default to a constant learning rate
    if args.lr_end < 0:
        args.lr_end = args.lr

    # initialize MyWriter so stdout is mirrored to a log file
    logFilename = "%s/%s_%s_log.txt" % (args.output_dir, args.units,
                                        str(datetime.datetime.now()).split('.')[0])
    writer = MyWriter(sys.stdout, logFilename)
    sys.stdout = writer

    start_time = datetime.datetime.now()

    # summary
    print "-------------"
    print " current time: %s" % str(start_time)
    print " units: %s    xy: %s" % (args.units, args.xy)
    print " n_iters: %s" % args.n_iters
    print " reset_every: %s" % args.reset_every
    print " save_every: %s" % args.save_every
    print " threshold: %s" % args.threshold
    print " epsilon1: %s" % args.epsilon1
    print " epsilon2: %s" % args.epsilon2
    print " epsilon3: %s" % args.epsilon3
    print " epsilon4: %s" % args.epsilon4
    print " start learning rate: %s" % args.lr
    print " end learning rate: %s" % args.lr_end
    print " seed: %s" % args.seed
    print " opt_layer: %s" % args.opt_layer
    print " act_layer: %s" % args.act_layer
    print " init_file: %s" % args.init_file
    print "-------------"
    print " output dir: %s" % args.output_dir
    print " net weights: %s" % args.net_weights
    print " net definition: %s" % args.net_definition
    print "-------------"
    # encoder and generator for images
    encoder = caffe.Net(settings.encoder_definition, settings.encoder_weights, caffe.TEST)
    generator = caffe.Net(settings.generator_definition, settings.generator_weights, caffe.TEST)

    # condition network, here an image classification net
    net = caffe.Classifier(args.net_definition, args.net_weights,
                           mean=np.float32([104.0, 117.0, 123.0]),  # ImageNet mean
                           channel_swap=(2, 1, 0))  # the reference model has channels in BGR order instead of RGB

    # Fix the seed
    np.random.seed(args.seed)

    # Sampler for class-conditional generation
    sampler = ClassConditionalSampler()
    inpainting = None

    if args.init_file != "None":
        # Pre-compute masks if we want to perform inpainting
        if args.epsilon4 > 0:
            mask, neg = util.get_mask()
        else:
            neg = None

        # Get the code for the masked image
        start_code, start_image = get_code(encoder=encoder, path=args.init_file,
                                           layer=args.opt_layer, mask=neg)

        # Package settings for in-painting experiments
        if args.epsilon4 > 0:
            inpainting = {
                "mask": mask,
                "mask_neg": neg,
                "image": start_image,
                "epsilon4": args.epsilon4
            }

        print "Loaded init code: ", start_code.shape
    else:
        # shape of the code being optimized
        shape = generator.blobs[settings.generator_in_layer].data.shape
        start_code = np.random.normal(0, 1, shape)
        print ">>", np.min(start_code), np.max(start_code)

    # Separate the underscore-separated list of units into numbers
    conditions = [{"unit": int(u), "xy": args.xy} for u in args.units.split("_")]

    # Optimize a code via gradient ascent
    output_image, list_samples = sampler.sampling(
        condition_net=net, image_encoder=encoder, image_generator=generator,
        gen_in_layer=settings.generator_in_layer,
        gen_out_layer=settings.generator_out_layer,
        start_code=start_code,
        n_iters=args.n_iters, lr=args.lr, lr_end=args.lr_end,
        threshold=args.threshold, layer=args.act_layer, conditions=conditions,
        epsilon1=args.epsilon1, epsilon2=args.epsilon2, epsilon3=args.epsilon3,
        inpainting=inpainting, output_dir=args.output_dir,
        reset_every=args.reset_every, save_every=args.save_every)

    # Output image
    filename = "%s/%s_%04d_%04d_%s_h_%s_%s_%s_%s__%s.jpg" % (
        args.output_dir, args.act_layer, conditions[0]["unit"], args.n_iters,
        args.lr, str(args.epsilon1), str(args.epsilon2), str(args.epsilon3),
        str(args.epsilon4), args.seed)

    if inpainting is not None:
        output_image = util.stitch(start_image, output_image)

    # Save the final image
    util.save_image(output_image, filename)
    print "%s/%s" % (os.getcwd(), filename)

    # Write labels to images
    print "Saving images..."
    for p in list_samples:
        img, name, label = p
        util.save_image(img, name)
        if args.write_labels:
            util.write_label_to_img(name, label)

    end_time = datetime.datetime.now()
    elapsed_time = end_time - start_time
    print "current time: %s" % str(end_time)
    print "elapsed time since start: %s" % str(elapsed_time).split('.')[0]
def main():
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('--units', metavar='units', type=str,
                        help='a unit to visualize e.g. [0, 999]')
    parser.add_argument('--n_iters', metavar='iter', type=int, default=10,
                        help='Number of sampling steps per each unit')
    parser.add_argument('--threshold', metavar='w', type=float, default=-1.0, nargs='?',
                        help='The probability threshold to decide whether to keep an image')
    parser.add_argument('--save_every', metavar='save_iter', type=int, default=1,
                        help='Save a sample every N iterations. 0 to disable saving')
    parser.add_argument('--reset_every', metavar='reset_iter', type=int, default=0,
                        help='Reset the code every N iterations')
    parser.add_argument('--lr', metavar='lr', type=float, default=2.0, nargs='?',
                        help='Learning rate')
    parser.add_argument('--lr_end', metavar='lr', type=float, default=-1.0, nargs='?',
                        help='Ending learning rate')
    parser.add_argument('--epsilon1', metavar='lr', type=float, default=1.0, nargs='?',
                        help='Prior')
    parser.add_argument('--epsilon2', metavar='lr', type=float, default=1.0, nargs='?',
                        help='Condition')
    parser.add_argument('--epsilon3', metavar='lr', type=float, default=1.0, nargs='?',
                        help='Noise')
    parser.add_argument('--epsilon4', metavar='lr', type=float, default=0.0, nargs='?',
                        help='Context')
    parser.add_argument('--seed', metavar='n', type=int, default=0, nargs='?',
                        help='Random seed')
    parser.add_argument('--xy', metavar='n', type=int, default=0, nargs='?',
                        help='Spatial position for conv units')
    parser.add_argument('--opt_layer', metavar='s', type=str,
                        help='Layer at which we optimize a code')
    parser.add_argument('--act_layer', metavar='s', type=str, default="fc8",
                        help='Layer at which we activate a neuron')
    parser.add_argument('--init_file', metavar='s', type=str, default="None",
                        help='Init image')
    parser.add_argument('--write_labels', action='store_true', default=False,
                        help='Write class labels to images')
    parser.add_argument('--output_dir', metavar='b', type=str, default=".",
                        help='Output directory for saving results')
    parser.add_argument('--net_weights', metavar='b', type=str,
                        default=settings.encoder_weights,
                        help='Weights of the net being visualized')
    parser.add_argument('--net_definition', metavar='b', type=str,
                        default=settings.encoder_definition,
                        help='Definition of the net being visualized')

    args = parser.parse_args()

    # Default to a constant learning rate
    if args.lr_end < 0:
        args.lr_end = args.lr

    # summary
    print "-------------"
    print " units: %s    xy: %s" % (args.units, args.xy)
    print " n_iters: %s" % args.n_iters
    print " reset_every: %s" % args.reset_every
    print " save_every: %s" % args.save_every
    print " threshold: %s" % args.threshold
    print " epsilon1: %s" % args.epsilon1
    print " epsilon2: %s" % args.epsilon2
    print " epsilon3: %s" % args.epsilon3
    print " epsilon4: %s" % args.epsilon4
    print " start learning rate: %s" % args.lr
    print " end learning rate: %s" % args.lr_end
    print " seed: %s" % args.seed
    print " opt_layer: %s" % args.opt_layer
    print " act_layer: %s" % args.act_layer
    print " init_file: %s" % args.init_file
    print "-------------"
    print " output dir: %s" % args.output_dir
    print " net weights: %s" % args.net_weights
    print " net definition: %s" % args.net_definition
    print "-------------"

    # encoder and generator for images
    encoder = caffe.Net(settings.encoder_definition, settings.encoder_weights, caffe.TEST)
    generator = caffe.Net(settings.generator_definition, settings.generator_weights, caffe.TEST)

    # condition network, here an image classification net
    net = caffe.Classifier(args.net_definition, args.net_weights,
                           mean=np.float32([104.0, 117.0, 123.0]),  # ImageNet mean
                           channel_swap=(2, 1, 0))  # the reference model has channels in BGR order instead of RGB

    h_net = caffe.Net("./nets/h_classifier/h_classifier.prototxt",
                      "./nets/h_classifier/h_classifier.caffemodel", caffe.TEST)

    # Fix the seed
    np.random.seed(args.seed)

    # Sampler for class-conditional generation
    sampler = ClassConditionalSampler()
    inpainting = None

    if args.init_file != "None":
        # Pre-compute masks if we want to perform inpainting
        if args.epsilon4 > 0:
            mask, neg = util.get_mask()
        else:
            neg = None

        # Get the code for the masked image
        start_code, start_image = get_code(encoder=encoder, path=args.init_file,
                                           layer=args.opt_layer, mask=neg)

        # Package settings for in-painting experiments
        if args.epsilon4 > 0:
            inpainting = {
                "mask": mask,
                "mask_neg": neg,
                "image": start_image,
                "epsilon4": args.epsilon4
            }

        print "Loaded init code: ", start_code.shape
    else:
        # shape of the code being optimized
        shape = generator.blobs[settings.generator_in_layer].data.shape
        start_code = np.random.normal(0, 1, shape)
        print ">>", np.min(start_code), np.max(start_code)

    # Separate the underscore-separated list of units into numbers
    conditions = [{"unit": int(u), "xy": args.xy} for u in args.units.split("_")]

    # Optimize a code via gradient ascent
    (output_image, list_samples, last_h, d_prior_norms, d_condition_norms,
     boundary_points, h_norms) = sampler.sampling(
        condition_net=net, image_encoder=encoder, image_generator=generator,
        gen_in_layer=settings.generator_in_layer,
        gen_out_layer=settings.generator_out_layer,
        start_code=start_code,
        n_iters=args.n_iters, lr=args.lr, lr_end=args.lr_end,
        threshold=args.threshold, layer=args.act_layer, conditions=conditions,
        epsilon1=args.epsilon1, epsilon2=args.epsilon2, epsilon3=args.epsilon3,
        inpainting=inpainting, output_dir=args.output_dir,
        reset_every=args.reset_every, save_every=args.save_every)

    #################### send h through the h_net to verify class probability ####################
    probs = h_net.forward(fc6=last_h, end='prob')
    class_prob = probs['prob'][0][conditions[0]["unit"]]
    print("class probability is " + str(class_prob))
    ##############################################################################################
    #################### Plot gradients vs. num_iters ####################
    # plot the gradients
    plt.subplot(3, 1, 1)  # subplot(nrows, ncols, plot_number)
    x1 = np.linspace(0, args.n_iters, args.n_iters + 1, endpoint=True)
    plt.title('d_prior and d_condition')
    plt.plot(x1, d_prior_norms, color="blue", linewidth=2.0, linestyle="--",
             label='d_prior norms')
    plt.plot(x1, d_condition_norms, color="red", linewidth=2.0, linestyle="--",
             label='d_condition norms')
    plt.legend()

    plt.subplot(3, 1, 2)
    x2 = np.linspace(0, args.n_iters, args.n_iters + 1, endpoint=True)
    plt.title('d_prior (scaled by eps1=' + '%.0e' % Decimal(args.epsilon1) +
              ') and d_condition (scaled by eps2=' + '%.0e' % Decimal(args.epsilon2) + ')')
    plt.plot(x2, d_condition_norms * args.epsilon2, color="red", linewidth=2.0,
             linestyle="--", label='d_condition norms (scaled)')
    plt.plot(x2, d_prior_norms * args.epsilon1, color="blue", linewidth=2.0,
             linestyle="--", label='d_prior norms (scaled)')
    plt.legend()

    plt.subplot(3, 1, 3)
    x3 = np.linspace(25, args.n_iters, args.n_iters + 1 - 25, endpoint=True)
    plt.title('d_prior (scaled by eps1=' + '%.0e' % Decimal(args.epsilon1) +
              ') and d_condition (scaled by eps2=' + '%.0e' % Decimal(args.epsilon2) +
              ') from n_iter=25')
    plt.plot(x3, d_condition_norms[25:] * args.epsilon2, color="red", linewidth=2.0,
             linestyle="--", label='d_condition norms (scaled)')
    plt.plot(x3, d_prior_norms[25:] * args.epsilon1, color="blue", linewidth=2.0,
             linestyle="--", label='d_prior norms (scaled)')
    plt.xlabel('num iters')
    plt.legend()

    # for i in xrange(args.n_iters):
    #     if i % 20 == 0:
    #         plt.annotate('(%s, %s)' % (i, d_condition_norms[i]),
    #                      xy=(i, d_condition_norms[i] + 20), textcoords='data')
    #         plt.annotate('(%s, %s)' % (i, d_condition_mins[i]),
    #                      xy=(i, d_condition_mins[i] - 0.0005), textcoords='data')

    # plt.title('% of boundary points')
    # plt.plot(boundary_points / float(start_code.shape[1]) * 100)
    # plt.xlabel('num iters')

    # plt.title('norm of h')
    # plt.plot(h_norms)
    # plt.xlabel('num iters')

    plt.show()
    # plt.savefig("%s/gradients_plt.png")  # , dpi=72)
    ####################################################################

    # Output image
    filename = "%s/%04d_%04d_%s_h_%s_%s_%s_%s__%s.jpg" % (
        args.output_dir, conditions[0]["unit"], args.n_iters, args.lr,
        str(args.epsilon1), str(args.epsilon2), str(args.epsilon3),
        str(args.epsilon4), args.seed)

    if inpainting is not None:
        output_image = util.stitch(start_image, output_image)

    # Save the final image
    util.save_image(output_image, filename)
    print "%s/%s" % (os.getcwd(), filename)

    # Write labels to images
    print "Saving images..."
    for p in list_samples:
        img, name, label = p
        util.save_image(img, name)
        if args.write_labels:
            util.write_label_to_img(name, label)
def recommend_loss(similarity: Similarity, batch_size: int, context,
                   pos_products, neg_products):
    """Recommend loss.

    Args:
        similarity (Similarity):
        batch_size (int):
        context: Context.
        pos_products: Positive products.
            (num_pos_products, pos_images, pos_product_texts, pos_product_text_lengths)
        neg_products: Negative products.
            (num_neg_products, neg_images, neg_product_texts, neg_product_text_lengths)

    """
    ones = torch.ones(batch_size).to(GlobalConfig.device)
    zeros = torch.zeros(batch_size).to(GlobalConfig.device)

    (num_pos_products, pos_images, pos_product_texts,
     pos_product_text_lengths) = pos_products
    (num_neg_products, neg_images, neg_product_texts,
     neg_product_text_lengths) = neg_products

    # Sizes:
    # num_pos_products: (batch_size, )
    # pos_images: (batch_size, pos_images_max_num, 3, image_size, image_size)
    # pos_product_texts: (batch_size, pos_images_max_num, product_text_max_len)
    # pos_product_text_lengths: (batch_size, pos_images_max_num)
    #
    # num_neg_products: (batch_size, )
    # neg_images: (batch_size, neg_images_max_num, 3, image_size, image_size)
    # neg_product_texts: (batch_size, neg_images_max_num, product_text_max_len)
    # neg_product_text_lengths: (batch_size, neg_images_max_num)

    num_pos_products = num_pos_products.to(GlobalConfig.device)
    pos_images = pos_images.to(GlobalConfig.device)
    pos_product_texts = pos_product_texts.to(GlobalConfig.device)
    pos_product_text_lengths = pos_product_text_lengths.to(GlobalConfig.device)
    pos_images.transpose_(0, 1)
    pos_product_texts.transpose_(0, 1)
    pos_product_text_lengths.transpose_(0, 1)
    # pos_images: (pos_images_max_num, batch_size, 3, image_size, image_size)
    # pos_product_texts: (pos_images_max_num, batch_size, product_text_max_len)
    # pos_product_text_lengths: (pos_images_max_num, batch_size)

    num_neg_products = num_neg_products.to(GlobalConfig.device)
    neg_images = neg_images.to(GlobalConfig.device)
    neg_product_texts = neg_product_texts.to(GlobalConfig.device)
    neg_product_text_lengths = neg_product_text_lengths.to(GlobalConfig.device)
    neg_images.transpose_(0, 1)
    neg_product_texts.transpose_(0, 1)
    neg_product_text_lengths.transpose_(0, 1)
    # neg_images: (neg_images_max_num, batch_size, 3, image_size, image_size)
    # neg_product_texts: (neg_images_max_num, batch_size, product_text_max_len)
    # neg_product_text_lengths: (neg_images_max_num, batch_size)

    pos_cos_sim = similarity(context, pos_product_texts[0],
                             pos_product_text_lengths[0], pos_images[0])

    # Mask out padded negative products.
    mask = get_mask(DatasetConfig.neg_images_max_num, num_neg_products)
    mask = mask.transpose(0, 1)  # (neg_images_max_num, batch_size)

    losses = []
    for i in range(DatasetConfig.neg_images_max_num):
        neg_cos_sim = similarity(context, neg_product_texts[i],
                                 neg_product_text_lengths[i], neg_images[i])
        loss = torch.max(zeros, ones - pos_cos_sim + neg_cos_sim)
        losses.append(loss)
    losses = torch.stack(losses)  # (neg_images_max_num, batch_size)
    loss = losses.masked_select(mask.byte()).mean()
    return loss
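# The inner term of the loop above is a standard margin-1 hinge over cosine
# similarities. An equivalent clamped form, shown as a sketch (function name
# is ours, not from the source):
import torch

def hinge_sketch(pos_cos_sim, neg_cos_sim, margin=1.0):
    # Same value as max(0, margin - pos + neg), computed elementwise.
    return torch.clamp(margin - pos_cos_sim + neg_cos_sim, min=0.0)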
def recommend_eval(similarity: Similarity, batch_size: int, context,
                   pos_products, neg_products):
    """Recommend evaluation.

    Args:
        similarity (Similarity):
        batch_size (int):
        context: Context.
        pos_products: Positive products.
            (num_pos_products, pos_images, pos_product_texts, pos_product_text_lengths)
        neg_products: Negative products.
            (num_neg_products, neg_images, neg_product_texts, neg_product_text_lengths)

    """
    (num_pos_products, pos_images, pos_product_texts,
     pos_product_text_lengths) = pos_products
    (num_neg_products, neg_images, neg_product_texts,
     neg_product_text_lengths) = neg_products

    # Sizes:
    # num_pos_products: (batch_size, )
    # pos_images: (batch_size, pos_images_max_num, 3, image_size, image_size)
    # pos_product_texts: (batch_size, pos_images_max_num, product_text_max_len)
    # pos_product_text_lengths: (batch_size, pos_images_max_num)
    #
    # num_neg_products: (batch_size, )
    # neg_images: (batch_size, neg_images_max_num, 3, image_size, image_size)
    # neg_product_texts: (batch_size, neg_images_max_num, product_text_max_len)
    # neg_product_text_lengths: (batch_size, neg_images_max_num)

    num_pos_products = num_pos_products.to(GlobalConfig.device)
    pos_images = pos_images.to(GlobalConfig.device)
    pos_product_texts = pos_product_texts.to(GlobalConfig.device)
    pos_product_text_lengths = pos_product_text_lengths.to(GlobalConfig.device)
    pos_images.transpose_(0, 1)
    pos_product_texts.transpose_(0, 1)
    pos_product_text_lengths.transpose_(0, 1)
    # pos_images: (pos_images_max_num, batch_size, 3, image_size, image_size)
    # pos_product_texts: (pos_images_max_num, batch_size, product_text_max_len)
    # pos_product_text_lengths: (pos_images_max_num, batch_size)

    num_neg_products = num_neg_products.to(GlobalConfig.device)
    neg_images = neg_images.to(GlobalConfig.device)
    neg_product_texts = neg_product_texts.to(GlobalConfig.device)
    neg_product_text_lengths = neg_product_text_lengths.to(GlobalConfig.device)
    neg_images.transpose_(0, 1)
    neg_product_texts.transpose_(0, 1)
    neg_product_text_lengths.transpose_(0, 1)
    # neg_images: (neg_images_max_num, batch_size, 3, image_size, image_size)
    # neg_product_texts: (neg_images_max_num, batch_size, product_text_max_len)
    # neg_product_text_lengths: (neg_images_max_num, batch_size)

    pos_cos_sim = similarity(context, pos_product_texts[0],
                             pos_product_text_lengths[0], pos_images[0])

    # Mask out padded negative products.
    mask = get_mask(DatasetConfig.neg_images_max_num, num_neg_products)
    mask = mask.transpose(0, 1).long()  # (neg_images_max_num, batch_size)

    # rank[j] = number of valid negatives that score higher than the positive.
    rank = torch.zeros(batch_size, dtype=torch.long).to(GlobalConfig.device)
    for i in range(DatasetConfig.neg_images_max_num):
        neg_cos_sim = similarity(context, neg_product_texts[i],
                                 neg_product_text_lengths[i], neg_images[i])
        rank += torch.lt(pos_cos_sim, neg_cos_sim).long() * mask[i]

    num_rank = [0] * (DatasetConfig.neg_images_max_num + 1)
    for i in range(batch_size):
        num_rank[rank[i]] += 1
    return torch.tensor(num_rank).to(GlobalConfig.device)
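# `num_rank[r]` above counts examples whose positive product was outscored by
# exactly r negatives, so recall@k follows by summing the head of that
# histogram. A minimal sketch (function name is ours; expects the histogram
# as a plain Python list, e.g. `num_rank_tensor.tolist()`):
def recall_at_k_sketch(num_rank, k):
    # The positive lands in the top k iff fewer than k negatives beat it.
    return float(sum(num_rank[:k])) / float(sum(num_rank))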