def setup_model(user_model, model_type, model_path):
    if model_path is not None:
        model_path = "static/" + model_path
    if user_model == "no_model":
        if model_type == "mmbt":
            model = MMBT.from_pretrained("mmbt.hateful_memes.images")
        elif model_type == "fusion":
            model = LateFusion.from_pretrained("late_fusion.hateful_memes")
        elif model_type == "vilbert":
            model = ViLBERT.from_pretrained("vilbert.finetuned.hateful_memes.direct")
        else:  # visual bert
            model = VisualBERT.from_pretrained("visual_bert.finetuned.hateful_memes.direct")
    elif user_model == "mmf":
        if model_type == "mmbt":
            model = MMBT.from_pretrained(model_path)
        elif model_type == "fusion":
            model = LateFusion.from_pretrained(model_path)
        elif model_type == "vilbert":
            model = ViLBERT.from_pretrained(model_path)
        else:
            model = VisualBERT.from_pretrained(model_path)
    else:
        model = MMBT.from_pretrained("mmbt.hateful_memes.images")
    # elif user_model == "onnx": ?????
    return model
def setup_model(user_model, model_type, model_path): if user_model == "no_model": if model_type == "MMBT": model = MMBT.from_pretrained("mmbt.hateful_memes.images") elif model_type == "LateFusion": model = LateFusion.from_pretrained("late_fusion.hateful_memes") elif model_type == "ViLBERT": model = ViLBERT.from_pretrained( "vilbert.finetuned.hateful_memes.direct") else: # visual bert model = VisualBERT.from_pretrained( "visual_bert.finetuned.hateful_memes.direct") elif user_model == "mmf": try: if model_type == "MMBT": model = MMBT.from_pretrained(model_path) elif model_type == "LateFusion": model = LateFusion.from_pretrained(model_path) elif model_type == "ViLBERT": model = ViLBERT.from_pretrained(model_path) else: model = VisualBERT.from_pretrained(model_path) except: return "Sorry, we cannot open the mmf checkpoint you uploaded. It should be an .ckpt file saved from the mmf trainer." else: model = MMBT.from_pretrained("mmbt.hateful_memes.images") # elif user_model == "onnx": ????? return model
def _examples():
    """Example of how to use this explainer."""
    # read data to try
    data_path = r"hm-data/"
    labels = utils.read_labels(data_path + "train.jsonl", True)
    ids = [28061]
    target_labels = [l for l in labels if l["id"] in ids]
    print(f"target_labels = {target_labels}")
    target_images, target_texts = utils.parse_labels(
        target_labels, img_to_array=True, separate_outputs=True)

    # model to explain
    model = MMBT.from_pretrained("mmbt.hateful_memes.images")

    # explainer hyperparameters
    max_evals = 100
    batch_size = 50
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # test the default partition algorithm
    explainer = Explainer(model, max_evals=max_evals, batch_size=batch_size)
    # text_shap_values = explainer.explain(target_images, target_texts, "text_only")
    image_shap_values = explainer.explain(target_images, target_texts, "image_only")

    # plots
    # explainer.text_plot(text_shap_values)
    explainer.image_plot(image_shap_values)
def main():  # pragma: no cover
    import matplotlib.pyplot as plt
    import torch
    from PIL import Image
    from mmf.models.mmbt import MMBT

    # check whether CUDA is available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    image_path = "./hateful_memes/example.jpg"
    image = Image.open(image_path)
    text = "look how many people love you"

    model = MMBTGridHMInterfaceOnlyImage(
        MMBT.from_pretrained("mmbt.hateful_memes.images"), text)
    model.to(device)  # move model to GPU if CUDA is available

    output = model.classify(image)
    plt.imshow(image)
    plt.axis("off")
    plt.show()

    hateful = "Yes" if output["label"] == 1 else "No"
    print("Hateful as per the model?", hateful)
    print(f"Model's confidence: {output['confidence'] * 100:.3f}%")
def testMultiExtremalPerturbationWithSmoothMask():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    text = "How I want to say hello to Asian people"
    model = MMBTGridHMInterfaceOnlyImage(
        MMBT.from_pretrained("mmbt.hateful_memes.images"), text)
    model = model.to(device)

    image_path = "https://img.17qq.com/images/ghhngkfnkwy.jpeg"
    image_tensor = model.imageToTensor(image_path)
    # comment out the next line if moving the tensor to the device raises an error
    image_tensor = image_tensor.to(device)

    _out, out = multi_extremal_perturbation(
        model,
        torch.unsqueeze(image_tensor, 0),
        image_path,
        text,
        0,
        reward_func=contrastive_reward,
        debug=True,
        max_iter=200,
        areas=[0.12],
        smooth=0.5,
        show_text_result=True)
def torchray_multimodal_explain(image_path, text):
    # image_path = "static\\" + image_path
    model = MMBT.from_pretrained("mmbt.hateful_memes.images")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    image_tensor = image2tensor(image_path)
    image_tensor = image_tensor.to(device)

    mask_, hist_, output_tensor, summary, conclusion = multi_extremal_perturbation(
        model,
        image_tensor,
        image_path,
        text,
        0,
        reward_func=contrastive_reward,
        debug=True,
        areas=[0.12])
    # summary is a higher-level explanation in the form of a sentence
    # conclusion is a list of words and their weights
    # output_tensor is the masked image

    masked_image = transforms.ToPILImage()(
        imsc(image_tensor[0], quiet=False)[0]).convert("RGB")
    masked_image.save("torchray.png")
    print(summary)
    return conclusion
def predict_HM(image_name, text):
    model = MMBT.from_pretrained("mmbt.hateful_memes.images")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    image = "static/" + image_name
    output = model.classify(image, text)

    # explainer hyperparameters
    max_evals = 100
    batch_size = 50
    explainer = Explainer(model, max_evals=max_evals, batch_size=batch_size)

    target_images = Image.open(image)
    target_images = np.array(target_images, dtype=np.uint8)
    if target_images.shape[2] > 3:  # drop the alpha channel if present
        target_images = target_images[:, :, :3]
    target_images = target_images.reshape(
        1, target_images.shape[0], target_images.shape[1], target_images.shape[2])
    target_texts = np.array([text])

    image_shap_values = explainer.explain(target_images, target_texts, "image_only")
    PIL_image = explainer.image_plot(image_shap_values)

    exp_image = "shap_" + image_name
    filename = os.path.join(dirname, "../static/" + exp_image)
    PIL_image.save(filename)

    result = []
    hateful = "Hateful" if output["label"] == 1 else "Not Hateful"
    result.append("This image is: " + hateful)
    result.append(f"Model's confidence: {output['confidence'] * 100:.3f}%")
    return result, exp_image
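# Hypothetical usage sketch for predict_HM above (not part of the original module).
# The image file is assumed to already sit under static/; "example.jpg" and the
# caption text are placeholders for illustration only.
result, exp_image = predict_HM("example.jpg", "look how many people love you")
print(result)     # ["This image is: ...", "Model's confidence: ...%"]
print(exp_image)  # file name of the SHAP overlay saved into static/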
def _examples():
    """Example of how to use this explainer."""
    # read data to try
    data_path = r"hm-data/"
    labels = utils.read_labels(data_path + "train.jsonl", True)
    ids = [5643]
    target_labels = [l for l in labels if l["id"] in ids]
    print(f"{target_labels = }")
    target_images, target_texts = utils.parse_labels(
        target_labels, img_to_array=True, separate_outputs=True)

    # model to explain
    model = MMBT.from_pretrained("mmbt.hateful_memes.images")

    # explainer hyperparameters
    max_evals = 100
    batch_size = 50

    # test the default partition algorithm
    explainer = Explainer(model, max_evals=max_evals, batch_size=batch_size)
    text_shap_values = explainer.explain(target_images, target_texts, "text_only")
    image_shap_values = explainer.explain(target_images, target_texts, "image_only")
    img_values, txt_values = explainer.explain(
        target_images, target_texts, mode="multimodal")
def testObjectInitiation():
    try:
        model = MMBTGridHMInterfaceOnlyImage(
            MMBT.from_pretrained("mmbt.hateful_memes.images"), "test text")
    except Exception:
        assert False, "cannot instantiate MMBTGridHMInterfaceOnlyImage object"
    else:
        assert True
def setup_model(user_model, model_type, model_path): if user_model == "no_model": try: if model_type == "MMBT": model = MMBT.from_pretrained("mmbt.hateful_memes.images") elif model_type == "LateFusion": model = LateFusion.from_pretrained("late_fusion.hateful_memes") elif model_type == "ViLBERT": model = ViLBERT.from_pretrained( "vilbert.finetuned.hateful_memes.from_cc_original" ) else: # visual bert model = VisualBERT.from_pretrained( "visual_bert.finetuned.hateful_memes.from_coco" ) except: raise InputError( "Sorry, having trouble opening the models we provided, please try again later." ) elif user_model == "mmf": try: if model_type == "MMBT": model = MMBT.from_pretrained(model_path) elif model_type == "LateFusion": model = LateFusion.from_pretrained(model_path) elif model_type == "ViLBERT": model = ViLBERT.from_pretrained(model_path) else: model = VisualBERT.from_pretrained(model_path) except: raise InputError( "Sorry, we cannot open the mmf checkpoint you uploaded. It should be an .ckpt file saved from the mmf trainer." ) elif user_model == "onnx": model = ONNXInterface(model_path, model_type) else: raise InputError("Please select a model upload type") return model
def multi_predict(imgs, txts, zero_image=False, zero_text=False):
    model = MMBT.from_pretrained("mmbt.hateful_memes.images")
    inputs = zip(imgs, txts)
    res = np.zeros((len(imgs), 2))
    for i, this_input in enumerate(inputs):
        img = Image.fromarray(this_input[0])
        txt = this_input[1]
        this_output = model.classify(img, txt, zero_image, zero_text)
        # store the probability of each class for this image/text pair
        res[i][this_output["label"]] = this_output["confidence"]
        res[i][1 - this_output["label"]] = 1 - this_output["confidence"]
    return res
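# Hypothetical usage sketch for multi_predict above (not part of the original
# module). The image path is an assumption; any RGB image converted to a numpy
# array works. The result holds one [class-0, class-1] probability row per
# image/text pair.
import numpy as np
from PIL import Image

example_image = np.array(Image.open("hm-data/img/05643.png").convert("RGB"))  # assumed path
probabilities = multi_predict([example_image], ["look how many people love you"])
print(probabilities.shape)  # (1, 2)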
def test_mmbt_hm_interface(self): model = MMBT.from_pretrained("mmbt.hateful_memes.images") result = model.classify("https://i.imgur.com/tEcsk5q.jpg", "look how many people love you") self.assertEqual(result["label"], 0) np.testing.assert_almost_equal(result["confidence"], 0.9993, decimal=4) result = model.classify("https://i.imgur.com/tEcsk5q.jpg", "they have the privilege") self.assertEqual(result["label"], 0) np.testing.assert_almost_equal(result["confidence"], 0.9777, decimal=4) result = model.classify("https://i.imgur.com/tEcsk5q.jpg", "hitler and jews") self.assertEqual(result["label"], 1) np.testing.assert_almost_equal(result["confidence"], 0.6342, decimal=4)
def setup_model(user_model, model_type, model_path): if user_model == "no_model": if model_type == "mmbt": model = MMBT.from_pretrained("mmbt.hateful_memes.images") elif model_type == "fusion": fusion = LateFusion.from_pretrained("late_fusion.hateful_memes") elif model_type == "vilbert": vilbert = ViLBERT.from_pretrained("vilbert.finetuned.hateful_memes.direct") elif model_type == "visual_bert" visual_bert_model = VisualBERT.from_pretrained("visual_bert.finetuned.hateful_memes.direct") elif user_model == "mmf": if model_type == "mmbt": model = MMBT.from_pretrained(model_path) elif model_type == "fusion": fusion = LateFusion.from_pretrained(model_path) elif model_type == "vilbert": vilbert = ViLBERT.from_pretrained(model_path) elif model_type == "visual_bert" visual_bert_model = VisualBERT.from_pretrained(model_path) # elif user_model == "onnx": ????? return model
def main():
    image_path = input("enter your image path : ")
    text = input("enter your text : ")

    model = MMBT.from_pretrained("mmbt.hateful_memes.images")
    model.to(torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))

    image_tensor = image2tensor(image_path)
    mask_, hist_, output_tensor, txt_summary, text_explanation = multi_extremal_perturbation(
        model,
        image_tensor,
        image_path,
        text,
        0,  # target label: 0 non-hateful, 1 hateful
        max_iter=50,
        areas=[0.12],
    )
    return output_tensor, txt_summary, text_explanation
def classify(self, image, text_input, image_tensor=None):
    """
    Args:
        image: the input image (PIL image)
        text_input: the text input (str)
        image_tensor: optional image torch.Tensor of size (1, 3, 224, 224)

    Returns:
        the label of the model prediction and the corresponding confidence,
        or the raw softmax scores when an image_tensor is supplied
    """
    scoreFlag = False
    if image_tensor is not None:
        scoreFlag = True
        logits = self.onnx_model_forward(image_tensor, text_input)
    else:
        # transforms.Scale is deprecated in newer torchvision releases (Resize is the replacement)
        p = transforms.Compose([transforms.Scale((224, 224))])
        image, i = imsc(p(image), quiet=True)
        image_tensor = torch.reshape(image, (1, 3, 224, 224))
        logits = self.onnx_model_forward(image_tensor, text_input)

    # fall back to the default MMBT model if the ONNX output shape is unexpected
    if list(torch.tensor(logits).size()) != [1, 2]:
        if self.defaultmodel is None:
            self.defaultmodel = MMBT.from_pretrained("mmbt.hateful_memes.images")
            self.defaultmodel.to(self.device)
        logits = self.defaultmodel.classify(
            image, text_input,
            image_tensor=torch.squeeze(image_tensor.to(self.device), 0))

    scores = nn.functional.softmax(torch.tensor(logits), dim=1)
    if scoreFlag:
        return scores
    confidence, label = torch.max(scores, dim=1)
    return {"label": label.item(), "confidence": confidence.item()}
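# Hypothetical usage sketch for the classify method above (not part of the
# original module), assuming an ONNXInterface instance built the same way as in
# setup_model; the checkpoint and image paths are assumptions for illustration.
from PIL import Image

interface = ONNXInterface("uploads/mmbt.onnx", "MMBT")   # assumed .onnx path
meme = Image.open("hm-data/img/05643.png")               # assumed image path
prediction = interface.classify(meme, "look how many people love you")
print(prediction)  # {"label": 0 or 1, "confidence": float}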
def global_data():
    max_evals = 2
    batch_size = 1
    model = MMBT.from_pretrained("mmbt.hateful_memes.images")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    labels = utils.read_labels(DATA_PATH, True)
    # ids = [5643]  # single input
    ids = [5643, 6937]  # multiple inputs - tested
    target_labels = [l for l in labels if l["id"] in ids]
    target_images, target_texts = utils.parse_labels(
        target_labels, img_to_array=True, separate_outputs=True)
    outputs = model_outputs(model, target_images, target_texts)

    return {
        "max_evals": max_evals,
        "batch_size": batch_size,
        "model": model,
        "target_images": target_images,
        "target_texts": target_texts,
        "outputs": outputs,
    }
def _test_mmbt_hm_interface_from_folder(self):
    with tempfile.TemporaryDirectory() as tmpdir:
        self._create_checkpoint_folder(tmpdir)
        model = MMBT.from_pretrained(tmpdir, interface=True)
        self._test_model_performance(model)
def _test_mmbt_hm_interface_from_file(self):
    with tempfile.NamedTemporaryFile(suffix=".pth") as tmp:
        self._create_checkpoint_file(tmp.name)
        model = MMBT.from_pretrained(tmp.name, interface=True)
        self._test_model_performance(model)
def test_mmbt_hm_interface(self): model = MMBT.from_pretrained("mmbt.hateful_memes.images") self._test_model_performance(model) self._test_mmbt_hm_interface_from_file() self._test_mmbt_hm_interface_from_folder()
import numpy as np
from PIL import Image
from mmxai.interpretability.classification.lime.lime_multimodal import *
from mmf.models.mmbt import MMBT
from mmf.models.visual_bert import VisualBERT

# prepare image, text and model for the explanation generation pipeline
img_path = "tests/mmxai/interpretability/classification/lime/gun.jpeg"
img_try = Image.open(img_path)
text = "How I want to say hello to deliberately hateful Asian people, I hate them"
image_numpy = np.array(img_try)

model_mmbt = MMBT.from_pretrained("mmbt.hateful_memes.images")
model_visualbert = VisualBERT.from_pretrained(
    "visual_bert.finetuned.hateful_memes.from_coco")


# prediction using mock classification model object
def classifier_fn(model, imgs, txts, zero_image=False, zero_text=False):
    inputs = zip(imgs, txts)
    res = np.zeros((len(imgs), 2))
    for i, this_input in enumerate(inputs):
        img = Image.fromarray(this_input[0])
        txt = this_input[1]
        try:
            this_output = model.classify(img, txt,
                                         zero_image=zero_image, zero_text=zero_text)
        except Exception:
            this_output = model.classify(img, txt)
        # store the probability of each class for this image/text pair
        res[i][this_output["label"]] = this_output["confidence"]
        res[i][1 - this_output["label"]] = 1 - this_output["confidence"]
    return res
import requests


def testObjectInitiation():
    try:
        model = MMBTGridHMInterfaceOnlyImage(
            MMBT.from_pretrained("mmbt.hateful_memes.images"), "test text")
    except Exception:
        assert False, "cannot instantiate MMBTGridHMInterfaceOnlyImage object"
    else:
        assert True


# instantiate a model globally for better testing efficiency
MODEL = MMBTGridHMInterfaceOnlyImage(
    MMBT.from_pretrained("mmbt.hateful_memes.images"), "test text")
MODEL = MODEL.to(
    torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))


def testCanGetTextAttribute():
    text = MODEL.text
    assert text == "test text"


def testCanClassifyMultiModalInputs():
    image_path = "https://img.17qq.com/images/ghhngkfnkwy.jpeg"
    text = "How I want to say hello to Asian people"
    try:
        MODEL.classify(image_path, text)
    except Exception:
        assert False, "cannot classify multimodal inputs"
    else:
        assert True