print 'Invalid NeuralTalk results file' sys.exit(-1) captions_dict = pickle.load(open(captions_dict_path)) generated_captions_list = open(nt_results_path, 'r').readlines() tp = 0 for gc_item in generated_captions_list: frame_filename, generated_caption = gc_item.split(SEPARATOR) frame_filename = frame_filename.strip() generated_caption = generated_caption.strip() video_filename = get_videoname(frame_filename) if not video_filename in captions_dict: print 'Video was not found in dictionary:\n%s' % (video_filename) continue original_caption = captions_dict[video_filename] generated_verbs = nlpa.extract_lemmatized_verbs(generated_caption) original_caption = nlpa.remove_invalid_unicode( original_caption) #clean-up caption original_verbs = set(nlpa.extract_lemmatized_verbs(original_caption)) for gverb in generated_verbs: if gverb in original_verbs: tp += 1 break tp_rate = tp / len(generated_captions_list) print 'TP: %d' % tp print 'Total captions: %d' % len(generated_captions_list) print 'True positive rate: %04f' % (tp_rate)
# Score NeuralTalk output against the reference captions.
#
# A generated caption counts as a true positive (tp) when at least one of the
# verbs extracted from it also appears among the verbs extracted from the
# reference caption of the video that the frame belongs to.
if not os.path.isfile(nt_results_path):
    print('Invalid NeuralTalk results file')
    sys.exit(-1)

# NOTE(security): pickle.load executes arbitrary code from the file; only
# point captions_dict_path at a trusted, locally generated pickle.
# Binary mode is required for pickles; 'with' closes the handle promptly.
with open(captions_dict_path, 'rb') as captions_file:
    captions_dict = pickle.load(captions_file)

# Blank lines (typically a trailing newline) carry no caption, so they are
# dropped here and do not inflate the total used as the rate's denominator.
with open(nt_results_path, 'r') as results_file:
    generated_captions_list = [line for line in results_file if line.strip()]

tp = 0
for gc_item in generated_captions_list:
    # Split on the first separator only, so a caption that itself contains
    # the separator does not break the two-way unpacking.
    fields = gc_item.split(SEPARATOR, 1)
    if len(fields) != 2:
        # Still counted in the total, like a video missing from the dict.
        print('Malformed results line skipped:\n%s' % gc_item.strip())
        continue

    frame_filename = fields[0].strip()
    generated_caption = fields[1].strip()

    video_filename = get_videoname(frame_filename)
    if video_filename not in captions_dict:
        print('Video was not found in dictionary:\n%s' % (video_filename))
        continue

    generated_verbs = nlpa.extract_lemmatized_verbs(generated_caption)

    # Clean up the reference caption before extracting its verbs.
    original_caption = nlpa.remove_invalid_unicode(captions_dict[video_filename])
    original_verbs = set(nlpa.extract_lemmatized_verbs(original_caption))

    # One shared verb is enough; each caption contributes at most one hit.
    if any(gverb in original_verbs for gverb in generated_verbs):
        tp += 1

total_captions = len(generated_captions_list)
# float() forces true division: under Python 2, int / int truncates and the
# rate would always print as 0 (or 1). An empty results file yields 0.0
# instead of raising ZeroDivisionError.
tp_rate = float(tp) / total_captions if total_captions else 0.0

print('TP: %d' % tp)
print('Total captions: %d' % total_captions)
print('True positive rate: %04f' % (tp_rate))
# Build three lookup tables from the .srt annotation files found in
# all_srt_path and pickle each of them to disk:
#   captions_dict      Key: video file name   Value: caption
#   captions_inv_dict  Key: caption           Value: video file name
#   verbs_dict         Key: lemmatized verb   Value: [(file name, caption), ...]
all_srt_path = '/Users/zal/CMU/Fall2015/HCMMML/FinalProject/Dataset/MontrealVideoAnnotationDataset/M-VAD/srt_files/all_srt'
dict_filename = '/Users/zal/CMU/Fall2015/HCMMML/FinalProject/Repository/DataProcessing/all_captions_dict.p'
inv_dict_filename = '/Users/zal/CMU/Fall2015/HCMMML/FinalProject/Repository/DataProcessing/all_captions_inv_dict.p'
verbs_dict_filename = '/Users/zal/CMU/Fall2015/HCMMML/FinalProject/Repository/DataProcessing/all_captions_verbs_dict.p'

captions_dict = {}
captions_inv_dict = {}
verbs_dict = {}

# glob.glob already returns paths that include all_srt_path, so each result
# is opened directly instead of being joined with the directory a second time.
for srt_file in glob.glob(join(all_srt_path, '*.srt')):
    with open(srt_file, 'r') as srt_handle:
        all_lines = srt_handle.readlines()

    # Records are read with a stride of four lines: the first line of a record
    # is the video file name and the third is the caption. Stopping at
    # len - 2 guarantees all_lines[i + 2] exists, so a truncated trailing
    # record is skipped instead of raising IndexError.
    for i in range(0, len(all_lines) - 2, 4):
        file_name = all_lines[i].strip()
        caption = all_lines[i + 2].strip()

        captions_dict[file_name] = caption
        # If two videos share a caption, the one read last wins here.
        captions_inv_dict[caption] = file_name

        # Python 2: the line is a byte string, decoded before verb extraction.
        caption_verbs = nlpa.extract_lemmatized_verbs(caption.decode('utf8'))
        for verb in caption_verbs:
            verbs_dict.setdefault(verb, []).append((file_name, caption))

# 'with' makes sure every pickle is flushed and closed before the script ends.
with open(dict_filename, 'wb') as dict_file:
    pickle.dump(captions_dict, dict_file)
with open(inv_dict_filename, 'wb') as inv_dict_file:
    pickle.dump(captions_inv_dict, inv_dict_file)
with open(verbs_dict_filename, 'wb') as verbs_dict_file:
    pickle.dump(verbs_dict, verbs_dict_file)