Example #1
def main():

    #model_path = '../../checkpoints/model_best.pth.tar'
    #model_path = 'D:/deep-learning/proposal-defence/ucf101_s1_rgb_resnet152.pth.tar'
    model_path = '../../models/ucf101_s1_rgb_resnet152.pth.tar'
    data_dir = "~/UCF101/frames"
    start_frame = 0
    num_categories = 101

    model_start_time = time.time()
    #params = torch.load(model_path, map_location={'cuda:0': 'cpu'})
    params = torch.load(model_path)

    spatial_net = models.rgb_resnet152(pretrained=False, num_classes=101)
    spatial_net.load_state_dict(params['state_dict'])
    spatial_net.cuda()
    spatial_net.eval()
    model_end_time = time.time()
    model_time = model_end_time - model_start_time
    print("Action recognition model is loaded in %4.4f seconds." % (model_time))

    #val_file = "./spatial_testlist01_with_labels.txt"
    val_file = "./out2.txt"
    with open(val_file, "r") as f_val:
        val_list = f_val.readlines()
    print("we got %d test videos" % len(val_list))

    line_id = 1
    match_count = 0
    result_list = []
    for line in val_list:
        line_info = line.split(" ")
        clip_path = line_info[0]
        input_video_label = int(line_info[1]) - 1

        spatial_prediction = VideoSpatialPrediction(
                clip_path,
                spatial_net,
                num_categories,
                start_frame)

        avg_spatial_pred_fc8 = np.mean(spatial_prediction, axis=1)
        # print(avg_spatial_pred_fc8.shape)
        result_list.append(avg_spatial_pred_fc8)
        # avg_spatial_pred = softmax(avg_spatial_pred_fc8)

        pred_index = np.argmax(avg_spatial_pred_fc8)
        print("Sample %d/%d: GT: %d, Prediction: %d" % (line_id, len(val_list), input_video_label, pred_index))

        if pred_index == input_video_label:
            match_count += 1
        line_id += 1

    print(match_count)
    print(len(val_list))
    print("Accuracy is %4.4f" % (float(match_count)/len(val_list)))
    np.save("ucf101_s1_rgb_resnet152.npy", np.array(result_list))
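These listings are extracted main() functions; the imports and the VideoSpatialPrediction helper they rely on are not shown. A minimal sketch of the assumed preamble (module names inferred from the call sites, not confirmed by the source):

import os
import time

import numpy as np
import torch

import models  # repository model zoo: rgb_resnet152, rgb_inception_v3, ...
# Assumed helper module: returns an array of class scores with shape
# (num_categories, num_samples), which the examples average over axis=1.
from VideoSpatialPrediction import VideoSpatialPrediction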
Example #2
def main():

    # model_path = '../../checkpoints/rgb_model_best.pth.tar'
    model_path = '../../checkpoints/225_rgb_checkpoint.pth.tar'
    # data_dir = "~/basedata/expression_data/ck+_pre"
    start_frame = 0
    num_categories = 8

    model_start_time = time.time()
    params = torch.load(model_path)

    spatial_net = models.rgb_resnet152(pretrained=False, num_classes=8)
    spatial_net.load_state_dict(params['state_dict'])
    spatial_net.cuda()
    spatial_net.eval()
    model_end_time = time.time()
    model_time = model_end_time - model_start_time
    print("Action recognition model is loaded in %4.4f seconds." % (model_time))

    val_file = "/home/wxc/.pyenv/versions/3.5.5/envs/two_stream/datasets/settings/ck/val_rgb_split.txt"
    with open(val_file, "r") as f_val:
        val_list = f_val.readlines()
    print("we got %d test videos" % len(val_list))

    line_id = 1
    match_count = 0
    result_list = []
    for line in val_list:
        line_info = line.split(" ")
        clip_path = line_info[0]
        input_video_frames = int(line_info[1])
        input_video_label = int(line_info[2])

        spatial_prediction = VideoSpatialPrediction(
                clip_path,
                spatial_net,
                num_categories,
                start_frame)

        avg_spatial_pred_fc8 = np.mean(spatial_prediction, axis=1)
        # print(avg_spatial_pred_fc8.shape)
        result_list.append(avg_spatial_pred_fc8)
        avg_spatial_pred = softmax(avg_spatial_pred_fc8)

        save_fig(avg_spatial_pred, clip_path)

        pred_index = np.argmax(avg_spatial_pred_fc8)
        print("Sample %d/%d: number of frames: %d, GT: %d, Prediction: %d, %s"
              % (line_id, len(val_list), input_video_frames, input_video_label, pred_index, clip_path))

        if pred_index == input_video_label:
            match_count += 1
        line_id += 1

    print(match_count)
    print(len(val_list))
    print("Accuracy is %4.4f" % (float(match_count)/len(val_list)))
    # NOTE: the output filename appears to be carried over from the UCF101 script;
    # rename it when saving CK+ results.
    np.save("ucf101_s1_rgb_resnet152.npy", np.array(result_list))
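Example #2 additionally calls softmax and save_fig, which are not shown either. A minimal sketch under that assumption: a numerically stable softmax over the 1-D score vector, plus a hypothetical save_fig that plots the class distribution per clip (both names and behavior are inferred from the call sites, not confirmed):

import os

import matplotlib
matplotlib.use("Agg")  # headless backend for batch evaluation
import matplotlib.pyplot as plt
import numpy as np

def softmax(x):
    # Numerically stable softmax for a 1-D score vector.
    e = np.exp(x - np.max(x))
    return e / e.sum()

def save_fig(probs, clip_path):
    # Hypothetical helper: bar-plot the predicted class distribution for one clip.
    name = os.path.basename(clip_path.rstrip("/"))
    plt.figure()
    plt.bar(np.arange(len(probs)), probs)
    plt.xlabel("class index")
    plt.ylabel("probability")
    plt.title(name)
    plt.savefig(name + ".png")
    plt.close()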
Example #3
def main():

    model_path = '../../checkpoints/model_best.pth.tar'
    data_dir = "~/UCF101/frames"
    start_frame = 0
    num_categories = 101

    model_start_time = time.time()
    params = torch.load(model_path)

    spatial_net = models.rgb_resnet152(pretrained=False, num_classes=101)
    spatial_net.load_state_dict(params['state_dict'])
    spatial_net.cuda()
    spatial_net.eval()
    model_end_time = time.time()
    model_time = model_end_time - model_start_time
    print("Action recognition model is loaded in %4.4f seconds." % (model_time))

    val_file = "./testlist01_with_labels.txt"
    with open(val_file, "r") as f_val:
        val_list = f_val.readlines()
    print("we got %d test videos" % len(val_list))

    line_id = 1
    match_count = 0
    result_list = []
    for line in val_list:
        line_info = line.split(" ")
        clip_path = line_info[0]
        input_video_label = int(line_info[1]) - 1

        spatial_prediction = VideoSpatialPrediction(
                clip_path,
                spatial_net,
                num_categories,
                start_frame)

        avg_spatial_pred_fc8 = np.mean(spatial_prediction, axis=1)
        # print(avg_spatial_pred_fc8.shape)
        result_list.append(avg_spatial_pred_fc8)
        # avg_spatial_pred = softmax(avg_spatial_pred_fc8)

        pred_index = np.argmax(avg_spatial_pred_fc8)
        print("Sample %d/%d: GT: %d, Prediction: %d" % (line_id, len(val_list), input_video_label, pred_index))

        if pred_index == input_video_label:
            match_count += 1
        line_id += 1

    print(match_count)
    print(len(val_list))
    print("Accuracy is %4.4f" % (float(match_count)/len(val_list)))
    np.save("ucf101_s1_rgb_resnet152.npy", np.array(result_list))
Example #4
def main():
    args = parser.parse_args()
    output_path = logging(args)
    model_path = args.model_path
    data_dir = args.data_dir

    start_frame = 0
    num_samples = 25 if args.modality[:3] == 'rgb' else 1
    ext = ".png" if args.modality == "rhythm" and args.dataset == "hmdb51" else ".jpg"
    num_categories = 51 if args.dataset == 'hmdb51' else 101
    new_size = 224

    model_start_time = time.time()
    params = torch.load(model_path)

    if args.architecture == "inception_v3":
        new_size = 299
        if args.modality == "rhythm":
            spatial_net = models.flow_inception_v3(pretrained=False,
                                                   channels=1,
                                                   num_classes=num_categories)
        else:
            spatial_net = models.rgb_inception_v3(pretrained=False,
                                                  channels=3,
                                                  num_classes=num_categories)
    else:
        if args.modality == "rhythm":
            spatial_net = models.flow_resnet152(pretrained=False,
                                                channels=1,
                                                num_classes=num_categories)
        else:
            spatial_net = models.rgb_resnet152(pretrained=False,
                                               channels=3,
                                               num_classes=num_categories)

    spatial_net.load_state_dict(params['state_dict'])
    spatial_net.cuda()
    spatial_net.eval()
    model_end_time = time.time()
    model_time = model_end_time - model_start_time
    print("Action recognition spatial model is loaded in %4.4f seconds." %
          (model_time))

    test_path = os.path.join(args.settings, args.dataset)
    if args.w:
        test_file = os.path.join(test_path, "dataset_list.txt")
    else:
        test_file = os.path.join(test_path, "test_split%d.txt" % (args.split))
    print(test_file)
    with open(test_file, "r") as f_test:
        test_list = f_test.readlines()
    print("we got %d videos" % len(test_list))

    line_id = 1
    match_count = 0
    result_list = []

    lines = []
    if args.vr_approach == 3:
        direction_path = os.path.join(args.settings, args.dataset,
                                      "direction.txt")
        lines = [int(line.rstrip('\n')) for line in open(direction_path)]
    elif args.vr_approach == 4:
        direction_path = os.path.join(args.settings, args.dataset,
                                      "direction_video.txt")
        lines = {
            line.split()[0]: int(line.split()[1])
            for line in open(direction_path)
        }

    for line in test_list:
        line_info = line.split(" ")
        clip_path = os.path.join(data_dir, line_info[0])
        num_frames = int(line_info[1])
        input_video_label = int(line_info[2])

        video_name = clip_path.split("/")[-1]
        if args.vr_approach == 3:
            index = lines[input_video_label]
        elif args.vr_approach == 4:
            index = lines[video_name]
        else:
            index = args.vr_approach

        spatial_prediction = VideoSpatialPrediction(args.modality,
                                                    clip_path,
                                                    spatial_net,
                                                    num_categories,
                                                    start_frame,
                                                    num_frames,
                                                    num_samples,
                                                    index,
                                                    new_size=new_size,
                                                    ext=ext)
        avg_spatial_pred_fc8 = np.mean(spatial_prediction, axis=1)
        result_list.append(avg_spatial_pred_fc8)

        pred_index = np.argmax(avg_spatial_pred_fc8)
        print(args.modality + " split " + str(args.split) +
              ", sample %d/%d: GT: %d, Prediction: %d ==> correct: %d" %
              (line_id, len(test_list), input_video_label, pred_index,
               match_count))

        if pred_index == input_video_label:
            match_count += 1
        line_id += 1

    print(match_count)
    print(len(test_list))
    print("Accuracy is: %4.4f" % (float(match_count) / len(test_list)))

    npy_name = args.dataset + "_" + args.modality + "_" + args.architecture + "_s" + str(
        args.split) + ".npy"
    npy_path = os.path.join(output_path, npy_name)
    np.save(npy_path, np.array(result_list))
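Examples #4 and #5 read their options from a module-level argparse parser that is not shown (Example #4 also calls a logging(args) helper that returns an output directory). A sketch of that parser, reconstructed from the args.* attributes the code accesses; defaults and help strings are guesses:

import argparse

parser = argparse.ArgumentParser(description="Spatial-stream evaluation")
parser.add_argument('--model_path', type=str, help='checkpoint .pth.tar to load')
parser.add_argument('--data_dir', type=str, help='root directory of extracted frames')
parser.add_argument('--settings', type=str, default='../datasets/settings')
parser.add_argument('--dataset', type=str, choices=['ucf101', 'hmdb51'], default='ucf101')
parser.add_argument('--modality', type=str, default='rgb')  # e.g. 'rgb', 'rgb2', 'rhythm'
parser.add_argument('--architecture', type=str, default='resnet152')  # or 'inception_v3'
parser.add_argument('--split', type=int, default=1)
parser.add_argument('--vr_approach', type=int, default=1)  # values 3/4 switch to the direction files
parser.add_argument('--w', action='store_true')  # evaluate dataset_list.txt instead of a test split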
Example #5
def main():
    args = parser.parse_args()

    #model_path = '../../parameters/'+args.architecture+"/"+args.modality+'_s'+str(args.split)+'.pth.tar'
    #data_dir = '../datasets/'+args.dataset+'_frames'
    #data_dir = '/home/Datasets/UCF-101-OF_CPU'
    model_path = args.model_path
    data_dir = args.data_dir

    start_frame = 0
    if args.modality[:3] == 'rgb':
        num_samples = 25
    else:
        num_samples = 1
    num_categories = 51 if args.dataset == 'hmdb51' else 101

    model_start_time = time.time()
    params = torch.load(model_path)

    new_size = 224
    if args.architecture == "inception_v3":
        new_size = 299
        if args.modality == "rhythm":
            spatial_net = models.flow_inception_v3(pretrained=False,
                                                   channels=1,
                                                   num_classes=num_categories)
        else:
            spatial_net = models.rgb_inception_v3(pretrained=False,
                                                  channels=3,
                                                  num_classes=num_categories)
    else:
        if args.modality == "rhythm":
            spatial_net = models.flow_resnet152(pretrained=False,
                                                channels=1,
                                                num_classes=num_categories)
        else:
            spatial_net = models.rgb_resnet152(pretrained=False,
                                               channels=3,
                                               num_classes=num_categories)
    spatial_net.load_state_dict(params['state_dict'])
    spatial_net.cuda()
    spatial_net.eval()
    model_end_time = time.time()
    model_time = model_end_time - model_start_time
    print("Action recognition model is loaded in %4.4f seconds." %
          (model_time))

    val_file = "./splits/" + args.dataset + "/val_split%d.txt" % (args.split)
    #val_file = 'spatial_testlist01_with_labels.txt'
    with open(val_file, "r") as f_val:
        val_list = f_val.readlines()
    print("we got %d test videos" % len(val_list))

    line_id = 1
    match_count = 0

    result = []
    lines = [
        int(line.rstrip('\n'))
        for line in open('../datasets/settings/' + args.dataset +
                         '/direction.txt')
    ]
    for line in val_list:
        line_info = line.split(" ")
        clip_path = os.path.join(data_dir, line_info[0])
        num_frames = int(line_info[1])
        input_video_label = int(line_info[2])
        index = lines[input_video_label] if args.vr_approach == 3 else args.vr_approach
        spatial_prediction = VideoSpatialPrediction(
            args.modality, clip_path, spatial_net, num_categories, start_frame,
            num_frames, num_samples, index, new_size)
        avg_spatial_pred_fc8 = np.mean(spatial_prediction, axis=1)
        result.append(avg_spatial_pred_fc8)
        # avg_spatial_pred = softmax(avg_spatial_pred_fc8)

        pred_index = np.argmax(avg_spatial_pred_fc8)

        print(args.modality + " split " + str(args.split) +
              ", sample %d/%d: GT: %d, Prediction: %d ==> correct: %d" %
              (line_id, len(val_list), input_video_label, pred_index,
               match_count))

        if pred_index == input_video_label:
            match_count += 1
        line_id += 1

    print(match_count)
    print(len(val_list))
    print("Accuracy is : %4.4f" % ((float(match_count) / len(val_list))))
    np.save(
        args.dataset + "_" + args.modality + "_" + args.architecture + "_s" +
        str(args.split) + ".npy", np.array(result))
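The split files consumed by Examples #2 and #4–#6 are parsed as space-separated triples, so each line has the form "<clip_dir> <num_frames> <label>" (Examples #1 and #3 use "<clip_dir> <label>" pairs instead). A hypothetical entry; the frame count here is made up:

ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c01 164 0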
Example #6
def main():
    args = parser.parse_args()

    model_path = '../../checkpoints/'+args.modality+'_s'+str(args.split)+'.pth.tar'
    data_dir = '../../datasets/ucf101_frames'        
    
    start_frame = 0
    if args.modality[:3] == 'rgb':
        num_samples = 25
    else:
        num_samples = 1
    num_categories = 101

    model_start_time = time.time()
    params = torch.load(model_path)

    spatial_net = models.rgb_resnet152(pretrained=False, num_classes=101)
    spatial_net.load_state_dict(params['state_dict'])
    spatial_net.cuda()
    spatial_net.eval()
    model_end_time = time.time()
    model_time = model_end_time - model_start_time
    print("Action recognition model is loaded in %4.4f seconds." % (model_time))


    val_file = "./splits/val_split%d.txt"%(args.split)
    with open(val_file, "r") as f_val:
        val_list = f_val.readlines()
    print("we got %d test videos" % len(val_list))

    line_id = 1
    match_count = 0

    result = []

    for line in val_list:
        line_info = line.split(" ")
        clip_path = os.path.join(data_dir, line_info[0])
        num_frames = int(line_info[1])
        input_video_label = int(line_info[2])
        spatial_prediction = VideoSpatialPrediction(
                "rgb" if args.modality=='rgb2' else args.modality,
                clip_path,
                spatial_net,
                num_categories,
                start_frame,
                num_frames,
                num_samples
                )
        avg_spatial_pred_fc8 = np.mean(spatial_prediction, axis=1)
        result.append(avg_spatial_pred_fc8)
        # avg_spatial_pred = softmax(avg_spatial_pred_fc8)

        pred_index = np.argmax(avg_spatial_pred_fc8)
        
        print(args.modality+" split "+str(args.split)+", sample %d/%d: GT: %d, Prediction: %d" % (line_id, len(val_list), input_video_label, pred_index))

        if pred_index == input_video_label:
            match_count += 1
        line_id += 1

    print(match_count)
    print(len(val_list))
    print("Accuracy is : %4.4f" % ((float(match_count)/len(val_list))))
    np.save("ucf101_"+args.modality+"_resnet152_s"+str(args.split)+".npy", np.array(result))