Example #1
0
def feats_pca(cnn, vid_ids_path, num_vids, org_dim, new_dim):

    feat_save_path = config.MSVD_FEATS_DIR + cnn + "_pca" + str(new_dim) + "/"
    print("saving feats to :", feat_save_path)
    utils.create_dir_if_not_exist(feat_save_path)

    vid_ids = utils.read_file_to_list(vid_ids_path)
    vid_clips_list = [vid[:-4] for vid in vid_ids]
    assert len(vid_ids) == num_vids

    vid_feats_all = np.empty((0, org_dim), dtype=np.float32)
    for vid in vid_clips_list:
        # print("loading features from : "+vid)
        vid_feats_path = config.MSVD_FEATS_DIR + cnn + "/" + vid + ".npy"
        vid_feats = np.load(vid_feats_path)
        # print(vid_feats.shape)
        vid_feat_avg = np.mean(vid_feats, axis=0)
        # print(vid_feat_avg.shape)
        vid_feats_all = np.vstack((vid_feats_all, vid_feat_avg))

    print(vid_feats_all.shape)
    # vid_feats_scaled = StandardScaler().fit_transform(vid_feats_all)
    vid_feats_pca = PCA(n_components=new_dim).fit_transform(vid_feats_all)
    print(vid_feats_pca.shape)

    for ind in range(num_vids):
        vid = vid_clips_list[ind]
        vid_feat = vid_feats_pca[ind]
        # print("saving features from : "+vid)
        np.save(feat_save_path + vid + ".npy", vid_feat)
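For reference, a self-contained sketch of the same idea on synthetic data: mean-pool each clip's frame-level features, stack the pooled vectors, then reduce them with PCA. The clip count, feature dimension, and component count below are illustrative assumptions, not values taken from the snippet.

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
# 10 clips, each with 20 synthetic 2048-d frame features, mean-pooled per clip
pooled = np.stack([rng.normal(size=(20, 2048)).mean(axis=0) for _ in range(10)])
# n_components may not exceed the number of pooled vectors (10 here)
reduced = PCA(n_components=8).fit_transform(pooled)
print(pooled.shape, reduced.shape)  # (10, 2048) (10, 8)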
Example #2
0
def main():
    parser = argparse.ArgumentParser(prog="mipsal", description='Assemble and link a MIPS assembly program.')
    parser.add_argument("files", action="store", nargs="+", type=str, help="list of assembly files to process")
    parser.add_argument("--int", action="store_true", default=False, help="output intermediate files")
    parser.add_argument("--obj", action="store_true", default=False, help="output object files")
    parser.add_argument("-o", action="store", dest="out_name", type=str, default="mips.out", help="override output file name", metavar="file_name")
    parser.add_argument("-l", "--link", action="append", help="add file to program when linking. This option can be used more than once", metavar="file_name")
    args = parser.parse_args()

    obj_code = []
    for input_file in args.files:
        ints, objs = assembler.assemble(input_file)
        obj_code.append(objs)
        file_name = utils.get_file_name(input_file)
        if args.int:
            int_file = file_name + ".int"
            utils.write_file_from_list(int_file, ints)
        if args.obj:
            obj_file = file_name + ".o"
            utils.write_file_from_list(obj_file, objs)
    if args.link is not None:
        for link_file in args.link:
            obj_code.append([x.strip() for x in utils.read_file_to_list(link_file)])
    output = linker.link(obj_code)
    utils.write_file_from_list(args.out_name, output)
    def __fetch_sensitive_apis(self):
        """
        :return: 0 success 1 failure
        """
        self.sensitive_apis = []
        output_apis_txt_path = os.path.join(self.dst_output_path, 'apis.txt')
        if not os.path.exists(output_apis_txt_path):
            smali_search_result = glob.glob(os.path.join(
                self.smali_dir_path, "**\\*.smali"),
                                            recursive=True)
            for smali_file in smali_search_result:
                if extract_sensitive_apis_list_from_smali(
                        self.sensitive_apis, smali_file) != STATUS_OK:
                    print('extract apis failed')
                    return STATUS_ERR
            write_list_to_file(self.sensitive_apis, output_apis_txt_path)

        if (not self.sensitive_apis) and read_file_to_list(
                self.sensitive_apis, output_apis_txt_path) != STATUS_OK:
            return STATUS_ERR
        else:
            get_filtered_vector(
                self.feature_list, self.sensitive_apis,
                CONSTANTS['SENSITIVE_APIS_106']['REFERENCE_LIST'])
            return STATUS_OK
    def __fetch_intent_actions(self):
        """
        :return: 0 success 1 failure
        """
        self.intent_actions = []
        output_actions_txt_path = os.path.join(self.dst_output_path,
                                               'actions.txt')
        if not os.path.exists(output_actions_txt_path):
            if extract_spec_list_from_file(
                    self.intent_actions, self.am_processed_path,
                    EXTRACT_SPECS['ACTION']) != STATUS_OK:
                return STATUS_ERR
            write_list_to_file(self.intent_actions, output_actions_txt_path)
        if (not self.intent_actions) and read_file_to_list(
                self.intent_actions, output_actions_txt_path) != STATUS_OK:
            return STATUS_ERR
        else:
            if self.include_intent_actions_126:
                get_filtered_vector(
                    self.feature_list, self.intent_actions,
                    CONSTANTS['INTENT_ACTIONS_126']['REFERENCE_LIST'])
                return STATUS_OK
            elif self.include_intent_actions_110:
                get_filtered_vector(
                    self.feature_list, self.intent_actions,
                    CONSTANTS['INTENT_ACTIONS_110']['REFERENCE_LIST'])
                return STATUS_OK
def _init():
    read_file_to_list(CONSTANTS['PERMISSIONS_147']['REFERENCE_LIST'],
                      CONSTANTS['PERMISSIONS_147']['REFERENCE_FILE'])
    read_file_to_list(CONSTANTS['INTENT_ACTIONS_126']['REFERENCE_LIST'],
                      CONSTANTS['INTENT_ACTIONS_126']['REFERENCE_FILE'])
    read_file_to_list(CONSTANTS['INTENT_ACTIONS_110']['REFERENCE_LIST'],
                      CONSTANTS['INTENT_ACTIONS_110']['REFERENCE_FILE'])
    read_file_to_list(CONSTANTS['SENSITIVE_APIS_106']['REFERENCE_LIST'],
                      CONSTANTS['SENSITIVE_APIS_106']['REFERENCE_FILE'])
    read_file_to_list(CONSTANTS['ANDROID_PACKAGES']['REFERENCE_LIST'],
                      CONSTANTS['ANDROID_PACKAGES']['REFERENCE_FILE'])
    # for api_level, api_dict in CONSTANTS['PERMISSION_MAPPINGS'].items():
    #     read_permission_map_file_to_dict(
    #         api_dict['REFERENCE_DICT'], api_dict['REFERENCE_FILE'])
    read_permission_map_file_to_dict(
        CONSTANTS['PERMISSION_MAPPINGS']['16']['REFERENCE_DICT'],
        CONSTANTS['PERMISSION_MAPPINGS']['16']['REFERENCE_FILE'])
Example #6
0
def main():
    parser = argparse.ArgumentParser(prog="mipsl", description='Link a MIPS program from multiple object files.')
    parser.add_argument("files", action="store", nargs="+", type=str, help="list of object files to process")
    parser.add_argument("-o", action="store", dest="out_name", type=str, default="mips.out", help="override output file name", metavar="file_name")
    args = parser.parse_args()

    obj_code = []
    for link_file in args.files:
        obj_code.append([x.strip() for x in utils.read_file_to_list(link_file)])
    output = link(obj_code)
    utils.write_file_from_list(args.out_name, output)
    def __fetch_package_call_graph(self):
        """
        Extract by package name
        """
        # Read the adjacency matrix and list of functions
        all_funcs = []
        read_file_to_list(all_funcs,
                          os.path.join(self.dst_output_path, 'all_funcs.txt'))
        adj = sp.csr_matrix(
            sp.load_npz(os.path.join(self.dst_output_path, 'adj_matrix.npz')))
        features = sp.csr_matrix(
            sp.load_npz(os.path.join(self.dst_output_path,
                                     'node_features.npz')))
        """ 65 * 65 """
        pkg_adj_matrix = np.zeros((65, 65), dtype=np.uint32)
        pkg_adj_matrix[0, :] = np.ones((1, 65), dtype=np.uint32)
        """ 65 * (147 + 126) """
        pkg_node_features = np.zeros((65, 273), dtype=np.uint32)
        pkg_node_features[0, :] = np.array(np.load(
            os.path.join(
                self.dst_output_path, 'features_' +
                str(self.requested_features - 32) + '.npy'))[0:273],
                                           dtype=np.uint32)

        for i in range(len(all_funcs)):
            caller_idx = self.__get_pkg_idx(all_funcs[i])
            if caller_idx == -1:
                continue
            else:
                pkg_node_features[caller_idx, :] = pkg_node_features[
                    caller_idx, :] + features[i, :].todense()
                called_idxs = adj[i].todense().tolist()[0]
                for j in range(len(called_idxs)):
                    if called_idxs[j] == 1:
                        called_idx = self.__get_pkg_idx(all_funcs[j])
                        # skip callees with no package bucket instead of wrapping to index -1
                        if called_idx != -1:
                            pkg_adj_matrix[caller_idx, called_idx] = 1

        np.save(os.path.join(self.dst_output_path, 'pkg_adj_matrix.npy'),
                pkg_adj_matrix)
        np.save(os.path.join(self.dst_output_path, 'pkg_node_features.npy'),
                pkg_node_features)
    def __fetch_pkg_features(self):
        all_funcs = []
        read_file_to_list(all_funcs,
                          os.path.join(self.dst_output_path, 'all_funcs.txt'))
        node_feat_path = os.path.join(self.dst_output_path,
                                      'node_features.npz')
        pkg_features = np.zeros((65, 273), dtype=np.uint8)
        node_features = sp.csr_matrix(sp.load_npz(node_feat_path))
        num = node_features.shape[0]
        for i in range(1, len(all_funcs)):
            if i >= num:
                break
            idx = self.__get_pkg_idx(all_funcs[i])
            if idx == -1:
                pkg_features[
                    0, :] = pkg_features[0, :] + node_features[i, :].todense()
            else:
                pkg_features[idx, :] = pkg_features[idx, :] + node_features[
                    i, :].todense()

        np.save(os.path.join(self.dst_output_path, 'pkg_features.npy'),
                pkg_features)
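A minimal sketch of the bucket-summing pattern used above, with a toy CSR matrix and a hard-coded function-to-package mapping standing in for __get_pkg_idx (whose implementation is not shown); all sizes are illustrative assumptions.

import numpy as np
import scipy.sparse as sp

node_features = sp.csr_matrix(np.eye(4, 6, dtype=np.uint8))  # 4 functions, 6 features
pkg_of_func = [0, 1, 1, -1]  # -1: unknown package, folded into bucket 0 here
pkg_features = np.zeros((2, 6), dtype=np.uint8)
for i in range(node_features.shape[0]):
    idx = pkg_of_func[i] if pkg_of_func[i] != -1 else 0
    pkg_features[idx, :] += node_features[i, :].toarray().ravel()
print(pkg_features)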
Example #9
0
def main():
    parser = argparse.ArgumentParser(
        prog="mipsal",
        description='Assemble and link a MIPS assembly program.')
    parser.add_argument("files",
                        action="store",
                        nargs="+",
                        type=str,
                        help="list of assembly files to process")
    parser.add_argument("--int",
                        action="store_true",
                        default=False,
                        help="output intermediate files")
    parser.add_argument("--obj",
                        action="store_true",
                        default=False,
                        help="output object files")
    parser.add_argument("-o",
                        action="store",
                        dest="out_name",
                        type=str,
                        default="mips.out",
                        help="override output file name",
                        metavar="file_name")
    parser.add_argument(
        "-l",
        "--link",
        action="append",
        help="add file to program when linking. This option can be used more than once",
        metavar="file_name")
    args = parser.parse_args()

    obj_code = []
    for input_file in args.files:
        ints, objs = assembler.assemble(input_file)
        obj_code.append(objs)
        file_name = utils.get_file_name(input_file)
        if args.int:
            int_file = file_name + ".int"
            utils.write_file_from_list(int_file, ints)
        if args.obj:
            obj_file = file_name + ".o"
            utils.write_file_from_list(obj_file, objs)
    if args.link is not None:
        for link_file in args.link:
            obj_code.append(
                [x.strip() for x in utils.read_file_to_list(link_file)])
    output = linker.link(obj_code)
    utils.write_file_from_list(args.out_name, output)
    def load_data(self):
        print('loading {}-{} features'.format(self.dataset_name,
                                              self.cnn_name))
        self.train_data_ids = utils.read_file_to_list(self.train_data_ids_path)
        self.val_data_ids = utils.read_file_to_list(self.val_data_ids_path)
        self.test_data_ids = utils.read_file_to_list(self.test_data_ids_path)
        utils.shuffle_array(self.train_data_ids)
        utils.shuffle_array(self.val_data_ids)
        utils.shuffle_array(self.test_data_ids)
        self.train_data_ids = self.train_data_ids[:1]  # ONLY FOR DEBUG - REMOVE
        self.val_data_ids = self.val_data_ids[:1]
        self.test_data_ids = self.test_data_ids[:1]
        self.train_caps = utils.read_from_json(self.train_caps_path)
        self.val_caps = utils.read_from_json(self.val_caps_path)
        self.test_caps = utils.read_from_json(self.test_caps_path)
        self.vocab = utils.read_from_json(self.vocab_path)
        self.reverse_vocab = utils.read_from_pickle(self.reverse_vocab_path)
        self.vocab_size = len(self.vocab)
        if self.cnn_name in ['ResNet50', 'ResNet152', 'InceptionV3']:
            self.ctx_dim = 2048
        elif self.cnn_name in ['MURALI']:
            self.ctx_dim = 1024
        elif self.cnn_name in ['VGG19']:
            self.ctx_dim = 512
        else:
            raise NotImplementedError()
        self.train_ids = self.get_vid_ids(self.train_data_ids)
        self.val_ids = self.get_vid_ids(self.val_data_ids)
        self.test_ids = self.get_vid_ids(self.test_data_ids)
        self.kf_train = utils.generate_minibatch_idx(len(self.train_data_ids),
                                                     self.mb_size_train)
        self.kf_val = utils.generate_minibatch_idx(len(self.val_data_ids),
                                                   self.mb_size_test)  # TODO - verify test or val
        self.kf_test = utils.generate_minibatch_idx(len(self.test_data_ids),
                                                    self.mb_size_test)
Example #11
0
    def __init__(self, root, transform=None, target_transform=None):
        classes, class_to_idx = self._find_classes(root)
        samples = _make_dataset(root, class_to_idx, IMG_EXTENSIONS)
        cooccuring_tracks_file = os.path.join(root, "cooccurring_tracks.txt")
        with open(cooccuring_tracks_file) as file:
            self.cooccurring_tracks = [[int(n) for n in line.split(',')]
                                       for line in file]
        if len(samples) == 0:
            raise (RuntimeError("Found 0 files in subfolders of: " + root +
                                "\n"
                                "Supported extensions are: " +
                                ",".join(IMG_EXTENSIONS)))

        self.root = root

        self.classes = classes
        self.class_to_idx = class_to_idx
        self.samples = samples
        self.track_targets = [s[1] for s in samples]
        # self.gt_targets = [s[2] for s in samples]

        track_to_gt_list = utils.read_file_to_list(
            os.path.join(root, 'track_gt.txt'))
        track_to_gt_dict = utils.list_to_dict(track_to_gt_list)

        gtclass_to_idx = {}
        gt_idx = 0
        gt_targets = []
        for track_id in self.track_targets:
            if track_to_gt_dict[track_id] not in gtclass_to_idx.keys():
                gtclass_to_idx[track_to_gt_dict[track_id]] = gt_idx
                gt_idx += 1
            label = gtclass_to_idx[track_to_gt_dict[track_id]]
            gt_targets.append(label)

        self.gt_targets = gt_targets

        track_idx_to_sample_idx = {}
        for track_idx in np.unique(self.track_targets):
            track_idx_to_sample_idx[track_idx] = np.where(
                self.track_targets == track_idx)[0]

        self.track_idx_to_sample_idx = track_idx_to_sample_idx

        self.transform = transform
        self.target_transform = target_transform
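The track-to-ground-truth remapping above reduces to assigning a dense class index per unique ground-truth label; a self-contained sketch with made-up track/label pairs:

track_to_gt = {0: 'alice', 1: 'bob', 2: 'alice'}  # track id -> ground-truth identity
track_targets = [0, 0, 1, 2, 2]                   # per-sample track ids

gtclass_to_idx, gt_targets = {}, []
for track_id in track_targets:
    gt = track_to_gt[track_id]
    if gt not in gtclass_to_idx:
        gtclass_to_idx[gt] = len(gtclass_to_idx)
    gt_targets.append(gtclass_to_idx[gt])
print(gt_targets)  # [0, 0, 1, 0, 0]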
Example #12
0
def split_data(csv_data):
    vid_ids = utils.read_file_to_list(config.MSVD_VID_IDS_ALL_PATH)
    assert len(vid_ids) == config.TOTAL_VIDS
    utils.shuffle_array(vid_ids)
    train_ids = vid_ids[0:1200]
    val_ids = vid_ids[1200:1300]
    test_ids = vid_ids[1300:1970]
    assert len(train_ids) == config.TRAIN_VIDS
    assert len(val_ids) == config.VAL_VIDS
    assert len(test_ids) == config.TEST_VIDS
    utils.write_list_to_file(config.MSVD_VID_IDS_TRAIN_PATH, train_ids)
    utils.write_list_to_file(config.MSVD_VID_IDS_VAL_PATH, val_ids)
    utils.write_list_to_file(config.MSVD_VID_IDS_TEST_PATH, test_ids)
    train_df = filter_df(csv_data, train_ids,
                         config.MSVD_FINAL_CORPUS_TRAIN_PATH)
    val_df = filter_df(csv_data, val_ids, config.MSVD_FINAL_CORPUS_VAL_PATH)
    test_df = filter_df(csv_data, test_ids, config.MSVD_FINAL_CORPUS_TEST_PATH)
    return train_df, val_df, test_df
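A self-contained sketch of the shuffle-then-slice split used here, keeping the 1200/100/670 boundaries from the snippet but using toy IDs in place of the MSVD clip ids:

import random

vid_ids = [f"vid{i}" for i in range(1970)]
random.Random(0).shuffle(vid_ids)
train_ids, val_ids, test_ids = vid_ids[:1200], vid_ids[1200:1300], vid_ids[1300:1970]
assert len(train_ids) == 1200 and len(val_ids) == 100 and len(test_ids) == 670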
def frames_to_feat(cnn, vid_ids_path, num_vids):
    if cnn == "ResNet50":
        model, height, width, preprocess_input = get_ResNet50_model()
        FEAT_DIM = config.RESNET_FEAT_DIM
    elif cnn == "ResNet152":
        model, height, width, preprocess_input = get_ResNet152_model()
        FEAT_DIM = config.RESNET_FEAT_DIM
    elif cnn == "InceptionV3":
        model, height, width, preprocess_input = get_InceptionV3_model()
        FEAT_DIM = config.INCEPTION_FEAT_DIM
    elif cnn == "VGG19":
        model, height, width, preprocess_input = get_VGG19_model()
        FEAT_DIM = config.VGG_FEAT_DIM
    else:
        raise NotImplementedError()

    feat_save_path = config.MSVD_FEATS_DIR + cnn + "/"
    print("saving feats to :", feat_save_path)
    utils.create_dir_if_not_exist(feat_save_path)

    vid_ids = utils.read_file_to_list(vid_ids_path)
    vid_clips_list = [vid[:-4] for vid in vid_ids]
    assert len(vid_ids) == num_vids

    for vid in vid_clips_list:
        print("extracting features from : " + vid)
        vid_frames_dir = config.MSVD_FRAMES_DIR + "/" + vid
        frames_list = utils.read_dir(vid_frames_dir)
        n_frames = len(frames_list)
        if n_frames > config.MAX_FRAMES:
            n_frames = config.MAX_FRAMES
        selected_frames = extract_frames_equally_spaced(
            n_frames, config.FRAME_SPACING)
        vid_feats = np.empty((0, FEAT_DIM), dtype=np.float32)
        for fid in selected_frames:
            img_path = vid_frames_dir + "/frame" + str(fid) + ".jpg"
            # print("extracting features from : "+img_path)
            img_feat = img_to_feat(img_path, height, width, preprocess_input,
                                   model)
            vid_feats = np.vstack((vid_feats, img_feat))
        print(vid_feats.shape)
        np.save(feat_save_path + vid + ".npy", vid_feats)
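extract_frames_equally_spaced is not defined in the snippet (it takes the frame count and config.FRAME_SPACING). As a rough, hypothetical stand-in, evenly spaced frame indices can be drawn with np.linspace; the real helper may behave differently.

import numpy as np

def evenly_spaced_indices(n_frames, n_samples):
    # hypothetical helper: pick n_samples indices spread evenly across [0, n_frames)
    return np.linspace(0, n_frames - 1, num=min(n_samples, n_frames), dtype=int).tolist()

print(evenly_spaced_indices(100, 8))  # [0, 14, 28, 42, 56, 70, 84, 99]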
Example #14
0
def feats_kmeans(cnn, vid_ids_path, num_vids, org_dim, k):

    feat_save_path = config.MSVD_FEATS_DIR + cnn + "_kmeans" + str(k) + "/"
    print("saving feats to :", feat_save_path)
    utils.create_dir_if_not_exist(feat_save_path)

    vid_ids = utils.read_file_to_list(vid_ids_path)
    vid_clips_list = [vid[:-4] for vid in vid_ids]
    assert len(vid_ids) == num_vids

    for vid in vid_clips_list:
        # print("loading features from : "+vid)
        vid_feats_path = config.MSVD_FEATS_DIR + cnn + "/" + vid + ".npy"
        vid_feats = np.load(vid_feats_path)
        # print(vid_feats.shape)
        kmeans = KMeans(n_clusters=k, init='k-means++',
                        random_state=0).fit(vid_feats)
        vid_feat_kmeans = kmeans.cluster_centers_
        # print(vid_feat_kmeans.shape)
        np.save(feat_save_path + vid + ".npy", vid_feat_kmeans)
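For context, a self-contained sketch of the centroid-pooling idea: cluster one clip's frame features and keep the k cluster centers as a fixed-size summary. Frame count, feature dimension, and k below are illustrative assumptions.

import numpy as np
from sklearn.cluster import KMeans

frame_feats = np.random.default_rng(0).normal(size=(30, 512)).astype(np.float32)
kmeans = KMeans(n_clusters=4, init='k-means++', random_state=0, n_init=10).fit(frame_feats)
print(kmeans.cluster_centers_.shape)  # (4, 512) -- one 512-d center per cluster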
    def __fetch_permissions(self):
        """
        :return: 0 success 1 failure
        """
        self.permissions = []
        output_permissions_txt_path = os.path.join(self.dst_output_path,
                                                   'permissions.txt')
        if not os.path.exists(output_permissions_txt_path):
            if extract_spec_list_from_file(
                    self.permissions, self.am_processed_path,
                    EXTRACT_SPECS['PERMISSION']) != STATUS_OK:
                return STATUS_ERR
            write_list_to_file(self.permissions, output_permissions_txt_path)
        if (not self.permissions) and read_file_to_list(
                self.permissions, output_permissions_txt_path) != STATUS_OK:
            return STATUS_ERR
        else:
            get_filtered_vector(self.feature_list, self.permissions,
                                CONSTANTS['PERMISSIONS_147']['REFERENCE_LIST'])
            return STATUS_OK
Example #16
0
def assemble(input_file):
    cleaned = [strip_comments(line).strip() for line in utils.read_file_to_list(input_file)]
    asm = [line for line in cleaned if line != ""]
    symtbl = SymbolTable(False)
    reltbl = SymbolTable(True)
    # Pass One
    intermediate, errors_one = pass_one(asm, symtbl)
    # Pass Two
    output, errors_two = pass_two(intermediate, symtbl, reltbl)

    if len(errors_one) > 0:
        print("Errors during pass one:")
        for line_num, e in errors_one:
            print("Error: line {0}: {1}".format(line_num, e))
    if len(errors_two) > 0:
        print("Errors during pass two:")
        for line_num, e in errors_two:
            print("Error: line {0}: {1}".format(line_num, e))
    if len(errors_one) > 0 or len(errors_two) > 0:
        print("One or more errors encountered during assembly operation")
    return intermediate, output
Example #17
0
def assemble(input_file):
    cleaned = [
        strip_comments(line).strip()
        for line in utils.read_file_to_list(input_file)
    ]
    asm = [line for line in cleaned if line != ""]
    symtbl = SymbolTable(False)
    reltbl = SymbolTable(True)
    # Pass One
    intermediate, errors_one = pass_one(asm, symtbl)
    # Pass Two
    output, errors_two = pass_two(intermediate, symtbl, reltbl)

    if len(errors_one) > 0:
        print("Errors during pass one:")
        for line_num, e in errors_one:
            print("Error: line {0}: {1}".format(line_num, e))
    if len(errors_two) > 0:
        print("Errors during pass two:")
        for line_num, e in errors_two:
            print("Error: line {0}: {1}".format(line_num, e))
    if len(errors_one) > 0 or len(errors_two) > 0:
        print("One or more errors encountered during assembly operation")
    return intermediate, output
Example #18
0
def main():
    parser = argparse.ArgumentParser(
        prog="mipsl",
        description='Link a MIPS program from multiple object files.')
    parser.add_argument("files",
                        action="store",
                        nargs="+",
                        type=str,
                        help="list of object files to process")
    parser.add_argument("-o",
                        action="store",
                        dest="out_name",
                        type=str,
                        default="mips.out",
                        help="override output file name",
                        metavar="file_name")
    args = parser.parse_args()

    obj_code = []
    for link_file in args.files:
        obj_code.append(
            [x.strip() for x in utils.read_file_to_list(link_file)])
    output = link(obj_code)
    utils.write_file_from_list(args.out_name, output)
Example #19
0
def total_fuel_from_mass(mass):
    fuel = fuel_from_mass(mass)

    if fuel <= 0:
        return 0
    else:
        return fuel + total_fuel_from_mass(fuel)


def sum_updated_list_of_mass_modules(mass_list):
    return reduce(lambda total, current: total + total_fuel_from_mass(current),
                  mass_list, 0)
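fuel_from_mass is defined elsewhere in the file; assuming the usual mass // 3 - 2 rule, a standalone version of the recursion with hand-checkable cases (for mass 1969 the chain is 654 + 216 + 70 + 21 + 5 = 966):

def fuel_from_mass(mass):
    # assumed definition (not shown in the snippet): integer-divide by 3, subtract 2
    return mass // 3 - 2

def total_fuel_from_mass(mass):
    fuel = fuel_from_mass(mass)
    return 0 if fuel <= 0 else fuel + total_fuel_from_mass(fuel)

assert total_fuel_from_mass(1969) == 966
assert total_fuel_from_mass(100756) == 50346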


if __name__ == '__main__':
    mass_modules = read_file_to_list("input/01.txt")
    print(sum_list_of_mass_modules(mass_modules))
    print(sum_updated_list_of_mass_modules(mass_modules))


class Test(unittest.TestCase):
    def test_setup_properly(self):
        self.assertEqual(2, 1 + 1)

    def test_mass_of_twelve_equals_fuel_of_two(self):
        fuel = fuel_from_mass(12)

        self.assertEqual(fuel, 2)

    def test_mass_of_nineteen_sixty_nine_equals_fuel_of_six_fifty_four(self):
        fuel = fuel_from_mass(1969)
Example #20
0
import utils

utils.getVersion()

input = utils.read_file_to_list("input.txt")

# print(input)

size = len(input)
utils.log.debug("len: {}".format(size))

seats = []
seats = [i for i in range(127 * 8 + 8)]
max_seat_id = 0
for i in range(0, size):
    seat_id = utils.get_seat_id(input[i])
    seats[seat_id] = 'X'
    if seat_id > max_seat_id:
        max_seat_id = seat_id

utils.log.info(
    "solution part 1 ==> max_seat_id: {}".format(max_seat_id))  # ==> 928

possible_seats = 0
my_seat = -1
for i in range(0, len(seats)):
    if 1 < i < len(seats) - 1:
        if seats[i - 1] == 'X' and seats[i + 1] == 'X' and seats[i] != 'X':
            utils.log.debug("empty seat: {}".format(seats[i]))
            my_seat = seats[i]
            possible_seats += 1
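utils.get_seat_id is not defined in the snippet; assuming the standard boarding-pass decoding (F/B halve the row range, L/R the column range, seat_id = row * 8 + column), a hypothetical equivalent just reads the pass as a binary number:

def get_seat_id(boarding_pass):
    # F/L mean the lower half (0), B/R the upper half (1); the pass is a 10-bit number
    return int(boarding_pass.translate(str.maketrans("FBLR", "0101")), 2)

assert get_seat_id("FBFBBFFRLR") == 357  # row 44, column 5 -> 44 * 8 + 5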
Example #21
0
            intcode[target] = intcode[augend] + intcode[addend]
        elif opcode == "MULTIPLICATION":
            multiplier = intcode[pointer + 1]
            multiplicand = intcode[pointer + 2]
            target = intcode[pointer + 3]

            intcode[target] = intcode[multiplier] * intcode[multiplicand]
        else:
            raise ValueError("opcode should be 1, 2 or 99")
        pointer += 4

    return intcode


if __name__ == "__main__":
    intcode = [int(x) for x in read_file_to_list("input/02.txt")[0].split(",")]

    # replace two positions with hardcoded data (via instructions)
    intcode[1] = 12
    intcode[2] = 2

    print(process_intcode(intcode))
    print(f"the value at position 0 after the program halts is: {intcode[0]}")

    intcode = [int(x) for x in read_file_to_list("input/02.txt")[0].split(",")]
    output_19690720 = find_output_19690720(intcode)
    print(f"100 * noun + verb = {100 * output_19690720[0] + output_19690720[1]}")
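The top of process_intcode is cut off above; a self-contained sketch of the usual position-mode interpreter for opcodes 1 (add), 2 (multiply), and 99 (halt), which the visible branches correspond to:

def run_intcode(intcode):
    pointer = 0
    while intcode[pointer] != 99:
        opcode, a, b, target = intcode[pointer:pointer + 4]
        if opcode == 1:
            intcode[target] = intcode[a] + intcode[b]
        elif opcode == 2:
            intcode[target] = intcode[a] * intcode[b]
        else:
            raise ValueError("opcode should be 1, 2 or 99")
        pointer += 4
    return intcode

assert run_intcode([1, 0, 0, 0, 99]) == [2, 0, 0, 0, 99]
assert run_intcode([2, 3, 0, 3, 99]) == [2, 3, 0, 6, 99]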


class Test(unittest.TestCase):
    def test_setup_properly(self):
Example #22
0
def gen_vocab(df, whichdata):
    if whichdata == "test":
        outfname = config.MURALI_MSVD_VID_CAPS_TEST_PATH
        dictsize = config.MURALI_TEST_VIDS
        capspath = config.MURALI_MSVD_CAPTIONS_TEST_PATH
    elif whichdata == "val":
        outfname = config.MURALI_MSVD_VID_CAPS_VAL_PATH
        dictsize = config.MURALI_VAL_VIDS
        capspath = None
        raise NotImplementedError()
    else:
        outfname = config.MURALI_MSVD_VID_CAPS_TRAIN_PATH
        dictsize = config.MURALI_TRAIN_VIDS
        capspath = config.MURALI_MSVD_CAPTIONS_TRAIN_PATH
    vocab = set()
    punct_dict = get_punctuations()
    translator = string.maketrans("", "")
    vid_caps_dict = {}
    omitted_caps = []
    for index in range(dictsize):
        vid_id = whichdata + "_" + str(index)
        descriptions = utils.read_file_to_list(capspath + str(index) +
                                               ".txt")[0].split("|")
        vid_caps = []
        for desc in descriptions:
            try:
                cap = desc.strip().encode('UTF-8')
                if len(cap) > 0:
                    vid_caps.append(cap)
            except Exception as e:
                # print vid_id, " : ", desc.strip()
                omitted_caps.append(vid_id + " : " + desc.strip())
        for vid_cap in vid_caps:
            tokens, _ = tokenize(vid_cap, punct_dict, translator)
            if (vid_id in vid_caps_dict):
                vid_caps_dict[vid_id].append(tokens)
            else:
                vid_caps_dict[vid_id] = [tokens]
            if whichdata == "train":
                vocab |= set(tokens)
    print("Non-ASCII captions omitted :" + str(len(omitted_caps)))
    utils.write_to_json(vid_caps_dict, outfname)
    print("Size of " + whichdata + " vid caps dict: " +
          str(len(vid_caps_dict)))
    assert len(vid_caps_dict) == dictsize
    if whichdata == "train":
        vocab_list = list(vocab)
        vocab_list.sort()
        vocab_dict = {
            vocab_list[index]: index + 2
            for index in range(len(vocab_list))
        }
        # vocab_dict['<bos>'] = 0
        vocab_dict['<eos>'] = 0
        vocab_dict['UNK'] = 1
        vocab_rev_dict = {
            index + 2: vocab_list[index]
            for index in range(len(vocab_list))
        }
        # vocab_rev_dict[0] = '<bos>'
        vocab_rev_dict[0] = '<eos>'
        vocab_rev_dict[1] = 'UNK'
        utils.write_to_json(vocab_dict, config.MURALI_MSVD_VOCAB_PATH)
        utils.write_to_pickle(vocab_rev_dict,
                              config.MURALI_MSVD_REVERSE_VOCAB_PATH)
        print("Size of Vocabulary: " + str(len(vocab)))
    return vocab, vid_caps_dict, omitted_caps
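The vocabulary bookkeeping at the end reduces to reserving indices 0 and 1 for '<eos>' and 'UNK' and offsetting the sorted words by 2; a compact sketch with toy tokens:

words = sorted({'a', 'man', 'is', 'playing', 'guitar'})
vocab = {'<eos>': 0, 'UNK': 1, **{w: i + 2 for i, w in enumerate(words)}}
reverse_vocab = {i: w for w, i in vocab.items()}
assert reverse_vocab[vocab['guitar']] == 'guitar'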
    def __fetch_function_call_graph(self):
        """

        :return: 0 success 1 failure
        """
        """
        Map each component to its intent-action features.
        Component names in the AndroidManifest file may be fully qualified or
        abbreviated; a one-word name is treated as abbreviated and is prefixed
        with the package name. In comp_dict the key is the class path and the
        value is the component's action feature vector; every method of that
        class inherits these action features.
        """
        comp_dict = {}
        for comp_match in COMPONENT_PATTERN.finditer(self.am_content):
            action_list = []
            comp_action_features = []
            comp_detail = comp_match.group(0)
            comp_name = comp_match.group('compname')
            if comp_name.startswith('.'):
                comp_name = self.package_name + comp_name
            elif len(comp_name.split('.')) == 1:
                comp_name = self.package_name + '.' + comp_name
            class_path = join_class_path(comp_name)
            for action_match in INTENT_ACTION_PATTERN.finditer(comp_detail):
                action_list.append(action_match.group('action').split('.')[-1])
            get_filtered_vector(
                comp_action_features, action_list,
                CONSTANTS['INTENT_ACTIONS_126']['REFERENCE_LIST'])
            comp_dict[class_path] = np.array(comp_action_features,
                                             dtype=np.uint8,
                                             ndmin=2)

        output_func_call_pairs_txt_path = os.path.join(self.dst_output_path,
                                                       'func_call_pairs.txt')
        if not os.path.exists(output_func_call_pairs_txt_path):
            temp_dict = {}
            smali_search_result = glob.glob(os.path.join(
                self.smali_dir_path, "**\\*.smali"),
                                            recursive=True)
            for smali_file in smali_search_result:
                if extract_func_call_pairs_list_from_smali(
                        temp_dict, smali_file) != STATUS_OK:
                    print('extract func call pairs failed')
                    return STATUS_ERR
            self.func_call_pairs = list(temp_dict.keys())
            write_list_to_file(self.func_call_pairs,
                               output_func_call_pairs_txt_path)
            temp_dict.clear()
        if (not self.func_call_pairs) and read_file_to_list(
                self.func_call_pairs,
                output_func_call_pairs_txt_path) != STATUS_OK:
            return STATUS_ERR

        all_funcs_set = set()
        for call_pair in self.func_call_pairs:
            temp_list = call_pair.split(' ')
            if len(temp_list) == 3:
                all_funcs_set.add(temp_list[0])
                all_funcs_set.add(temp_list[2])
            elif len(temp_list) == 2:
                print('length 2 -> ' + ','.join(temp_list))
            elif len(temp_list) == 1:
                print('length 1 -> ' + ','.join(temp_list))
            elif len(temp_list) == 0:
                print('length 0')
            else:
                print('other length ' + str(len(temp_list)))

        # Reserve one extra node: index 0 is a synthetic MainNode for the whole app
        self.nodes_num = len(all_funcs_set) + 1
        if self.nodes_num > 30000:
            return STATUS_ERR
        all_funcs_set = None
        print('nodes num->', self.nodes_num)

        self.adj_matrix = np.zeros((self.nodes_num, self.nodes_num),
                                   dtype=np.uint8)
        self.node_features = np.zeros((self.nodes_num, 273), dtype=np.uint8)
        self.node_labels = []
        all_funcs = []
        api_lv_match = TARGET_SDK_VER_PATTERN.search(self.am_content)
        if not api_lv_match:
            api_lv_match = MIN_SDK_VER_PATTERN.search(self.am_content)
        if api_lv_match and int(api_lv_match.group('apilevel')) >= 16:
            self.api_level = api_lv_match.group('apilevel')

        # MainNode is characterized by the whole app's feature vector; its label
        # is the app's label: malicious = [1, 0], benign = [0, 1]
        all_funcs.append('MainNode')
        self.node_labels.append([1, 0] if self.is_malicious else [0, 1])
        self.adj_matrix[0] = np.ones((1, self.nodes_num), dtype=np.uint8)
        self.node_features[0] = np.array(self.feature_list,
                                         dtype=np.uint8)[0:273]

        for call_pair in self.func_call_pairs:
            temp_list = call_pair.split(' ')
            if len(temp_list) == 3:
                caller = temp_list[0]
                called = temp_list[2]
                """
                Extract by API
                """

                # row :caller| column :called
                caller_idx = self.__process_func(caller, all_funcs, comp_dict)
                called_idx = self.__process_func(called, all_funcs, comp_dict)
                self.adj_matrix[caller_idx, called_idx] = 1
            elif len(temp_list) == 2:
                print('length 2 -> ' + ','.join(temp_list))
            elif len(temp_list) == 1:
                print('length 1 -> ' + ','.join(temp_list))
            elif len(temp_list) == 0:
                print('length 0')
            else:
                print('other length ' + str(len(temp_list)))

        write_list_to_file(all_funcs,
                           os.path.join(self.dst_output_path, 'all_funcs.txt'))
        return STATUS_OK
    print('Loading model from checkpoint {}'.format(
        config.model.checkpoint_path))
    checkpoint = torch.load(config.model.checkpoint_path)
    embedding_size = checkpoint['embedding_size']

    # CUDA for PyTorch
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    model = models.load_model(config.model.model_arch,
                              device,
                              embedding_size=embedding_size)
    model.load_state_dict(checkpoint['model_state_dict'])

    filename = os.path.join(config.dataset.movie.dataset_path, 'bbx.txt')
    bbx_list = utils.read_file_to_list(filename)

    plotter = utils.VisdomPlotter(config.visdom.server,
                                  env_name='video_annotation',
                                  port=config.visdom.port)

    vd_utils.annotate_video(
        config.dataset.movie.movie_path,
        config.output.video_dir,
        model,
        device,
        max_frame=config.dataset.movie.num_frame,
        bbx_list=bbx_list,
        tracker_max_age=config.hyperparameters.tracker_max_age,
        plotter=plotter,
        name='base')
Example #25
0
def annotate_video(movie_file_path: str,
                   dataset_path: str,
                   output_path: str,
                   model: nn.Module,
                   device,
                   max_frame: int = 100000,
                   tracker_max_age: int = 10,
                   plotter: utils.plotter_utils.VisdomPlotter = None,
                   name: str = '',
                   compute_track_mean: bool = False):

    filename = os.path.join(dataset_path, 'bbx.txt')
    print('Getting annotations from {}'.format(filename))
    bbx_list = utils.read_file_to_list(filename)

    if bbx_list:
        bounding_boxes_list = bbx_list
    else:
        bounding_boxes_list = get_bounding_boxes(movie_file_path,
                                                 max_frame=max_frame,
                                                 tracker_max_age=tracker_max_age)

    print('Extracting ROI of the video.')
    cropped_image_list = get_cropped_images(movie_file_path,
                                            bounding_boxes_list,
                                            max_frame=max_frame)

    track_dict = get_track_dict(bounding_boxes_list)
    frame_dict = get_frame_dict(bounding_boxes_list)
    bbx_dict = get_bbx_dict(bounding_boxes_list)

    # Data transform
    data_transform = transforms.Compose([
        transforms.ToTensor()
    ])

    dataset = NumpyDataset(cropped_image_list,
                           transform=data_transform)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             num_workers=2,
                                             batch_size=100)

    print('Extracting features.')
    model = model.to(device)
    features = ml_utils.extract_features(dataloader,
                                         model,
                                         device)
    cluster_techniques_list = ['kmeans', 'spectral', 'hac']

    tsne_features, tsne_chosen_samples = projection_utils.tsne_projection(features)
    pca_features, pca_chosen_samples = projection_utils.pca_projection(features)

    # Frame level clustering
    print('Performing frame level clustering.')
    for cluster_method in cluster_techniques_list:
        cluster_name = '{}_frame_level_{}'.format(name, cluster_method)
        predictions, data_dict = clustering.cluster_techniques(
            features, cluster_method, max_clusters=10)

        write_video(movie_file_path,
                    output_path,
                    predictions,
                    frame_dict,
                    name=cluster_name,
                    max_frame=max_frame)

        plotter.scatter_plot(cluster_name + '_tsne',
                             tsne_features,
                             predictions[tsne_chosen_samples])
        plotter.scatter_plot(cluster_name + '_pca',
                             pca_features,
                             predictions[pca_chosen_samples])

    # Add ground truth if it exists
    gt_file_path = os.path.join(dataset_path, 'bbx_gt.txt')
    if os.path.isfile(gt_file_path):
        print('Creating ground truth video and plots.')
        bbx_to_gt_list = utils.read_file_to_list(gt_file_path)
        bbx_to_gt_dict = utils.list_to_dict(bbx_to_gt_list)

        groundtruth = []
        gt_to_idx_dict = {}
        bbx_count = 0
        for bbx in bounding_boxes_list:
            bbx_idx = bbx[2]
            gt = bbx_to_gt_dict[bbx_idx]
            if gt not in gt_to_idx_dict.keys():
                gt_to_idx_dict[gt] = bbx_count
                bbx_count += 1
            label = gt_to_idx_dict[gt]
            groundtruth.append(label)
        groundtruth = np.array(groundtruth)

        gt_name = '{}_gt'.format(name)
        write_video(movie_file_path,
                    output_path,
                    groundtruth,
                    frame_dict,
                    name=gt_name,
                    max_frame=max_frame)

        plotter.scatter_plot(gt_name + '_tsne',
                             tsne_features,
                             groundtruth[tsne_chosen_samples])
        plotter.scatter_plot(gt_name + '_pca',
                             pca_features,
                             groundtruth[pca_chosen_samples])

    # Track level clustering
    if compute_track_mean:
        print('Performing track level clustering.')

        mean_features = []
        track_to_idx_dict = {}
        for idx, track_idx in enumerate(track_dict.keys()):
            feature_track = features[track_dict[track_idx]]
            mean_features.append(np.mean(feature_track, axis=0))
            track_to_idx_dict[track_idx] = idx
        mean_features = np.asarray(mean_features)

        for cluster_method in cluster_techniques_list:
            cluster_name = '{}_track_level_{}'.format(name, cluster_method)
            mean_predictions, data_dict = clustering.cluster_techniques(mean_features,
                                                                        cluster_method,
                                                                        max_clusters=10)
            predictions = []
            for bbx_idx in bbx_dict.keys():
                track_idx = track_to_idx_dict[bbx_dict[bbx_idx][0]]
                predictions.append(mean_predictions[track_idx])
            predictions = np.array(predictions)

            write_video(movie_file_path,
                        output_path,
                        predictions,
                        frame_dict,
                        name=cluster_name,
                        max_frame=max_frame)

            plotter.scatter_plot(cluster_name + '_tsne',
                                 tsne_features,
                                 predictions[tsne_chosen_samples])
            plotter.scatter_plot(cluster_name + '_pca',
                                 pca_features,
                                 predictions[pca_chosen_samples])
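A condensed, self-contained sketch of the track-level step above: average the per-box features of each track, cluster the track means, then broadcast each track's cluster label back to its boxes. Box count, feature dimension, and cluster count are illustrative assumptions.

import numpy as np
from sklearn.cluster import KMeans

features = np.random.default_rng(0).normal(size=(12, 128))     # one row per detected box
box_to_track = np.array([0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3])  # track id of each box

track_ids = np.unique(box_to_track)
mean_features = np.stack([features[box_to_track == t].mean(axis=0) for t in track_ids])
track_labels = KMeans(n_clusters=2, random_state=0, n_init=10).fit_predict(mean_features)
box_labels = track_labels[np.searchsorted(track_ids, box_to_track)]
print(box_labels.shape)  # (12,)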
Example #26
0

if __name__ == '__main__':
    has_ids_list = True
    print("removing empty lines in original corpus...")
    preproc_csv(config.MSVD_CSV_DATA_PATH, config.MSVD_PREPROC_CSV_DATA_PATH)
    print("loading processed corpus...")
    csv_data = utils.read_csv_data(config.MSVD_PREPROC_CSV_DATA_PATH)
    print("reading video clips ids...")
    if not has_ids_list:
        vid_ids_list = utils.read_dir_files(
            config.MSVD_VIDEO_DATA_PATH
        )  # read dataset vid ids from video clips directory
    else:
        vid_ids_list = utils.read_file_to_list(
            config.DATA_DIR +
            "present_vid_ids.txt")  # read dataset vid ids from text file
    assert len(vid_ids_list) == config.TOTAL_VIDS
    print("filtering clips in df...")
    present_vid_ids, missing_vid_ids, present_vid_ids_csv = filter_clips(
        csv_data, vid_ids_list)
    assert len(present_vid_ids) == config.TOTAL_VIDS
    print("saving filtered df...")
    df = clean_caps_df(csv_data, present_vid_ids, present_vid_ids_csv)
    print("loading final corpus...")
    csv_data = utils.read_csv_data(config.MSVD_FINAL_CORPUS_PATH)
    print("splitting corpus into train-val-test...")
    train_df, val_df, test_df = split_data(csv_data)
    print("generating vocab for train data...")
    vocab, _ = gen_vocab(train_df, "train")
    _, _ = gen_vocab(val_df, "val")