Example #1
    def _parse_from_voc(self):
        # get all the file names from self.voc_image_set_path
        filenames_list = get_file_lines(self.voc_image_set_path)
        # for each filename in the image set, read its annotation file
        for filename in filenames_list:
            # get the path of the annotation file
            annotation_file = self._get_img_detection_filepath(
                self.voc_annon_path,
                filename.partition(' ')[0])
            # tree of the xml
            tree = ElementTree.parse(annotation_file)
            # get the root element
            root_node = tree.getroot()
            # get file name
            img_filename = root_node.find('filename').text
            img_full_path = self._get_img_filepath(filename.partition(' ')[0])
            # get the size of the image from the annotation xml file
            width, height = self._get_img_size(root_node)

            # get the list of all object elements from the annotation xml
            object_tree_list = root_node.findall('object')
            # only keep images that contain a single annotated object
            if len(object_tree_list) > 1:
                continue
            # for each object tree
            for object_annotation in object_tree_list:
                # build a dictionary with all the information:
                # {filename, foldername, img_full_path, width, height, class_name, xmin, ymin, xmax, ymax}
                row_dictionary = {}

                class_name = self._get_annotation_classname(object_annotation)
                # the folder name lives on the root element, not on the object
                img_foldername = root_node.find('folder').text
                obj_bbox = object_annotation.find('bndbox')
                xmin, ymin, xmax, ymax = self._get_annotation_bbox(obj_bbox)

                # now that we have all the information for this annotation,
                # fill the row dict and append it to the final result
                row_dictionary.update({
                    'filename': img_filename,
                    'foldername': img_foldername,
                    'img_full_path': img_full_path,
                    'width': width,
                    'height': height,
                    'class_name': class_name,
                    'xmin': xmin,
                    'ymin': ymin,
                    'xmax': xmax,
                    'ymax': ymax
                })
                self._annotation_line_list.append(row_dictionary)
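
All of these examples depend on a get_file_lines helper whose definition is not shown. Judging from the call sites (including the verbatim=True, no_empty=True keywords in Example #7 below), it most likely reads a text file and returns its lines as a list of strings. A minimal sketch under that assumption; the real signature and behaviour may differ:

def get_file_lines(path, verbatim=False, no_empty=False):
    # Hypothetical reimplementation for illustration only.
    # Read `path` and return its lines as a list of strings.
    # verbatim=False strips surrounding whitespace from each line;
    # no_empty=True drops blank lines.
    with open(path) as f:
        lines = [line.rstrip('\n') for line in f]
    if not verbatim:
        lines = [line.strip() for line in lines]
    if no_empty:
        lines = [line for line in lines if line.strip()]
    return lines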
Example #2
def part1():
    diffs = []
    lines = get_file_lines(input_path)
    adapters = sorted(map(int, lines))
    joltage = 0  # the charging outlet is rated 0 jolts
    for adapter in adapters:
        diffs.append(adapter - joltage)
        joltage = adapter

    diffs.append(3)  # the device's built-in adapter is always 3 jolts higher
    ones = len([a for a in diffs if a == 1])
    threes = len([a for a in diffs if a == 3])
    print(ones, threes)
    return ones * threes
Example #3
def part2():
    lines = get_file_lines(input_path)
    grid = [list(l) for l in lines]
    prev_grid = []
    while grid != prev_grid:  # iterate until the layout stops changing
        prev_grid = deepcopy(grid)
        for y in range(len(grid)):
            for x in range(len(grid[y])):
                seat = prev_grid[y][x]
                if seat == '.':
                    continue  # floor never changes
                elif seat == 'L' and get_occupied_adj_count2(x, y, prev_grid) == 0:
                    grid[y][x] = '#'  # empty seat becomes occupied
                elif seat == '#' and get_occupied_adj_count2(x, y, prev_grid) >= 5:
                    grid[y][x] = 'L'  # occupied seat empties out
        print_grid(grid)
    return count_occupied_seats(grid)
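
get_occupied_adj_count2, print_grid and count_occupied_seats are project helpers that are not shown here. Given the >= 5 threshold, get_occupied_adj_count2 presumably counts the occupied seats visible along the eight lines of sight (the part-two rule of Advent of Code 2020, day 11) rather than only the immediate neighbours. A possible sketch of the two counting helpers, under that assumption:

def get_occupied_adj_count2(x, y, grid):
    # Look along each of the eight directions and count the first
    # visible seat if it is occupied; floor tiles ('.') are seen through.
    count = 0
    for dx, dy in ((-1, -1), (-1, 0), (-1, 1), (0, -1),
                   (0, 1), (1, -1), (1, 0), (1, 1)):
        nx, ny = x + dx, y + dy
        while 0 <= ny < len(grid) and 0 <= nx < len(grid[ny]):
            if grid[ny][nx] == '#':
                count += 1
                break
            if grid[ny][nx] == 'L':
                break
            nx, ny = nx + dx, ny + dy
    return count

def count_occupied_seats(grid):
    # Total number of occupied seats in the whole grid.
    return sum(row.count('#') for row in grid)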
Example #4
def part1():
    lines = get_file_lines(input_path)
    lines = map(int, lines)

    # PREAMBLE_SIZE = 5
    PREAMBLE_SIZE = 25
    preamble = []
    for index, line in enumerate(lines):
        if index < PREAMBLE_SIZE:
            preamble.append(line)  # still filling the initial preamble
        elif is_sum_of_two(line, preamble):
            preamble.append(line)  # valid number: slide the window forward
            preamble.pop(0)
        else:
            return line  # first number that is not a sum of two preamble entries

    raise Exception('No answer found')
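
is_sum_of_two is not defined in this snippet. For the XMAS check of Advent of Code 2020, day 9, it presumably answers whether the current number is the sum of any two distinct values in the preamble; a minimal sketch of such a helper:

def is_sum_of_two(target, numbers):
    # Return True if two distinct entries of `numbers` add up to `target`.
    seen = set()
    for n in numbers:
        if target - n in seen:
            return True
        seen.add(n)
    return False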
Example #5
def part2():
    lines = get_file_lines(input_path)
    adapters = sorted(map(int, lines))
    diffs = []
    joltage = 0
    for adapter in adapters:
        diffs.append(adapter - joltage)
        joltage = adapter
    diffs.append(3)

    total = 1
    cur_ones_group_length = 0
    for diff in diffs:
        if cur_ones_group_length > 0 and diff == 3:
            # a 3-jolt gap ends a run of 1-jolt differences; multiply in
            # the number of arrangements for that run
            total *= get_permutations(cur_ones_group_length)
            cur_ones_group_length = 0
        elif diff == 1:
            cur_ones_group_length += 1

    return total
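
get_permutations is another helper that is not shown. It presumably returns the number of valid adapter orderings for a run of consecutive 1-jolt differences (1, 1, 2, 4, 7 for run lengths 0 through 4, the tribonacci-style counts), since runs separated by a 3-jolt gap are independent and their counts multiply. One way such a helper could be written:

def get_permutations(run_length):
    # Hypothetical helper: number of ways to traverse a run of
    # `run_length` consecutive 1-jolt differences when each step may
    # span 1, 2 or 3 jolts.
    ways = {0: 1, 1: 1, 2: 2}
    for n in range(3, run_length + 1):
        ways[n] = ways[n - 1] + ways[n - 2] + ways[n - 3]
    return ways[run_length]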
Example #6
def part2():
    lines = get_file_lines(input_path)
    lines = list(map(int, lines))

    # GOAL = 127
    GOAL = 542529149

    # Old solution
    # end = 1
    # for start in range(len(lines)):
    #     while sum(lines[start:end]) < GOAL:
    #         end += 1
    #     if sum(lines[start:end]) == GOAL:
    #         found = lines[start:end]
    #         return min(found) + max(found)

    # Better solution: slide a window until it sums to GOAL
    start, end = 0, 0
    while (total := sum(lines[start:end])) != GOAL:
        if total < GOAL:
            end += 1
        else:
            start += 1
    found = lines[start:end]
    return min(found) + max(found)
Example #7
def main(argv, classifier_name):
    # Command line args
    cargs = build_parser().parse_args(argv[1:])
    cargs = vars(cargs)  # Convert to dict

    # Logging
    setup_logging(-1 if cargs.pop('quiet') else cargs.pop('verbose'))
    logging.debug('Parsed args: {}'.format(cargs))

    # Classifier
    # noinspection PyPep8Naming
    ClassifierClass, clf_params = get_classifier_class(classifier_name)

    source = cargs.pop('source')
    mode = cargs.pop('mode')
    stats = cargs.pop('stats', False)

    feature_include_file = cargs.pop('feature_include_file', None)
    feature_exclude_file = cargs.pop('feature_exclude_file', None)
    if feature_include_file is not None:
        clf_params['feature_include_patterns'] = get_file_lines(feature_include_file, verbatim=True, no_empty=True)
    if feature_exclude_file is not None:
        clf_params['feature_exclude_patterns'] = get_file_lines(feature_exclude_file, verbatim=True, no_empty=True)

    loading_params = dict(  # Really these shouldn't be present if we are using the matrix, but what can you do?
        extract_features=cargs.pop('extract_features'),
        max_filesize=cargs.pop('max_filesize', None),
    )

    # Call appropriate function by mode
    if mode == FIT_MODE:
        # Feature weights
        feature_weights_file = cargs.pop('feature_weight_file', None)
        feature_weights = get_weights_from_file(feature_weights_file) if feature_weights_file else None

        # Imps
        imps_file = cargs.pop('imps_file', None)
        if imps_file:
            clf_params['imps_dict'] = get_imps_from_file(imps_file)

        # Classifier
        ml = ClassifierClass(**clf_params)
        if source == FEATURE_SOURCE:
            reports = load_goodware_malware(cargs.pop('goodware'), cargs.pop('malware'), **loading_params)
            feature_matrix, truths = ml.fit_transform_feature_matrix(reports), ml.get_truths(reports)
            feature_matrix = ml.filter_feature_matrix(feature_matrix)
        elif source == MATRIX_SOURCE:
            feature_matrix, truths, reports = load_matrix_dir(cargs.pop('matrix_dir'), ml)
            feature_matrix = ml.filter_feature_matrix(feature_matrix)
        else:
            raise NotImplementedError

        matrix_length = os.environ.pop('MML_MATRIX_LENGTH', None)
        if matrix_length:
            matrix_length = int(matrix_length)

            # keep at most matrix_length samples from each category
            good_matching = np.where(truths == CATEGORY_BENIGN)[0][:matrix_length]
            mal_matching = np.where(truths == CATEGORY_MALICIOUS)[0][:matrix_length]
            matching = np.concatenate((good_matching, mal_matching))
            feature_matrix = feature_matrix[matching]
            truths = truths[matching]

        # Go!
        cargs.pop('func')(ml, reports, feature_matrix, truths, feature_weights=feature_weights, **cargs)

    elif mode == PREDICT_MODE:
        ml = ClassifierClass(**clf_params)
        files = list(expand_dirs(cargs.pop('files')))
        reports = load_files(files, truths=None, **loading_params)
        cargs.pop('func')(ml, reports, **cargs)

    else:
        raise NotImplementedError

    # Output some statistics
    if stats:
        # TODO add output directory option
        ml.output_feature_stats(reports)

    logging.info('Done')
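
This example leans on several project-specific helpers (build_parser, get_classifier_class, load_goodware_malware, load_matrix_dir, expand_dirs, load_files, get_weights_from_file, get_imps_from_file) that are defined elsewhere. As one illustrative assumption, expand_dirs most likely flattens a mix of file and directory arguments into individual file paths; a minimal sketch of such a helper:

import os

def expand_dirs(paths):
    # Hypothetical helper for illustration: yield plain files as-is and
    # recurse into directories, yielding every file they contain.
    for path in paths:
        if os.path.isdir(path):
            for root, _dirs, files in os.walk(path):
                for name in files:
                    yield os.path.join(root, name)
        else:
            yield path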