def decode(self, img, first_strip=False):
    """Decode one strip image and append its extracted data to ``self.data``.

    The header is parsed first to obtain the vertical sync start and the
    number of bits per row (stored in ``self.bits_count``).  Rows are then
    extracted and decoded with either the CNN-based or the algorithmic
    implementation, as selected by ``self.config['row_extractor']`` and
    ``self.config['row_decoder']``.  If decoding yields no rows an error is
    printed and nothing else happens; otherwise the data is extracted,
    appended to ``self.data`` and the checksum result is printed.

    Args:
        img: Image handed to ``SoftstripMatrix`` together with
            ``self.gray_img``.
        first_strip: Forwarded to ``DataExtractor.extract_data``; when true
            and the checksum is valid, the extracted file header is stored
            in ``self.strip_meta_info`` and printed.

    Returns:
        None.
    """
    matrix = SoftstripMatrix(img, self.gray_img)

    header = HeaderExtractor(matrix)
    header.parse_header()
    sync_start = header.vertical_sync_start
    self.bits_count = header.get_bits_per_row()

    # Row extraction.
    # NOTE(review): the two extractors' results are unpacked in opposite
    # order (gray first for the CNN one, gray second for the algorithmic
    # one). Kept exactly as found -- confirm against each extract_rows().
    use_cnn_extractor = self.config['row_extractor'] == CNN_ROW_EXTRACTOR
    if use_cnn_extractor:
        extractor = CnnRowExtractor(
            matrix.grayscale_matrix,
            matrix.binary_matrix,
            self.bits_count,
        )
        gray_rows, grouped_rows = extractor.extract_rows()
    else:
        extractor = AlgorithmicRowExtractor(matrix, self.bits_count)
        grouped_rows, gray_rows = extractor.extract_rows()

    # Row decoding: the CNN decoder receives the gray rows plus the vertical
    # sync start, the algorithmic decoder receives the other grouped rows.
    use_cnn_decoder = self.config['row_decoder'] == CNN_ROW_DECODER
    if use_cnn_decoder:
        decoder = CnnRowDecoder(
            gray_rows,
            self.start_time,
            self.bits_count,
            self.config['timeout'],
            sync_start,
        )
    else:
        decoder = AlgorithmicRowDecoder(
            grouped_rows,
            self.bits_count,
            self.start_time,
            self.config['timeout'],
        )
    pixel_rows = decoder.decode_rows()

    # An empty result means the strip could not be decoded.
    if len(pixel_rows) == 0:
        print('[ERROR] ' + self.path + ' is invalid!')
        return

    payload = DataExtractor(self.config['timeout'])
    payload.extract_data(pixel_rows, first_strip, self.start_time)
    # The data is appended before the checksum verdict is inspected.
    self.data += payload.data

    if not payload.valid:
        print('Checksum invalid!')
        return

    print('Checksum valid!')
    if first_strip:
        self.strip_meta_info = payload.file_header
        print(self.strip_meta_info)
def get_arguments(argument_list):
    """Parse the command-line options and return the document file name.

    Recognised options are ``-d <file>`` and ``--document <file>``.  The
    parsed option list and the resulting file name are echoed to stdout.

    Args:
        argument_list: Command-line arguments without the program name,
            e.g. ``sys.argv[1:]``.

    Returns:
        The value of the last ``-d`` / ``--document`` option given.

    Raises:
        SystemExit: With status 2 when ``getopt`` rejects the arguments or
            when no option at all was supplied.
    """
    try:
        parsed_options, _positionals = getopt.getopt(
            argument_list, "d:", ["document="])
    except getopt.GetoptError as parse_error:
        print(str(parse_error))
        sys.exit(2)

    print(parsed_options)
    if not parsed_options:
        print("Invalid arguments")
        sys.exit(2)

    document_file = ''
    for flag, value in parsed_options:
        # When the option is repeated, the last occurrence wins.
        if flag == "-d" or flag == "--document":
            document_file = value

    print(document_file)
    return document_file


if __name__ == '__main__':
    # Script entry point: take the document given on the command line, run
    # the data extractor on it, then build the term index from the result.
    wiki_10_file = get_arguments(sys.argv[1:])

    inverted_index = InvertedIndex()
    data_extractor = DataExtractor()

    # extract_data() returns the name that build_term_index() takes as its
    # first argument, alongside the original document.
    structure_file_name = data_extractor.extract_data(wiki_10_file)
    index_file, vector_file = inverted_index.build_term_index(
        structure_file_name,
        wiki_10_file,
    )

    print("Index file name: ", index_file)
    print("Vector file name: ", vector_file)