def extract_commands_table_response_pdf(self, file, short_name):
    """Parse the response table found at the current position of *file*.

    Lines are read one at a time (the final character of each line,
    normally the newline, is dropped).  Empty lines are skipped.  Reading
    stops at the first line containing "Table" or "Page", or at the end of
    the input.

    The first line matching "Type ... Name ... Description" is the header:
    it is split on runs of five or more blanks, the stripped titles become
    the first row of a new ``data_structures.TPM2_Partx_Table`` and the
    column at which each title starts is remembered.  Every later line is
    handed to ``self.split_row`` together with those column offsets; rows
    whose first cell is empty are dropped, the others are appended to the
    table's ``rows``.

    :param file: object providing ``readline()``
    :param short_name: passed as first and second argument to the table
    :return: the table, or None if no header line was seen
    """
    out_table = None
    offsets = []
    table_found = False

    while True:
        raw_line = file.readline()
        if not raw_line:
            # readline() yields an empty result only at end of input;
            # stop here instead of spinning forever.
            break

        line = raw_line[:-1]
        if line == "":
            continue
        if "Table" in line or "Page" in line:
            break

        header = re.search("([ ]*Type[ ]+Name[ ]+Description.*)", line)
        if header:
            table_found = True
            pieces = re.split("[ ]{5,}", header.group(1) + "\n")
            titles = [piece.strip() for piece in pieces]
            # column of each title within the header line
            offsets = [line.find(title) for title in titles]
            out_table = data_structures.TPM2_Partx_Table(
                short_name, short_name, None, [titles])
        elif table_found:
            row = self.split_row(line + "\n", offsets)
            # compare by value; identity comparison with a literal is
            # implementation dependent
            if not row or row[0] == "":
                continue
            out_table.rows.append(row)

    return out_table
def extract_commands_table_command_pdf(self, file, command_short_name):
    """Parse the command table following the "TPM2_<name> Command" heading.

    *file* is first searched for ``TPM2_<command_short_name> Command``
    followed by a newline (the name is inserted into the pattern as is).
    If the heading is missing, None is returned.  Otherwise reading
    continues right after the heading, line by line (the final character
    of each line, normally the newline, is dropped).  Empty lines are
    skipped.  Reading stops at the first line containing "Table" or
    "Page", or at the end of the input.

    The first line matching "Type ... Name ... Description" is the header:
    it is split on runs of five or more blanks, the stripped titles become
    the first row of a new ``data_structures.TPM2_Partx_Table`` and the
    column at which each title starts is remembered.  Every later line is
    handed to ``self.split_row`` together with those column offsets; rows
    whose first cell is empty are dropped, the others are appended to the
    table's ``rows``.

    :param file: object that ``re.search`` can scan and that also provides
        ``seek()`` and ``readline()``
        (NOTE(review): presumably a memory-mapped text dump -- confirm)
    :param command_short_name: command name without the "TPM2_" prefix
    :return: the table, or None if heading or header line were not found
    """
    heading = re.search(
        'TPM2_(' + command_short_name + ') Command\n', file)
    if heading is None:
        return None
    file.seek(heading.end())

    in_table = None
    offsets = []
    table_found = False

    while True:
        raw_line = file.readline()
        if not raw_line:
            # readline() yields an empty result only at end of input;
            # stop here instead of spinning forever.
            break

        line = raw_line[:-1]
        if line == "":
            continue
        if "Table" in line or "Page" in line:
            break

        header = re.search("([ ]*Type[ ]+Name[ ]+Description.*)", line)
        if header:
            table_found = True
            pieces = re.split("[ ]{5,}", header.group(1) + "\n")
            titles = [piece.strip() for piece in pieces]
            # column of each title within the header line
            offsets = [line.find(title) for title in titles]
            in_table = data_structures.TPM2_Partx_Table(
                command_short_name, command_short_name, None, [titles])
        elif table_found:
            row = self.split_row(line + "\n", offsets)
            # compare by value; identity comparison with a literal is
            # implementation dependent
            if not row or row[0] == "":
                continue
            in_table.rows.append(row)

    return in_table
def extract_code_blocks(self, file, section_number, sub_section_number):
    """Collect code, comment and table lines of one sub-section from *file*.

    Lines are read one at a time (the final character of each line,
    normally the newline, is dropped) and empty lines are skipped.  Each
    remaining line is classified:

    * a line starting with one to four digits is a numbered code line; it
      is cut at the current code offset and appended as
      ``TPM2_Partx_CodeLine``;
    * a line matching "Error Returns ... Meaning" starts a table; the
      lines after it are split with ``utils.split_row`` and appended to
      that table until the next code line;
    * a line starting with two or more blanks, seen before any code or
      table, is appended as ``TPM2_Partx_CommentLine``.

    A page footer (a line containing both "Page" and "Family") makes the
    function skip ahead five lines; a "Part 4:" header among them
    re-calibrates the code offset from the blanks after the form feed.

    Reading ends at the end of the input, at an "Annex" line after a page
    break, at the heading of the next sub-section, at the heading of the
    next section after a page break, or at the first line that is neither
    a numbered line nor indented by two or more blanks.

    :param file: object providing ``readline()``, ``tell()`` and ``seek()``
    :param section_number: section number as a string of digits
    :param sub_section_number: sub-section number (convertible with int())
    :return: the filled ``data_structures.TPM2_Partx_File``
    """
    code_blocks = data_structures.TPM2_Partx_File()

    ###################################################################
    # FUNCTIONS BLOCKS (START)
    code_found = False
    table_found = False
    code_offset = 0
    offsets = []

    while True:
        raw_line = file.readline()
        if not raw_line:
            # readline() yields an empty result only at end of input;
            # stop here instead of spinning forever.
            break

        line = raw_line[:-1]
        if line == "":
            continue

        # end of page, either break, or calculate new offsets
        if "Page" in line and "Family" in line:
            for _ in range(0, 5):
                line = file.readline()[:-1]
                if re.search("Part[ ]+4:", line):
                    margin = re.search("\f([ ]*).*", line)
                    # keep the previous offset if the header carries no
                    # form feed
                    if margin:
                        code_offset = len(margin.group(1))

            if "Annex" in line:
                return code_blocks

            if line.strip().startswith(
                    section_number + "." + str(int(sub_section_number) + 1)):
                break

            if (line.startswith(" ")
                    and str(int(section_number) + 1) + " " in line.strip()
                    and not section_number + "." + str(sub_section_number)
                    in line):
                # rewind so that the heading of the next section can be
                # read again
                # NOTE(review): len(line) excludes the stripped newline,
                # so this lands one character into the line -- confirm
                file.seek(file.tell() - len(line))
                break

        code_match = re.search(r"^(\d{1,4}[ ]*)(.*)", line)
        if code_match:
            code_found = True
            table_found = False
            if code_offset == 0:
                # first code line: derive the offset from the line number
                # and the blanks following it
                code_offset = len(code_match.group(1))
            code_blocks.append(
                data_structures.TPM2_Partx_CodeLine(line[code_offset:]))

        table_header = re.search("([ ]*Error Returns[ ]+Meaning.*)", line)
        if table_header:
            table_found = True
            pieces = re.split("[ ]{5,}", table_header.group(1) + "\n")
            titles = [piece.strip() for piece in pieces]
            # column of each title within the header line
            offsets = [line.find(title) for title in titles]
            code_blocks.append(
                data_structures.TPM2_Partx_Table(None, None, None, titles))
        elif table_found:
            row = utils.split_row(line + "\n", offsets)
            # the table is the most recently appended element
            code_blocks.elements[-1].append(row)
        elif line.strip().startswith(
                section_number + "." + str(int(sub_section_number) + 1)):
            break

        comment_match = re.search("^[ ]{2,}(.*)", line)
        if not (table_found or code_found) and comment_match:
            code_blocks.append(
                data_structures.TPM2_Partx_CommentLine(
                    comment_match.group(1)))

        if not (code_match or comment_match):
            break
    # FUNCTIONS BLOCKS (END)
    ###################################################################

    return code_blocks
def extract_code_blocks(self, entry, style):
    """Collect comment, table and code elements starting at *entry*.

    Starting with *entry*, every second sibling is visited:

    * a text:p tag is appended as ``TPM2_Partx_CommentLine``;
    * a table:table tag is appended as ``TPM2_Partx_Table`` holding the
      text of every cell, row by row;
    * a text:list tag whose style name equals *style* contributes one
      ``TPM2_Partx_CodeLine`` per contained text:p, followed by one empty
      code line.

    After each visited entry the following siblings are scanned for
    another tag carrying exactly *style*.  The scan gives up when the
    siblings run out or when a "WWNum<n>" style is met whose number
    exceeds the number in *style* by 2 to 4.  Extraction goes on only if
    the scan found such a tag.

    :param entry: first element to inspect
    :param style: style name of the code lists; must contain a number
    :return: the filled ``data_structures.TPM2_Partx_File``
    """
    code_blocks = data_structures.TPM2_Partx_File()
    style_nr = int(re.search("([0-9]+)", style).group(1))

    cont = ExtractionNavigator.selector(entry)
    while cont:
        # if the current entry is a text:p, table:table, or text:list with
        # the current style, append it to code blocks
        is_tag = isinstance(entry, Tag)

        if is_tag and entry.name == constants.XML_TEXT_P:
            code_blocks.append(
                data_structures.TPM2_Partx_CommentLine(entry.get_text()))

        elif is_tag and entry.name == constants.XML_TABLE_TABLE:
            table_rows = [
                [cell.get_text()
                 for cell in row.find_all(constants.XML_TABLE_TABLE_CELL)]
                for row in entry.find_all(constants.XML_TABLE_TABLE_ROW)
            ]
            code_blocks.append(
                data_structures.TPM2_Partx_Table("", "", 0, table_rows))

        elif (is_tag and entry.name == constants.XML_TEXT_LIST
              and entry.has_attr(constants.XML_TEXT_STYLE_NAME)
              and entry[constants.XML_TEXT_STYLE_NAME] == style):
            for text_p in entry.find_all(constants.XML_TEXT_P):
                if not isinstance(text_p, Tag):
                    break
                utils.convert_indentation(text_p)
                code_blocks.append(
                    data_structures.TPM2_Partx_CodeLine(text_p.get_text()))

            # add an empty line for readability
            code_blocks.append(data_structures.TPM2_Partx_CodeLine(""))

        # look ahead: is there another element with the requested style?
        next_list = entry
        current_style_nr = style_nr
        cont = False
        while (current_style_nr - style_nr < 2
               or current_style_nr - style_nr > 4):
            if not next_list:
                break

            # step over the node in between; the chain may end at either
            # of the two steps
            separator = next_list.next_sibling
            next_list = (separator.next_sibling
                         if separator is not None else None)

            if (isinstance(next_list, Tag)
                    and next_list.has_attr(constants.XML_TEXT_STYLE_NAME)):
                current_style = next_list[constants.XML_TEXT_STYLE_NAME]
                numbered = re.search("WWNum([0-9]+)", current_style)
                if numbered and int(numbered.group(1)) > style_nr:
                    current_style_nr = int(numbered.group(1))
                if current_style == style:
                    cont = True
                    break

        if cont:
            # the look-ahead stepped from this entry, so both siblings
            # exist
            entry = entry.next_sibling.next_sibling

    return code_blocks
def extract_tpm2_part3_command(self, function, style):
    """Collect the comment, table and code elements of one command.

    The first entry is obtained from ``self.next_entry(function)``.  Then,
    as long as extraction continues:

    * a text:p tag is appended as ``TPM2_Partx_CommentLine``;
    * a table:table tag is appended as ``TPM2_Partx_Table`` holding the
      text of every cell, row by row;
    * a text:list tag contributes one ``TPM2_Partx_CodeLine`` per
      contained text:p, followed by one empty code line.

    The next entry is looked up with ``entry.find_next(self.selector)``.
    Extraction continues for a text:p, for a text:list carrying the
    expected style, and for a table:table whose next text:list carries
    the expected style; anything else ends it.

    :param function: passed to ``self.next_entry`` to locate the start
    :param style: style name used when the first entry does not yield one
    :return: the filled ``data_structures.TPM2_Partx_File``
    """
    command = data_structures.TPM2_Partx_File()
    entry = self.next_entry(function)

    # Distinguish between a text:list and a text:p tag:
    # In case the tag is of type text:list, simply use its text:style-name
    # as condition for further iteration (to find further lines of the
    # code).  In case the tag is of type text:p, use the text:style-name
    # of its sibling -- assuming the sibling after next is of type
    # text:list.  If no style can be read, the caller's style is kept.
    if entry.name == constants.XML_TEXT_LIST:
        if entry.has_attr(constants.XML_TEXT_STYLE_NAME):
            style = entry[constants.XML_TEXT_STYLE_NAME]
    elif entry.name == constants.XML_TEXT_P:
        separator = entry.next_sibling
        following = separator.next_sibling if separator is not None else None
        if (isinstance(following, Tag)
                and following.has_attr(constants.XML_TEXT_STYLE_NAME)):
            style = following[constants.XML_TEXT_STYLE_NAME]

    # entry tag is of type text:list, text:p or table:table
    continue_extraction = self.selector(entry)
    while continue_extraction:
        if entry.name == constants.XML_TEXT_P:
            command.append(
                data_structures.TPM2_Partx_CommentLine(entry.get_text()))

        elif entry.name == constants.XML_TABLE_TABLE:
            table_rows = [
                [cell.get_text()
                 for cell in row.find_all(constants.XML_TABLE_TABLE_CELL)]
                for row in entry.find_all(constants.XML_TABLE_TABLE_ROW)
            ]
            command.append(
                data_structures.TPM2_Partx_Table("", "", 0, table_rows))

        elif entry.name == constants.XML_TEXT_LIST:
            for text_p in entry.find_all(constants.XML_TEXT_P):
                if not isinstance(text_p, Tag):
                    break
                utils.convert_indentation(text_p)
                command.append(
                    data_structures.TPM2_Partx_CodeLine(text_p.get_text()))
            # blank code line after each list
            command.append(data_structures.TPM2_Partx_CodeLine(""))

        entry = entry.find_next(self.selector)
        if not isinstance(entry, Tag):
            # nothing left to look at
            break

        if entry.name == constants.XML_TEXT_P:
            continue_extraction = True
        elif entry.name == constants.XML_TEXT_LIST:
            continue_extraction = (
                entry.has_attr(constants.XML_TEXT_STYLE_NAME)
                and entry[constants.XML_TEXT_STYLE_NAME] == style)
        elif entry.name == constants.XML_TABLE_TABLE:
            # a table belongs to the command only if the list after it
            # does; there may be no such list at all
            next_list = entry.find_next(constants.XML_TEXT_LIST)
            continue_extraction = (
                next_list is not None
                and next_list.has_attr(constants.XML_TEXT_STYLE_NAME)
                and next_list[constants.XML_TEXT_STYLE_NAME] == style)
        else:
            continue_extraction = False

    return command