def extract_code_blocks(self, entry, style):
    """Collect comments, tables and code lines of one code block.

    Starting at ``entry``, sibling nodes are visited two steps at a time
    (``next_sibling.next_sibling``) and converted as follows:

    * a ``constants.XML_TEXT_P`` tag becomes a ``TPM2_Partx_CommentLine``
      holding its text,
    * a ``constants.XML_TABLE_TABLE`` tag becomes a ``TPM2_Partx_Table``
      whose rows are lists of the cell texts,
    * a ``constants.XML_TEXT_LIST`` tag whose style-name attribute equals
      ``style`` yields one ``TPM2_Partx_CodeLine`` per contained
      ``constants.XML_TEXT_P`` tag (after ``utils.convert_indentation``).

    After every visited node an empty ``TPM2_Partx_CodeLine`` is appended.

    NOTE(review): the indentation of this file was lost; the empty line is
    assumed to be appended once per visited node (loop level) -- confirm.
    NOTE(review): another definition with this same name follows directly
    in the file; if both share a scope, the later one is the one in effect.

    :param entry: node at which extraction starts.
    :param style: style name of the list holding the code lines; it must
        contain a number (``re.search`` on it is dereferenced unchecked).
    :return: the filled ``data_structures.TPM2_Partx_File``.
    """
    code_blocks = data_structures.TPM2_Partx_File()
    style_nr = int(re.search(r"([0-9]+)", style).group(1))
    style_attr = constants.XML_TEXT_STYLE_NAME

    cont = ExtractionNavigator.selector(entry)
    while cont:
        # Convert the current node; anything that is not a text:p,
        # table:table or matching text:list contributes no element.
        if isinstance(entry, Tag):
            if entry.name == constants.XML_TEXT_P:
                code_blocks.append(
                    data_structures.TPM2_Partx_CommentLine(entry.get_text()))
            elif entry.name == constants.XML_TABLE_TABLE:
                table_rows = [
                    [cell.get_text()
                     for cell in row.find_all(constants.XML_TABLE_TABLE_CELL)]
                    for row in entry.find_all(constants.XML_TABLE_TABLE_ROW)
                ]
                code_blocks.append(
                    data_structures.TPM2_Partx_Table("", "", 0, table_rows))
            elif (entry.name == constants.XML_TEXT_LIST
                  and entry.has_attr(style_attr)
                  and entry[style_attr] == style):
                for text_p in entry.find_all(constants.XML_TEXT_P):
                    if not isinstance(text_p, Tag):
                        break
                    utils.convert_indentation(text_p)
                    code_blocks.append(
                        data_structures.TPM2_Partx_CodeLine(
                            text_p.get_text()))

        # add an empty line for readability
        code_blocks.append(data_structures.TPM2_Partx_CodeLine(""))

        # Look ahead: continue only if a later list with the same style
        # exists. Scanning stops once a list whose WWNum number exceeds
        # ours by 2..4 was seen (NOTE(review): presumably the start of a
        # different code block -- confirm), or when the siblings run out.
        next_list = entry
        current_style_nr = style_nr
        cont = False
        while not 2 <= current_style_nr - style_nr <= 4:
            if not next_list:
                break
            skipped = next_list.next_sibling
            # Guard the second step: the chain may end after one sibling.
            next_list = skipped.next_sibling if skipped is not None else None

            if isinstance(next_list, Tag) and next_list.has_attr(style_attr):
                current_style = next_list[style_attr]
                result = re.search(r"WWNum([0-9]+)", current_style)
                if result and int(result.group(1)) > style_nr:
                    current_style_nr = int(result.group(1))
                if current_style == style:
                    cont = True
                    break

        # Advance only when another round follows; stepping past the last
        # sibling would dereference None for no benefit.
        if cont:
            entry = entry.next_sibling.next_sibling

    return code_blocks
def extract_code_blocks(self, entry, style):
    """Gather the comment lines, tables and code lines of a code block.

    Walks the siblings of ``entry`` in steps of two
    (``next_sibling.next_sibling``). Per visited node:

    * ``constants.XML_TEXT_P`` -> one ``TPM2_Partx_CommentLine`` with the
      node text,
    * ``constants.XML_TABLE_TABLE`` -> one ``TPM2_Partx_Table`` built from
      the text of every cell, row by row,
    * ``constants.XML_TEXT_LIST`` carrying the style name ``style`` -> one
      ``TPM2_Partx_CodeLine`` for each nested ``constants.XML_TEXT_P`` tag,
      after ``utils.convert_indentation`` was applied to it.

    An empty ``TPM2_Partx_CodeLine`` follows every visited node.

    NOTE(review): the file's indentation was lost; appending the empty line
    once per visited node (loop level) is an assumption -- confirm.
    NOTE(review): a definition with this same name directly precedes this
    one in the file; if both share a scope, this later one is in effect.

    :param entry: node at which extraction starts.
    :param style: style name of the code list; must contain a number,
        because the ``re.search`` result is used without a check.
    :return: the populated ``data_structures.TPM2_Partx_File``.
    """
    code_blocks = data_structures.TPM2_Partx_File()
    style_nr = int(re.search(r"([0-9]+)", style).group(1))
    style_attr = constants.XML_TEXT_STYLE_NAME

    cont = ExtractionNavigator.selector(entry)
    while cont:
        # Nodes other than text:p, table:table or a matching text:list
        # add nothing besides the separating empty line below.
        if isinstance(entry, Tag):
            if entry.name == constants.XML_TEXT_P:
                code_blocks.append(
                    data_structures.TPM2_Partx_CommentLine(entry.get_text()))
            elif entry.name == constants.XML_TABLE_TABLE:
                table_rows = [
                    [cell.get_text()
                     for cell in row.find_all(constants.XML_TABLE_TABLE_CELL)]
                    for row in entry.find_all(constants.XML_TABLE_TABLE_ROW)
                ]
                code_blocks.append(
                    data_structures.TPM2_Partx_Table("", "", 0, table_rows))
            elif (entry.name == constants.XML_TEXT_LIST
                  and entry.has_attr(style_attr)
                  and entry[style_attr] == style):
                for text_p in entry.find_all(constants.XML_TEXT_P):
                    if not isinstance(text_p, Tag):
                        break
                    utils.convert_indentation(text_p)
                    code_blocks.append(
                        data_structures.TPM2_Partx_CodeLine(
                            text_p.get_text()))

        # add an empty line for readability
        code_blocks.append(data_structures.TPM2_Partx_CodeLine(""))

        # Decide whether to go on: search the following siblings for a
        # list with the same style. The search ends when a list numbered
        # 2..4 above ours was met (NOTE(review): presumably another code
        # block begins there -- confirm) or when no sibling is left.
        next_list = entry
        current_style_nr = style_nr
        cont = False
        while not 2 <= current_style_nr - style_nr <= 4:
            if not next_list:
                break
            skipped = next_list.next_sibling
            # The sibling chain may end after a single step.
            next_list = skipped.next_sibling if skipped is not None else None

            if isinstance(next_list, Tag) and next_list.has_attr(style_attr):
                current_style = next_list[style_attr]
                result = re.search(r"WWNum([0-9]+)", current_style)
                if result and int(result.group(1)) > style_nr:
                    current_style_nr = int(result.group(1))
                if current_style == style:
                    cont = True
                    break

        # Only step forward when the loop runs again; otherwise the step
        # is useless and could dereference None at the end of the chain.
        if cont:
            entry = entry.next_sibling.next_sibling

    return code_blocks
def extract_tpm2_part3_command(self, function, style):
    """Extract the comments, tables and code lines of one Part 3 command.

    The first node is obtained via ``self.next_entry(function)``. The style
    used to recognise further code lines is derived from that node:

    * for a ``constants.XML_TEXT_LIST`` tag, its own style name is used;
    * for a ``constants.XML_TEXT_P`` tag, the style name of
      ``next_sibling.next_sibling`` is used -- assuming that node is a
      text:list carrying a style name;
    * otherwise the ``style`` argument is kept.

    Nodes are then visited with ``find_next(self.selector)``. A text:p tag
    becomes a ``TPM2_Partx_CommentLine``, a table:table tag a
    ``TPM2_Partx_Table`` of cell texts, and a text:list tag one
    ``TPM2_Partx_CodeLine`` per nested text:p tag. An empty
    ``TPM2_Partx_CodeLine`` is appended after every visited node.

    NOTE(review): the indentation of this file was lost; appending the
    empty line once per visited node (loop level) is an assumption.

    Extraction continues while the next node is a text:p, a text:list with
    the derived style, or a table:table whose next following text:list has
    the derived style. A missing list or missing style attribute ends the
    extraction instead of raising.

    :param function: passed unchanged to ``self.next_entry``.
    :param style: fallback style name, used when the first node is neither
        a text:list nor a text:p.
    :return: the filled ``data_structures.TPM2_Partx_File``.
    """
    command = data_structures.TPM2_Partx_File()
    entry = self.next_entry(function)
    style_attr = constants.XML_TEXT_STYLE_NAME

    # extract the style
    if entry.name == constants.XML_TEXT_LIST:
        style = entry[style_attr]
    if entry.name == constants.XML_TEXT_P:
        style = entry.next_sibling.next_sibling[style_attr]

    # entry tag is of type text:list, text:p or table:table
    continue_extraction = self.selector(entry)
    while continue_extraction:
        if entry.name == constants.XML_TEXT_P:
            command.append(
                data_structures.TPM2_Partx_CommentLine(entry.get_text()))
        elif entry.name == constants.XML_TABLE_TABLE:
            table_rows = [
                [cell.get_text()
                 for cell in row.find_all(constants.XML_TABLE_TABLE_CELL)]
                for row in entry.find_all(constants.XML_TABLE_TABLE_ROW)
            ]
            command.append(
                data_structures.TPM2_Partx_Table("", "", 0, table_rows))
        elif entry.name == constants.XML_TEXT_LIST:
            for text_p in entry.find_all(constants.XML_TEXT_P):
                if not isinstance(text_p, Tag):
                    break
                utils.convert_indentation(text_p)
                command.append(
                    data_structures.TPM2_Partx_CodeLine(text_p.get_text()))

        command.append(data_structures.TPM2_Partx_CodeLine(""))

        entry = entry.find_next(self.selector)
        if not isinstance(entry, Tag):
            # No further candidate node in the document.
            break

        if entry.name == constants.XML_TEXT_P:
            continue_extraction = True
        elif entry.name == constants.XML_TEXT_LIST:
            # A list without a style name cannot belong to this command.
            continue_extraction = (
                entry.has_attr(style_attr) and entry[style_attr] == style)
        elif entry.name == constants.XML_TABLE_TABLE:
            # A table belongs to the command only if the next list after
            # it is styled like the command's code; there may be none.
            next_list = entry.find_next(constants.XML_TEXT_LIST)
            continue_extraction = (
                next_list is not None
                and next_list.has_attr(style_attr)
                and next_list[style_attr] == style)
        else:
            continue_extraction = False

    return command