def __handle_row_token(self, line):
    """
    Record the information carried by one token of a row definition.

    Requires:
        line -- the token line to parse
    Returns:
        nothing
    Logic:
        Tokens in a row definition describe row borders, the borders of
        the cells in the row, and row/cell positions.
        A border token (line[3:5] is 'bd') is handed to
        border_parse.BorderParse. If any key of the parsed dictionary
        starts with 'border-cell', every key/value pair of that token is
        copied into the dictionary that is the last item of the cell list;
        otherwise the pairs are copied into the row dictionary.
        A cell-position token is delegated to __found_cell_position, a
        left-row-position token stores line[20:-1] in the row dictionary,
        and a row-header token marks the row as a header.
        Any other token is ignored.
        Example border token:
            cw<bd<bor-t-r-to<nu<bdr-hair__|bdr-li-wid:0.50
    """
    if line[3:5] == 'bd':
        borders = border_parse.BorderParse().parse_border(line)
        # A single cell key (e.g. border-cell-top-hairline) routes the
        # whole token to the current cell rather than to the row.
        belongs_to_cell = any(key[:11] == 'border-cell' for key in borders)
        if belongs_to_cell:
            target = self.__cell_list[-1]
        else:
            target = self.__row_dict
        target.update(borders)
        return

    token = self.__token_info
    if token == 'cw<tb<cell-posit':
        # cw<tb<cell-posit<nu<216.00
        self.__found_cell_position(line)
        return
    if token == 'cw<tb<row-pos-le':
        # cw<tb<row-pos-le<nu<-5.40
        self.__row_dict['left-row-position'] = line[20:-1]
        return
    if token == 'cw<tb<row-header':
        self.__row_dict['header'] = 'true'
def __initiate_values(self):
    """
    Initiate all values.

    Sets up, in order: the table that converts shortened token names to
    readable names, the tab-related lookup tables, the border parser, the
    collected-string lists, the starting state, the paragraph marker
    strings, and the dictionaries that map state names and token
    identifiers to their handler methods.

    Takes no arguments and returns nothing; it only assigns attributes.
    """
    # Dictionary needed to convert shortened style names to readable names.
    #
    # Each key appears exactly once. This literal used to repeat
    # 'list-text_', 'list-level' and 'bor-par-bo'; in a dict literal the
    # last value wins, so the earlier values were never used. Only the
    # effective value is kept, at the position of the key's first
    # occurrence, so both content and iteration order are unchanged.
    #
    # NOTE(review): several values look misspelled ('superscipt',
    # 'list-uppercae-roman-numbering', 'border-outisde', 'tripple',
    # 'think-thick-think-large'). They are left untouched because they are
    # runtime strings that other code may match on -- confirm before fixing.
    self.__token_dict = {
        # paragraph formatting => pf
        'par-end___': 'para',
        'par-def___': 'paragraph-definition',
        'keep-w-nex': 'keep-with-next',
        'widow-cntl': 'widow-control',
        'adjust-rgt': 'adjust-right',
        'language__': 'language',
        'right-inde': 'right-indent',
        'fir-ln-ind': 'first-line-indent',
        'left-inden': 'left-indent',
        'space-befo': 'space-before',
        'space-afte': 'space-after',
        'line-space': 'line-spacing',
        'default-ta': 'default-tab',
        'align_____': 'align',
        'widow-cntr': 'widow-control',
        # stylesheet => ss
        'style-shet': 'stylesheet',
        'based-on__': 'based-on-style',
        'next-style': 'next-style',
        'char-style': 'character-style',
        # this is changed to get a nice attribute
        'para-style': 'name',
        # graphics => gr
        'picture___': 'pict',
        'obj-class_': 'obj_class',
        'mac-pic___': 'mac-pict',
        # section => sc
        'section___': 'section-new',
        'sect-defin': 'section-reset',
        'sect-note_': 'endnotes-in-section',
        # list => ls
        'list-text_': 'list-text',
        'list______': 'list',
        'list-lev-d': 'list-level-definition',
        'list-cardi': 'list-cardinal-numbering',
        'list-decim': 'list-decimal-numbering',
        'list-up-al': 'list-uppercase-alphabetic-numbering',
        'list-up-ro': 'list-uppercae-roman-numbering',
        'list-ord__': 'list-ordinal-numbering',
        'list-ordte': 'list-ordinal-text-numbering',
        'list-bulli': 'list-bullet',
        'list-simpi': 'list-simple',
        'list-conti': 'list-continue',
        'list-hang_': 'list-hang',
        # 'list-tebef' (list-text-before) is intentionally not mapped.
        # Previously 'level' here, overridden by a later 'list-level'
        # entry; the overriding value is the one kept.
        'list-level': 'list-level',
        'list-id___': 'list-id',
        'list-start': 'list-start',
        'nest-level': 'nest-level',
        # notes => nt
        'footnote__': 'footnote',
        'type______': 'type',
        # anchor => an
        'toc_______': 'anchor-toc',
        'book-mk-st': 'bookmark-start',
        'book-mk-en': 'bookmark-end',
        'index-mark': 'anchor-index',
        'place_____': 'place',
        # field => fd
        'field_____': 'field',
        'field-inst': 'field-instruction',
        'field-rslt': 'field-result',
        'datafield_': 'data-field',
        # info-tables => it
        'font-table': 'font-table',
        'colr-table': 'color-table',
        'lovr-table': 'list-override-table',
        'listtable_': 'list-table',
        'revi-table': 'revision-table',
        # character info => ci
        'hidden____': 'hidden',
        'italics___': 'italics',
        'bold______': 'bold',
        'strike-thr': 'strike-through',
        'shadow____': 'shadow',
        'outline___': 'outline',
        'small-caps': 'small-caps',
        'caps______': 'caps',
        'dbl-strike': 'double-strike-through',
        'emboss____': 'emboss',
        'engrave___': 'engrave',
        'subscript_': 'subscript',
        'superscrip': 'superscipt',
        'font-style': 'font-style',
        'font-color': 'font-color',
        'font-size_': 'font-size',
        'font-up___': 'superscript',
        'font-down_': 'subscript',
        'red_______': 'red',
        'blue______': 'blue',
        'green_____': 'green',
        # table => tb
        'row-def___': 'row-definition',
        'cell______': 'cell',
        'row_______': 'row',
        'in-table__': 'in-table',
        'columns___': 'columns',
        'row-pos-le': 'row-position-left',
        'cell-posit': 'cell-position',
        # preamble => pr
        # underline
        'underlined': 'underlined',
        # border => bd
        'bor-t-r-hi': 'border-table-row-horizontal-inside',
        'bor-t-r-vi': 'border-table-row-vertical-inside',
        'bor-t-r-to': 'border-table-row-top',
        'bor-t-r-le': 'border-table-row-left',
        'bor-t-r-bo': 'border-table-row-bottom',
        'bor-t-r-ri': 'border-table-row-right',
        'bor-cel-bo': 'border-cell-bottom',
        'bor-cel-to': 'border-cell-top',
        'bor-cel-le': 'border-cell-left',
        'bor-cel-ri': 'border-cell-right',
        # Previously 'border-paragraph-bottom' here, overridden by a later
        # 'border-paragraph-box' entry under the same key; the overriding
        # value is the one kept.
        'bor-par-bo': 'border-paragraph-box',
        'bor-par-to': 'border-paragraph-top',
        'bor-par-le': 'border-paragraph-left',
        'bor-par-ri': 'border-paragraph-right',
        'bor-for-ev': 'border-for-every-paragraph',
        'bor-outsid': 'border-outisde',
        'bor-none__': 'border',
        # border type => bt
        'bdr-single': 'single',
        'bdr-doubtb': 'double-thickness-border',
        'bdr-shadow': 'shadowed-border',
        'bdr-double': 'double-border',
        'bdr-dotted': 'dotted-border',
        'bdr-dashed': 'dashed',
        'bdr-hair__': 'hairline',
        'bdr-inset_': 'inset',
        'bdr-das-sm': 'dash-small',
        'bdr-dot-sm': 'dot-dash',
        'bdr-dot-do': 'dot-dot-dash',
        'bdr-outset': 'outset',
        'bdr-trippl': 'tripple',
        'bdr-thsm__': 'thick-thin-small',
        'bdr-htsm__': 'thin-thick-small',
        'bdr-hthsm_': 'thin-thick-thin-small',
        # NOTE(review): the keys from here to 'bdr-frame' are 9 characters
        # long while every other key is 10 -- presumably they can never
        # match a fixed-width token slice; verify against the tokenizer.
        'bdr-thm__': 'thick-thin-medium',
        'bdr-htm__': 'thin-thick-medium',
        'bdr-hthm_': 'thin-thick-thin-medium',
        'bdr-thl__': 'thick-thin-large',
        'bdr-hthl_': 'think-thick-think-large',
        'bdr-wavy_': 'wavy',
        'bdr-d-wav': 'double-wavy',
        'bdr-strip': 'striped',
        'bdr-embos': 'emboss',
        'bdr-engra': 'engrave',
        'bdr-frame': 'frame',
        'bdr-li-wid': 'line-width',
    }
    # Tab tokens => the method that handles each of them.
    self.__tabs_dict = {
        'cw<pf<tab-stop__': self.__tab_stop_func,
        'cw<pf<tab-center': self.__tab_type_func,
        'cw<pf<tab-right_': self.__tab_type_func,
        'cw<pf<tab-dec___': self.__tab_type_func,
        'cw<pf<leader-dot': self.__tab_leader_func,
        'cw<pf<leader-hyp': self.__tab_leader_func,
        'cw<pf<leader-und': self.__tab_leader_func,
        'cw<pf<tab-bar-st': self.__tab_bar_func,
    }
    # Tab type and leader tokens => readable value.
    self.__tab_type_dict = {
        'cw<pf<tab-center': 'center',
        'cw<pf<tab-right_': 'right',
        'cw<pf<tab-dec___': 'decimal',
        'cw<pf<leader-dot': 'leader-dot',
        'cw<pf<leader-hyp': 'leader-hyphen',
        'cw<pf<leader-und': 'leader-underline',
    }
    self.__border_obj = border_parse.BorderParse()
    self.__style_num_strings = []
    self.__body_style_strings = []
    self.__state = 'before_1st_para_def'
    self.__att_val_dict = {}
    self.__start_marker = 'mi<mk<pard-start\n'  # outside para tags
    self.__start2_marker = 'mi<mk<pardstart_\n'  # inside para tags
    self.__end2_marker = 'mi<mk<pardend___\n'  # inside para tags
    self.__end_marker = 'mi<mk<pard-end__\n'  # outside para tags
    self.__text_string = ''
    # State name => the method that processes a line in that state.
    self.__state_dict = {
        'before_1st_para_def': self.__before_1st_para_def_func,
        'collect_tokens': self.__collect_tokens_func,
        'after_para_def': self.__after_para_def_func,
        'in_paragraphs': self.__in_paragraphs_func,
        'after_para_end': self.__after_para_end_func,
    }
    # The four dictionaries below map a token identifier to a handler
    # method; each is named after the state it belongs to.
    self.__collect_tokens_dict = {
        'mi<mk<para-start': self.__end_para_def_func,
        'cw<pf<par-def___': self.__para_def_in_para_def_func,
        'cw<tb<cell______': self.__empty_table_element_func,
        'cw<tb<row_______': self.__empty_table_element_func,
    }
    self.__after_para_def_dict = {
        'mi<mk<para-start': self.__start_para_after_def_func,
        'cw<pf<par-def___': self.__found_para_def_func,
        'cw<tb<cell______': self.__empty_table_element_func,
        'cw<tb<row_______': self.__empty_table_element_func,
    }
    self.__in_paragraphs_dict = {
        'mi<mk<para-end__': self.__found_para_end_func,
    }
    self.__after_para_end_dict = {
        'mi<mk<para-start': self.__continue_block_func,
        'mi<mk<para-end__': self.__continue_block_func,
        'cw<pf<par-def___': self.__new_para_def_func,
        'mi<mk<body-close': self.__stop_block_func,
        'mi<mk<par-in-fld': self.__stop_block_func,
        'cw<tb<cell______': self.__stop_block_func,
        'cw<tb<row-def___': self.__stop_block_func,
        'cw<tb<row_______': self.__stop_block_func,
        'mi<mk<sect-close': self.__stop_block_func,
        'mi<mk<sect-start': self.__stop_block_func,
        'mi<mk<header-beg': self.__stop_block_func,
        'mi<mk<header-end': self.__stop_block_func,
        'mi<mk<head___clo': self.__stop_block_func,
        'mi<mk<fldbk-end_': self.__stop_block_func,
        'mi<mk<lst-txbeg_': self.__stop_block_func,
    }
def __initiate_values(self):
    """
    Initiate all values.

    Creates the border parser, the empty paragraph/character style
    containers and scalar defaults, the handler lookup tables, the table
    that converts shortened token names to readable names, and the
    tab-related tables.

    Takes no arguments and returns nothing; it only assigns attributes.
    """
    self.__border_obj = border_parse.BorderParse()

    # Containers and scalar defaults.
    self.__styles_dict = {'par': {}, 'char': {}}
    self.__styles_num = '0'
    self.__type_of_style = 'par'
    self.__text_string = ''
    self.__state = 'before_styles_table'
    self.__tab_type = 'left'
    self.__leader_found = 0
    self.__ignore_list = [
        'list-tebef',
    ]

    # One table holds both the state names and the marker/control-word
    # tokens, each mapped to its handler method.
    self.__state_dict = {
        'before_styles_table': self.__before_styles_func,
        'in_styles_table': self.__in_styles_func,
        'in_individual_style': self.__in_individual_style_func,
        'after_styles_table': self.__after_styles_func,
        'mi<mk<styles-beg': self.__found_styles_table_func,
        'mi<mk<styles-end': self.__found_end_styles_table_func,
        'mi<mk<stylei-beg': self.__found_beg_ind_style_func,
        'mi<mk<stylei-end': self.__found_end_ind_style_func,
        'cw<ss<para-style': self.__para_style_func,
        'cw<ss<char-style': self.__char_style_func,
    }
    # A separate dictionary for parsing the body text: token => (handler,
    # style type).
    self.__body_dict = {
        'cw<ss<para-style': (self.__para_style_in_body_func, 'par'),
        'cw<ss<char-style': (self.__para_style_in_body_func, 'char'),
    }

    # Shortened style names => readable names, written as one group per
    # token category and merged below in this order.
    # paragraph formatting => pf
    paragraph_names = {
        'par-end___': 'para',
        'par-def___': 'paragraph-definition',
        'keep-w-nex': 'keep-with-next',
        'widow-cntl': 'widow-control',
        'adjust-rgt': 'adjust-right',
        'language__': 'language',
        'right-inde': 'right-indent',
        'fir-ln-ind': 'first-line-indent',
        'left-inden': 'left-indent',
        'space-befo': 'space-before',
        'space-afte': 'space-after',
        'line-space': 'line-spacing',
        'default-ta': 'default-tab',
        'align_____': 'align',
        'widow-cntr': 'widow-control',
    }
    # page formatting mixed in! (Just in older RTF?)
    page_names = {
        'margin-lef': 'left-indent',
        'margin-rig': 'right-indent',
        'margin-bot': 'space-after',
        'margin-top': 'space-before',
    }
    # stylesheet => ss
    stylesheet_names = {
        'style-shet': 'stylesheet',
        'based-on__': 'based-on-style',
        'next-style': 'next-style',
        'char-style': 'character-style',
        'para-style': 'paragraph-style',
    }
    # graphics => gr
    graphics_names = {
        'picture___': 'pict',
        'obj-class_': 'obj_class',
        'mac-pic___': 'mac-pict',
    }
    # section => sc
    section_names = {
        'section___': 'section-new',
        'sect-defin': 'section-reset',
        'sect-note_': 'endnotes-in-section',
    }
    # list => ls
    # 'list-tebef' has no entry here; it is listed in the ignore list
    # above instead.
    list_names = {
        'list-text_': 'list-text',
        'list______': 'list',
        'list-lev-d': 'list-level-definition',
        'list-cardi': 'list-cardinal-numbering',
        'list-decim': 'list-decimal-numbering',
        'list-up-al': 'list-uppercase-alphabetic-numbering',
        'list-up-ro': 'list-uppercae-roman-numbering',
        'list-ord__': 'list-ordinal-numbering',
        'list-ordte': 'list-ordinal-text-numbering',
        'list-bulli': 'list-bullet',
        'list-simpi': 'list-simple',
        'list-conti': 'list-continue',
        'list-hang_': 'list-hang',
        'list-id___': 'list-id',
        'list-start': 'list-start',
        'nest-level': 'nest-level',
        'list-level': 'list-level',
    }
    # notes => nt
    note_names = {
        'footnote__': 'footnote',
        'type______': 'type',
    }
    # anchor => an
    anchor_names = {
        'toc_______': 'anchor-toc',
        'book-mk-st': 'bookmark-start',
        'book-mk-en': 'bookmark-end',
        'index-mark': 'anchor-index',
        'place_____': 'place',
    }
    # field => fd
    field_names = {
        'field_____': 'field',
        'field-inst': 'field-instruction',
        'field-rslt': 'field-result',
        'datafield_': 'data-field',
    }
    # info-tables => it
    info_table_names = {
        'font-table': 'font-table',
        'colr-table': 'color-table',
        'lovr-table': 'list-override-table',
        'listtable_': 'list-table',
        'revi-table': 'revision-table',
    }
    # character info => ci
    character_names = {
        'hidden____': 'hidden',
        'italics___': 'italics',
        'bold______': 'bold',
        'strike-thr': 'strike-through',
        'shadow____': 'shadow',
        'outline___': 'outline',
        'small-caps': 'small-caps',
        'dbl-strike': 'double-strike-through',
        'emboss____': 'emboss',
        'engrave___': 'engrave',
        'subscript_': 'subscript',
        'superscrip': 'superscript',
        'plain_____': 'plain',
        'font-style': 'font-style',
        'font-color': 'font-color',
        'font-size_': 'font-size',
        'font-up___': 'superscript',
        'font-down_': 'subscript',
        'red_______': 'red',
        'blue______': 'blue',
        'green_____': 'green',
        'caps______': 'caps',
    }
    # table => tb
    table_names = {
        'row-def___': 'row-definition',
        'cell______': 'cell',
        'row_______': 'row',
        'in-table__': 'in-table',
        'columns___': 'columns',
        'row-pos-le': 'row-position-left',
        'cell-posit': 'cell-position',
    }
    # preamble => pr (no entries)
    # underline
    underline_names = {
        'underlined': 'underlined',
    }
    # border => bd
    # 'bor-par-bo' stands for the paragraph box here, not the paragraph
    # bottom border.
    border_names = {
        'bor-t-r-hi': 'border-table-row-horizontal-inside',
        'bor-t-r-vi': 'border-table-row-vertical-inside',
        'bor-t-r-to': 'border-table-row-top',
        'bor-t-r-le': 'border-table-row-left',
        'bor-t-r-bo': 'border-table-row-bottom',
        'bor-t-r-ri': 'border-table-row-right',
        'bor-cel-bo': 'border-cell-bottom',
        'bor-cel-to': 'border-cell-top',
        'bor-cel-le': 'border-cell-left',
        'bor-cel-ri': 'border-cell-right',
        'bor-par-to': 'border-paragraph-top',
        'bor-par-le': 'border-paragraph-left',
        'bor-par-ri': 'border-paragraph-right',
        'bor-par-bo': 'border-paragraph-box',
        'bor-for-ev': 'border-for-every-paragraph',
        'bor-outsid': 'border-outisde',
        'bor-none__': 'border',
    }
    # border type => bt
    border_type_names = {
        'bdr-single': 'single',
        'bdr-doubtb': 'double-thickness-border',
        'bdr-shadow': 'shadowed-border',
        'bdr-double': 'double-border',
        'bdr-dotted': 'dotted-border',
        'bdr-dashed': 'dashed',
        'bdr-hair__': 'hairline',
        'bdr-inset_': 'inset',
        'bdr-das-sm': 'dash-small',
        'bdr-dot-sm': 'dot-dash',
        'bdr-dot-do': 'dot-dot-dash',
        'bdr-outset': 'outset',
        'bdr-trippl': 'tripple',
        'bdr-thsm__': 'thick-thin-small',
        'bdr-htsm__': 'thin-thick-small',
        'bdr-hthsm_': 'thin-thick-thin-small',
        'bdr-thm__': 'thick-thin-medium',
        'bdr-htm__': 'thin-thick-medium',
        'bdr-hthm_': 'thin-thick-thin-medium',
        'bdr-thl__': 'thick-thin-large',
        'bdr-hthl_': 'think-thick-think-large',
        'bdr-wavy_': 'wavy',
        'bdr-d-wav': 'double-wavy',
        'bdr-strip': 'striped',
        'bdr-embos': 'emboss',
        'bdr-engra': 'engrave',
        'bdr-frame': 'frame',
        'bdr-li-wid': 'line-width',
    }
    # tabs
    tab_names = {
        'tab-center': 'center',
        'tab-right_': 'right',
        'tab-dec___': 'decimal',
        'leader-dot': 'leader-dot',
        'leader-hyp': 'leader-hyphen',
        'leader-und': 'leader-underline',
    }
    # No key occurs in more than one group, so merging in this order
    # yields the same mapping as one flat literal.
    readable_names = {}
    for group in (
        paragraph_names,
        page_names,
        stylesheet_names,
        graphics_names,
        section_names,
        list_names,
        note_names,
        anchor_names,
        field_names,
        info_table_names,
        character_names,
        table_names,
        underline_names,
        border_names,
        border_type_names,
        tab_names,
    ):
        readable_names.update(group)
    self.__token_dict = readable_names

    # Tab tokens => the method that handles each of them.
    self.__tabs_dict = {
        'cw<pf<tab-stop__': self.__tab_stop_func,
        'cw<pf<tab-center': self.__tab_type_func,
        'cw<pf<tab-right_': self.__tab_type_func,
        'cw<pf<tab-dec___': self.__tab_type_func,
        'cw<pf<leader-dot': self.__tab_leader_func,
        'cw<pf<leader-hyp': self.__tab_leader_func,
        'cw<pf<leader-und': self.__tab_leader_func,
        'cw<pf<tab-bar-st': self.__tab_bar_func,
    }
    # Tab type and leader tokens => readable value.
    self.__tab_type_dict = {
        'cw<pf<tab-center': 'center',
        'cw<pf<tab-right_': 'right',
        'cw<pf<tab-dec___': 'decimal',
        'cw<pf<leader-dot': 'leader-dot',
        'cw<pf<leader-hyp': 'leader-hyphen',
        'cw<pf<leader-und': 'leader-underline',
    }
    self.__tabs_list = self.__tabs_dict.keys()