def test_136_v3(self):
    """Parse the v3 export of meeting 136 with the v=2 parser and check counts."""
    here = os.path.dirname(os.path.realpath(__file__))
    file_name = os.path.join(here, 'tdocs_by_agenda', '136_v3.html')
    meeting = html_parser.tdocs_by_agenda(file_name, v=2)
    self.assertEqual(meeting.meeting_number, '136', 'Expected 136')
    self.assertEqual(len(meeting.tdocs), 1815, 'Expected TDoc entries')
def test_exported_dtdocs_by_agenda_v2(self):
    """Parse the 2019-01-24 meeting-130 export with the v=2 parser."""
    here = os.path.dirname(os.path.realpath(__file__))
    agenda_file = os.path.join(here, 'tdocs_by_agenda', '2019.01.24 TdocsByAgenda.htm')
    meeting = html_parser.tdocs_by_agenda(agenda_file, v=2)
    self.assertEqual(meeting.meeting_number, '130', 'Expected 130')
    self.assertEqual(len(meeting.tdocs), 1026, 'Expected TDoc entries')
def get_tdocs_of_selected_meeting(self):
    """Return the TDocs DataFrame for the meeting currently selected in the GUI."""
    chosen_meeting = gui.main.tkvar_meeting.get()
    meeting_folder = application.sa2_meeting_data.get_server_folder_for_meeting_choice(
        chosen_meeting)
    agenda_filename = server.get_local_tdocs_by_agenda_filename(meeting_folder)
    local_agenda_file = gui.main.get_tdocs_by_agenda_file_or_url(agenda_filename)
    return html_parser.tdocs_by_agenda(local_agenda_file).tdocs
def test_exported_dtdocs_by_agenda_134_v2(self):
    """Meeting-134 export (v=2 parser): TDoc count plus one revision link."""
    here = os.path.dirname(os.path.realpath(__file__))
    meeting = html_parser.tdocs_by_agenda(
        os.path.join(here, 'tdocs_by_agenda', '134.htm'), v=2)
    self.assertEqual(meeting.meeting_number, '134', 'Expected 134')
    self.assertEqual(len(meeting.tdocs), 1802, 'Expected TDoc entries')
    # Spot-check one revision chain entry
    row = meeting.tdocs.loc['S2-1908578', :]
    self.assertEqual(row['Revision of'], 'S2-1908544', 'Expected S2-1908544')
def test_137e_v2(self):
    """Meeting 137E (default parser): TDoc count and one agenda-item mapping."""
    agenda_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              'tdocs_by_agenda')
    meeting = html_parser.tdocs_by_agenda(
        os.path.join(agenda_dir, '2020.02.29 TdocsByAgenda SA2-137E.htm'))
    self.assertEqual(meeting.meeting_number, '137', 'Expected 137')
    self.assertEqual(len(meeting.tdocs), 713, 'Expected TDoc entries')
    row = meeting.tdocs.loc['S2-2001981', :]
    self.assertEqual(row['AI'], '5.4', 'Expected AI 5.4')
def test_inbox(self):
    """Inbox snapshot of meeting 129BIS: counts and 'Others' co-signer mapping."""
    here = os.path.dirname(os.path.realpath(__file__))
    meeting = html_parser.tdocs_by_agenda(
        os.path.join(here, 'tdocs_by_agenda', 'inbox.htm'))
    self.assertEqual(meeting.meeting_number, '129BIS', 'Expected 129BIS')
    self.assertEqual(len(meeting.tdocs), 1640, 'Expected TDoc entries')
    # The "Others" co-signer mapping should contain exactly 16 entries
    self.assertEqual(len(meeting.others_cosigners), 16, 'Length of the "Others" contributors')
def test_test_df_by_wi_sa134(self):
    """Export the SA#134 TDoc table into a fresh Word document, grouped by WI."""
    here = os.path.dirname(os.path.realpath(__file__))
    agenda_file = os.path.join(here, 'tdocs_by_agenda', '134-2.html')
    df = html_parser.tdocs_by_agenda(agenda_file).tdocs
    doc = word_parser.open_word_document()
    word_parser.insert_doc_data_to_doc_by_wi(df, doc, 'TSGS2_134_Sapporo', source='DT')
def test_130_2(self):
    """Meeting 130 (2019-01-24 snapshot): counts and 'Others' co-signer mapping."""
    here = os.path.dirname(os.path.realpath(__file__))
    meeting = html_parser.tdocs_by_agenda(
        os.path.join(here, 'tdocs_by_agenda', '2019.01.24 TdocsByAgenda.htm'))
    self.assertEqual(meeting.meeting_number, '130', 'Expected 130')
    self.assertEqual(len(meeting.tdocs), 1026, 'Expected TDoc entries')
    # The "Others" co-signer mapping should contain exactly 19 entries
    self.assertEqual(len(meeting.others_cosigners), 19, 'Length of the "Others" contributors')
def test_corrupt_dtdocs_by_agenda(self):
    """A corrupt TdocsByAgenda file still yields its embedded timestamp, but
    full parsing falls back to meeting number 'Unknown' with a single entry.
    """
    file_name = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'tdocs_by_agenda', '2019.07.02 TDocsByAgenda_wrong.html')
    # Renamed local from 'datetime' so it no longer shadows the stdlib module name
    parsed_date = html_parser.tdocs_by_agenda.get_tdoc_by_agenda_date(file_name)
    self.assertEqual(parsed_date.year, 2019)
    self.assertEqual(parsed_date.month, 6)
    self.assertEqual(parsed_date.day, 28)
    self.assertEqual(parsed_date.hour, 16)
    self.assertEqual(parsed_date.minute, 36)
    meeting = html_parser.tdocs_by_agenda(file_name)
    self.assertEqual(meeting.meeting_number, 'Unknown', 'Expected Unknown')
    self.assertEqual(len(meeting.tdocs), 1, 'Expected TDoc entries')
def test_test_df_by_wi_sa134_template(self):
    """Export SA#134 TDocs into the DT report template and save next to the tests."""
    here = os.path.dirname(os.path.realpath(__file__))
    agenda_file = os.path.join(here, 'tdocs_by_agenda', '134-2.html')
    template = os.path.join(here, 'reports', 'Report_3GPP_DT_template.docx')
    df = html_parser.tdocs_by_agenda(agenda_file).tdocs
    doc = word_parser.open_word_document(filename=template)
    word_parser.insert_doc_data_to_doc_by_wi(
        df, doc, 'TSGS2_134_Sapporo', source='DT', save_to_folder=here)
def test_test_df_by_wi_sa134_broken_formatting(self):
    """Reproduce the Word-export formatting breakage between AIs 6.14 and 6.15.1
    by restricting the export to just that agenda-item range.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    agenda_file = os.path.join(here, 'tdocs_by_agenda', '134-2.html')
    df = html_parser.tdocs_by_agenda(agenda_file).tdocs
    # Only the problematic agenda items are exported
    ais_to_output = ['6.14', '6.15', '6.15.1']
    doc = word_parser.open_word_document()
    word_parser.insert_doc_data_to_doc_by_wi(df, doc, 'TSGS2_134_Sapporo',
                                             ais_to_output=ais_to_output)
def test_136_missing_ais(self):
    """Meeting-136 file with missing agenda items: an LS without an AI gets ''."""
    here = os.path.dirname(os.path.realpath(__file__))
    meeting = html_parser.tdocs_by_agenda(
        os.path.join(here, 'tdocs_by_agenda', '136_missing_AIs.html'), v=2)
    self.assertEqual(meeting.meeting_number, '136', 'Expected 136')
    self.assertEqual(len(meeting.tdocs), 1857, 'Expected TDoc entries')
    row = meeting.tdocs.loc['S2-1910969', :]
    self.assertEqual(
        row['Title'],
        'LS from SA WG3LI: LS on Enhancing Location Information Reporting with Dual Connectivity'
    )
    self.assertEqual(row['Result'], 'Noted')
    self.assertEqual(row['Comments'], 'Noted')
    self.assertEqual(row['AI'], '', 'Expected empty string')
def test_129Bis(self):
    """Meeting 129BIS: revision/merge links, original/final TDoc chains and
    per-company contribution flags.

    Fixes two assertion failure messages that did not match the asserted
    values: 'Revised do' -> 'Revised to', and the mistyped TDoc number
    'S2-18122990' -> 'S2-1812299, S2-1812440'.
    """
    file_name = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'tdocs_by_agenda', '129Bis.htm')
    meeting = html_parser.tdocs_by_agenda(file_name)
    self.assertEqual(meeting.meeting_number, '129BIS', 'Expected 129BIS')
    self.assertEqual(len(meeting.tdocs), 1529, 'Expected TDoc entries')
    self.assertEqual(meeting.tdocs.at['S2-1812368', 'Revised to'], 'S2-1813194',
                     'Expected Revised to S2-1813194')
    self.assertEqual(meeting.tdocs.at['S2-1813194', 'Revision of'], 'S2-1812368',
                     'Expected Revision of S2-1812368')
    self.assertEqual(meeting.tdocs.at['S2-1812440', 'Merged to'], 'S2-1813085',
                     'Expected Merged to S2-1813085')
    self.assertEqual(meeting.tdocs.at['S2-1813085', 'Merge of'], 'S2-1812440',
                     'Expected Merge of S2-1812440')
    # Meetings 'from S2#129' or similar (i.e. postponed from a past meeting) are not
    # taken as past references. This is so as to make reporting easier and to make
    # the .htm file self-contained
    self.assertEqual(meeting.tdocs.at['S2-1813085', 'Original TDocs'],
                     'S2-1812299, S2-1812440',
                     'Expected Original TDocs S2-1812299, S2-1812440')
    self.assertEqual(meeting.tdocs.at['S2-1813085', 'Final TDocs'], 'S2-1813308',
                     'Expected Final TDocs S2-1813308')
    # Check some contribution mappings
    self.assertTrue(meeting.tdocs.at['S2-1811737', 'Contributed by DT'],
                    'DT contribution')
    self.assertTrue(meeting.tdocs.at['S2-1811737', 'Contributed by TIM'],
                    'Telecom Italia contribution')
    self.assertTrue(meeting.tdocs.at['S2-1811737', 'Contributed by Intel'],
                    'Intel contribution')
    self.assertFalse(meeting.tdocs.at['S2-1811737', 'Contributed by Qualcomm'],
                     'Qualcomm contribution')
    self.assertFalse(meeting.tdocs.at['S2-1811737', 'Contributed by Nokia'],
                     'Nokia contribution')
    # Check the length of the "Others" mapping
    self.assertEqual(len(meeting.others_cosigners), 16,
                     'Length of the "Others" contributors')
def test_130_4(self):
    """Meeting 130 (2019-01-31 snapshot): revision/merge links, original/final
    TDoc chains and per-company contribution flags for S2-1900064.

    Fixes misleading assertion failure messages: 'Expected 130S' (the test
    asserts '130'), 'ZTE contribution' and 'Nokia Sprint' on assertFalse
    checks that verify the companies did NOT contribute.
    """
    file_name = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'tdocs_by_agenda', '2019.01.31 130.htm')
    meeting = html_parser.tdocs_by_agenda(file_name)
    self.assertEqual(meeting.meeting_number, '130', 'Expected 130')
    self.assertEqual(len(meeting.tdocs), 1306, 'Expected TDoc entries')
    test_row = meeting.tdocs.loc['S2-1900064', :]
    revised_to = test_row['Revised to']
    revision_of = test_row['Revision of']
    merged_to = test_row['Merged to']
    merge_of = test_row['Merge of']
    self.assertEqual(revised_to, 'S2-1901105')
    self.assertEqual(revision_of, '')
    self.assertEqual(merged_to, '')
    self.assertEqual(
        merge_of,
        'S2-1900142, S2-1900585, S2-1900281, S2-1900147, S2-1900587')
    # Meetings 'from S2#129' or similar (i.e. postponed from a past meeting) are not
    # taken as past references. This is so as to make reporting easier and to make
    # the .htm file self-contained
    original_tdocs = test_row['Original TDocs']
    final_tdocs = test_row['Final TDocs']
    self.assertEqual(
        original_tdocs,
        'S2-1900142, S2-1900147, S2-1900281, S2-1900585, S2-1900587')
    self.assertEqual(final_tdocs, 'S2-1901378')
    # Check some contribution mappings
    self.assertTrue(test_row['Contributed by DT'], 'DT contribution')
    self.assertFalse(test_row['Contributed by TIM'],
                     'Not a Telecom Italia contribution')
    self.assertFalse(test_row['Contributed by Intel'],
                     'Not a Intel contribution')
    self.assertFalse(test_row['Contributed by ZTE'], 'Not a ZTE contribution')
    self.assertFalse(test_row['Contributed by Sprint'], 'Not a Sprint contribution')
def test_138e(self):
    """Meeting 138E final snapshot: counts plus two fully-checked TDoc rows."""
    here = os.path.dirname(os.path.realpath(__file__))
    meeting = html_parser.tdocs_by_agenda(
        os.path.join(here, 'tdocs_by_agenda', '138E_final.html'))
    self.assertEqual(meeting.meeting_number, '138E', 'Expected 138E')
    self.assertEqual(len(meeting.tdocs), 824, 'Expected TDoc entries')
    # A revised CR
    row = meeting.tdocs.loc['S2-2002769', :]
    self.assertEqual(row['AI'], '7.10.3', 'Expected AI 7.10.3')
    self.assertEqual(
        row['Title'],
        "23.502 CR2190 (Rel-16, 'F'): UE radio capability for 5GS and IWK")
    self.assertEqual(row['Result'], 'Revised')
    # A noted CR
    row = meeting.tdocs.loc['S2-2002694', :]
    self.assertEqual(row['AI'], '6.3', 'Expected AI 6.3')
    self.assertEqual(
        row['Title'],
        "23.501 CR2226 (Rel-16, 'F'): Multiple N6 interfaces per Network Instance for Ethernet traffic"
    )
    self.assertEqual(row['Comments'], 'Noted')
    self.assertEqual(row['Result'], 'Noted')
# Profiling helper: parse one TdocsByAgenda file so the parser code path can be
# profiled in isolation (in Visual Studio, set this file as the project startup).
import parsing.html as html_parser
import os

agenda_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           'tests', 'tdocs_by_agenda', '136_v2.html')
parsed_meeting = html_parser.tdocs_by_agenda(agenda_path, v=2)
print(parsed_meeting)
def export_and_open_excel(self, local_agenda_file, excel_export, meeting_folder, sheet_name, close_file=False, writer=None):
    """Export the TDoc list (with merged review comments) to an Excel file and
    open it for the user.

    Parses the TdocsByAgenda file, attaches per-TDoc comments found in the
    agenda folder (including meta-comments inherited from 'Revision of' /
    'Merge of' parent TDocs), writes the table to *excel_export* via pandas,
    then post-processes the workbook through COM (filters, column widths,
    colors, hyperlinks). Always re-enables the GUI buttons in ``finally``.

    :param local_agenda_file: local path (or URL fallback) of the TdocsByAgenda file
    :param excel_export: target .xlsx path; appended to if it already exists
    :param meeting_folder: server folder used to build per-TDoc hyperlinks
    :param sheet_name: worksheet name for the exported table
    :param close_file: when True, close the workbook after saving
    :param writer: unused; shadowed below by the ``with pandas.ExcelWriter`` binding
    """
    try:
        tdocs_by_agenda = html_parser.tdocs_by_agenda(
            gui.main.get_tdocs_by_agenda_file_or_url(local_agenda_file))
        # Do not export to Excel the last columns (just a lot of True/False columns for each vendor)
        tdocs_df = tdocs_by_agenda.tdocs.iloc[:, 0:19]
        tdocs = tdocs_df.index.tolist()
        # One (tdoc_id, server URL) pair per TDoc, used later for hyperlinks/coloring
        server_urls = [(tdoc,
                        server.get_remote_filename(meeting_folder, tdoc, use_inbox=False))
                       for tdoc in tdocs]
        tdocs_df.loc[:, parsing.excel.session_comments_column] = ''

        # Get TDoc comments from the comments files
        agenda_folder = os.path.dirname(os.path.abspath(local_agenda_file))
        parsed_comments = parsing.excel.get_comments_from_dir_format(
            agenda_folder, merge_comments=True)
        # Fallback values when no comments file is found at all
        fg_color = {}
        text_color = {}
        if parsed_comments is not None:
            comments = []  # NOTE(review): collected but never read afterwards
            # Generate meta-comments for revision of and merge of for easier review
            for tdoc_idx in tdocs:
                row = tdocs_df.loc[tdoc_idx, :]
                merge_of = row['Merge of']
                revision_of = row['Revision of']
                # NOTE(review): read but unused
                session_comments = row[parsing.excel.session_comments_column]
                # Parent TDocs this one was revised or merged from
                tdoc_parent_list = []
                if revision_of != '':
                    tdoc_parent_list.append(revision_of)
                if merge_of != '':
                    merge_of_parent_list = [
                        e.strip() for e in merge_of.split(',') if e.strip() != ''
                    ]
                    tdoc_parent_list.extend(merge_of_parent_list)
                # Keep only parents that actually carry at least one comment
                comments_for_this_tdoc = [
                    (parent_tdoc, parsed_comments[parent_tdoc])
                    for parent_tdoc in tdoc_parent_list
                    if parent_tdoc in parsed_comments
                    and len(parsed_comments[parent_tdoc]) > 0
                ]
                # Build an inherited meta-comment when any parent has comments
                if len(comments_for_this_tdoc) > 0:
                    # NOTE(review): this inner condition duplicates the outer one,
                    # so the 'else' branch below is unreachable dead code
                    if len(comments_for_this_tdoc) > 0:
                        list_of_sublists = [
                            tup[1] for tup in comments_for_this_tdoc
                        ]
                        # Comment tuples appear to be (tag, text, fg, text_color);
                        # indices 2/3 are the colors — TODO confirm against
                        # parsing.excel.get_comments_from_dir_format
                        fg_colors = [
                            e[2] for sublist in list_of_sublists for e in sublist
                        ]
                        text_colors = [
                            e[3] for sublist in list_of_sublists for e in sublist
                        ]
                        fg_color = parsing.excel.get_reddest_color(fg_colors)
                        text_color = parsing.excel.get_reddest_color(text_colors)
                    else:
                        fg_color = None
                        text_color = None
                    # Merge each parent's comments into one '{ ... }' block per parent
                    merged_texts = []
                    for comment in comments_for_this_tdoc:
                        comment_tdoc = comment[0]
                        comment_list = comment[1]
                        comment_list = [
                            parsing.excel.get_comment_full_text(
                                e[0], '{0}'.format(e[1])) for e in comment_list
                        ]
                        merged_comment_list = '\n'.join(comment_list)
                        merged_comment = parsing.excel.get_comment_full_text(
                            comment_tdoc,
                            '{{\n{0}\n}}'.format(merged_comment_list))
                        merged_texts.append(merged_comment)
                    if len(merged_texts) > 0:
                        merged_comment_text = '\n'.join([e for e in merged_texts])
                        # 'None' generates no tag for the comment
                        parent_comment = (None, merged_comment_text, fg_color,
                                          text_color)
                    else:
                        parent_comment = None
                else:
                    parent_comment = None
                # Store comments
                if tdoc_idx not in parsed_comments:
                    parsed_comments[tdoc_idx] = []
                if parent_comment is not None:
                    parsed_comments[tdoc_idx].append(parent_comment)
                comments.append(parent_comment)
            # Apply comments: concatenate each TDoc's comments into its session column
            for idx, comment_list in parsed_comments.items():
                comment_list_txt = [
                    parsing.excel.get_comment_full_text(comment[0], comment[1],)
                    for comment in comment_list
                ]
                full_comment = '\n'.join(comment_list_txt)
                try:
                    tdocs_df.at[idx, parsing.excel.session_comments_column] = full_comment
                except:
                    # Comment refers to a TDoc not present in the exported table
                    print('Did not find TDoc entry for comment {0}. Skipping'.format(idx))
            # NOTE(review): reconstructed indentation — placed inside the
            # 'parsed_comments is not None' branch so the '{}' defaults above
            # survive when no comments exist; confirm against original layout
            fg_color, text_color = parsing.excel.get_colors_from_comments(
                parsed_comments)

        # Need Pandas 0.24 for this. See https://stackoverflow.com/questions/42589835/adding-a-pandas-dataframe-to-existing-excel-file
        # or just use https://github.com/pandas-dev/pandas/issues/3441#issuecomment-24898286
        # Needs Pandas >= 0.24.0
        # Note that xlsxwriter does NOT support append mode
        if os.path.isfile(excel_export):
            write_mode = 'a'
        else:
            write_mode = 'w'
        with pandas.ExcelWriter(excel_export, engine='openpyxl', mode=write_mode) as writer:
            tdocs_df.to_excel(writer, sheet_name=sheet_name)
        parsing.excel.apply_comments_coloring_and_hyperlinks(
            excel_export, fg_color, text_color, server_urls)

        # Need to reinitialize COM on each thread
        # https://stackoverflow.com/questions/26745617/win32com-client-dispatch-cherrypy-coinitialize-has-not-been-called
        pythoncom.CoInitialize()
        wb = excel_parser.open_excel_document(
            excel_export, excel=excel_parser.get_excel(), sheet_name=sheet_name)
        excel_parser.set_first_row_as_filter(wb)
        excel_parser.adjust_tdocs_by_agenda_column_width(wb)
        excel_parser.set_tdoc_colors(wb, server_urls)
        excel_parser.vertically_center_all_text(wb)
        excel_parser.save_wb(wb)
        if close_file:
            excel_parser.close_wb(wb)
        print('Non-parsed vendors: {0}'.format('\n'.join(
            tdocs_by_agenda.others_cosigners)))
    except:
        # Broad catch: export is best-effort; failure is logged, GUI stays usable
        print('Could not export TDoc list + comments to Excel')
        traceback.print_exc()
    finally:
        # Always restore the GUI controls, even after a failed export
        self.export_button.config(text=ToolsDialog.export_text, state='normal')
        self.tdoc_report_button.config(text=ToolsDialog.export_year_text,
                                       state='normal')
        self.year_entry.config(state='normal')