def test():
    """OCR one hard-coded Arabic book page, dump the words to a text file,
    and insert each word into the pdf_dict table (ignoring duplicates)."""
    page = r"C:\Yahia\Home\Yahia-Dev\Python\training\pdf\data\ABH pages\ABH-025.png"
    # output text file named after the page image, e.g. .\out\ABH-025.txt
    outfile = f".\\out\\{os.path.split(page)[1].split('.')[0]}.txt"
    # 'ara' = Arabic tesseract language pack
    words = str(pytesseract.image_to_string(Image.open(page), lang='ara'))
    # fix: context manager guarantees the file is closed even if a write fails
    with open(outfile, "w", encoding='utf-8') as f:
        f.write(words)
        f.write('----------------------------\n')
        for w in words.split():
            f.write(w + "\n")
    conn, cursor = open_db()
    for w in words.split():
        # word stored twice (raw + editable copy); duplicates are skipped
        insert_row_list(conn, cursor, 'pdf_dict', [w, w], ignoreUnique=True)
    conn.commit()
    close_db(cursor)
def load_ACH_folder():
    """Ask the user for a folder, clear previously loaded ACH data, then load
    every .XML file under the folder (recursively) via xml_to_sqlite()."""
    data_folder = filedialog.askdirectory(initialdir=".",
                                          title="select data folder",
                                          mustexist=True)
    if not data_folder:
        return  # user cancelled the dialog
    _, cursor = open_db()
    # Guard each delete: on a fresh database the table may not exist yet.
    for table in ("GrpHdr", "pacs_008_001_01",
                  "pacs_002_001_02", "pacs_004_001_01"):
        if table_exists(table, cursor) > 0:
            exec_db_cmd(f'delete from {table}')
    close_db(cursor)
    for folder, subs, files in os.walk(data_folder):
        for f in files:
            _, file_extension = os.path.splitext(f)
            if file_extension.upper() != ".XML":
                continue
            # fix: removed a redundant nested os.path.join
            xml_to_sqlite(os.path.join(folder, f))
def xml_to_sqlite(file_name):
    """Parse one pacs XML file and insert its records into SQLite.

    The GrpHdr child supplies the MsgId used to tag every subsequent
    transaction row of the same message.
    """
    print(file_name)
    tree = ET.parse(file_name)
    root = tree.getroot()
    conn, cursor = open_db()
    # fix: MsgId was unbound if a transaction row appeared before any GrpHdr
    MsgId = None
    for i, L0 in enumerate(root):
        print(f"{i} - len(L0): {len(L0)}, {local_name(L0.tag)}")
        for L1 in L0:
            L1_rec = dict_to_1L_dict(xml_to_dict(L1))
            if not L1_rec:
                continue
            if local_name(L1.tag) == "GrpHdr":
                table_name = 'GrpHdr'
                MsgId = L1_rec.get('GrpHdr_MsgId')
                rec = {
                    'pacsId': local_name(root[0].tag),
                    "filename": file_name
                }
                rec.update(L1_rec)
                insert_db_rec(conn, cursor, rec, table_name)
            else:
                # table name derives from the message type, e.g. pacs.008.001.01
                table_name = local_name(L0.tag).replace('.', '_')
                L1_rec.update({'MsgId': MsgId})
                insert_db_rec(conn, cursor, L1_rec, table_name)
            conn.commit()
    close_db(cursor)
def attendees_last_2_month(file_name):
    """Plot a stacked bar chart of attendance per meeting date for the last
    two months and save it to file_name.

    bars1 = total attendees per date; bars2 = attendees with no matching
    student record (presumably external — confirm against the join logic).
    """
    # fix: removed the needless f-prefix (the string has no placeholders)
    sql = """SELECT meeting_date, COUNT(name), COUNT(firstname),
                    COUNT(name) - COUNT(firstname)
             FROM (SELECT DATE(join_time) as meeting_date, type as meeting_type,
                          topic, name, firstname
                   FROM attendees a
                   LEFT Join students ON a.user_email = students.email
                   LEFT Join meetings m ON a.meeting_uuid = m.uuid
                   WHERE DATE(join_time) > DATE('now', '-2 Month')
                   GROUP BY meeting_date, name)
             GROUP BY meeting_date ORDER BY 1"""
    conn, cursor = open_db()  # fix: locals were misspelled comm/curspr
    rows = exec_query(cursor, sql)
    close_db(cursor)
    bars1, bars2, names = [], [], []
    for row in rows:
        names.append(row[0])  # meeting date
        bars1.append(row[1])  # COUNT(name): total attendees
        bars2.append(row[3])  # COUNT(name) - COUNT(firstname): no student match
    plot_stacked_bar(bars1, bars2, names, file_name)
def load_ACH():
    """Clear the ACH tables and load pacs XML data: either one hard-coded
    file (current setting) or every .XML file under data_folder.

    Parsing exceptions are appended to .\\out\\exceptions.txt.
    """
    file_name = r"C:\Yahia\Home\Yahia-Dev\Python\training\xml\ACH\tttt\29_PACS008_2021080811485962830.XML"
    data_folder = r"C:\Yahia\Home\Yahia-Dev\Python\training\xml\ACH"
    conn, cursor = open_db()
    exec_db_cmd('delete from GrpHdr')
    exec_db_cmd('delete from trx')
    exec_db_cmd('delete from pacs_002_004')
    single_file = True  # fix: replaces 'if True:'; flip to walk data_folder
    # fix: context manager closes the exceptions log even if parsing raises
    with open(r".\out\exceptions.txt", "wt", encoding="UTF8") as fexception:
        if single_file:
            parse_pacs_file(file_name, conn, cursor, fexception)
        else:
            for folder, subs, files in os.walk(data_folder):
                for f in files:
                    _, file_extension = os.path.splitext(f)
                    if file_extension.upper() != ".XML":
                        continue
                    # fix: removed a redundant nested os.path.join
                    parse_pacs_file(os.path.join(folder, f),
                                    conn, cursor, fexception)
        conn.commit()
        close_db(cursor)
def load_increment_scan_result(file_path, output):
    """Load an incremental vulnerability scan, replace the old records for
    the scanned IPs, then refresh the statistics.

    file_path: a folder path (str) of CSVs, or an iterable of CSV files.
    output: tkinter Text-like widget used as a progress log.
    """
    # fix: '...' + file_path raised TypeError when file_path is a file list
    output.insert(END, f'Loadind the files:{file_path} .....\n')
    if isinstance(file_path, str):  # fix: isinstance, not type() ==
        scan_data = load_scan_dir_csv(file_path, output)  # folder of CSVs
    else:
        scan_data = load_scan_files(file_path, output)    # explicit file list
    scan_data = replace_temp_profile_name(scan_data)
    ips = [r[IP_ADDRESS_COL] for r in scan_data]
    output.insert(END, " ------- del_old_vulners ---------------\n")
    # NOTE(review): IPs are interpolated straight into SQL — safe only while
    # scan CSVs are trusted; parameterize if exec_db_cmd supports it.
    ip_list = '"' + '","'.join(ips) + '"'
    exec_db_cmd(f"DELETE FROM vulnerability WHERE ipaddress in ({ip_list})")
    output.insert(END, " ---- Inset new scan records \n")
    conn, cursor = open_db()
    for row in scan_data:
        insert_row_vulner(conn, cursor, row)
    conn.commit()
    close_db(cursor)
    output.insert(END, " Add new stats\n")
    if add_new_assets():
        # New assets need a manual profile_group before stats make sense.
        messagebox.showinfo(
            "Warning",
            "Newly added assets to assets with profile_group '?', please arrange to adjust and then run menu 'Update Stats'"
        )
        return
    output.insert(END, " Update Stats \n")
    update_stats(output)  # update 'current_' fields with the new stats
def attendees_per_day_of_week(file_name):
    """Plot a stacked bar chart of attendance aggregated by day of week
    and save it to file_name."""
    # fix: removed the needless f-prefix (the string has no placeholders)
    sql = """SELECT strftime('%w',meeting_date) as day_of_week, COUNT(name),
                    COUNT(firstname), COUNT(name) - COUNT(firstname)
             FROM (SELECT DATE(join_time) as meeting_date, name, firstname
                   FROM attendees a
                   LEFT Join students ON a.user_email = students.email
                   LEFT Join meetings m ON a.meeting_uuid = m.uuid
                   GROUP BY meeting_date, name)
             GROUP BY day_of_week ORDER BY 1"""
    conn, cursor = open_db()  # fix: locals were misspelled comm/curspr
    rows = exec_query(cursor, sql)
    close_db(cursor)
    # strftime('%w') yields 0=Sunday .. 6=Saturday; labels kept verbatim
    names = [
        'Sunday', 'Monday', 'Tuesday', 'Wednesday ', 'Thursday', 'Friday',
        'Saturday'
    ]
    bars1, bars2 = [], []
    for row in rows:
        bars1.append(row[1])  # COUNT(name): total attendees that weekday
        bars2.append(row[3])  # difference: attendees with no student match
    plot_stacked_bar(bars1, bars2, names, file_name)
def check_new_assets(scan_data):  # not used anymore
    """Return the profile names for the scanned IPs, inserting a placeholder
    assets row (with '?' fields) for any IP not yet in the assets table.

    NOTE: kept for reference — callers now use add_new_assets() instead.
    """
    conn, cursor = open_db()
    new_servers = False  # fix: was unbound when no new server was found
    profile_list = []    # fix: was never initialized before append
    for row in scan_data:
        ip = row[IP_ADDRESS_COL]
        cmd = f"SELECT profile_name FROM assets WHERE ipaddress = '{ip}'"
        header, lst = query_to_list(cmd)
        if len(lst) == 0:
            # IP not found: add as-is with '?' placeholders until fixed manually
            print(f"Newly added server: {ip}, tmp profile:{row[0]}")
            new_servers = True
            profile_name = row[0]
            profile_list.append(profile_name)
            rec = [ip, profile_name, row[ASSET_NAME_COL], "?", "?", ""]
            try:
                insert_row_list(conn, cursor, "assets", rec)
                conn.commit()
            except Exception:  # fix: bare except; also dropped needless f-prefix
                print("check_new_assets: record already exist")
                continue  # record already exists from a previous scan row
        else:
            # found: use the asset's real profile_name, not the scan's temp one
            profile_list.append(lst[0][0])
    close_db(cursor)
    if new_servers:
        messagebox.showinfo(
            "Warning",
            "Newly added assets to assets name '(NEW)', please arrange to adjust")
    return profile_list
def load_zoom_meetings(from_dt, to_date=""):
    """Fetch the Zoom meetings report between from_dt and to_date and load
    each meeting (plus its participants) into the database.

    Meetings already present are skipped. Pagination is not implemented:
    page_size=300 is the maximum single page Zoom allows.
    """
    conn, cursor = open_db()
    create_tables(cursor)  # create tables if they do not exist yet
    # fix: page_size was appended with a second '?' — query params join with '&'
    meeting_report_url = (
        f"https://api.zoom.us/v2/report/users/{config.get('USER_ID')}"
        f"/meetings?from={from_dt}&to={to_date}&page_size=300")
    headers = {"Authorization": "Bearer " + config.get("JWT_TOKEN")}
    r = requests.get(meeting_report_url, headers=headers)
    meetings = json.loads(r.content)
    if meetings.get("code") == 124:  # Zoom error payload (e.g. bad token)
        print(meetings.get("message"))
        close_db(cursor)  # fix: cursor was leaked on this early return
        return
    for m in meetings.get('meetings', []):
        print(f"meeting-id: {m.get('id')}, "
              f"type: {m.get('type')}, "
              f"topic: {m.get('topic')}, "
              f"start time: {m.get('start_time')}, "
              f"No of Participants: {m.get('participants_count')}")
        if insert_row_meeting(conn, cursor, m) == -1:
            print("meeting already loaded, skip it ...")
            continue  # meeting already exists, skip it
        if "uuid" in m:  # meeting id keys the participants report
            load_meeting_participants(m["uuid"], None, conn, cursor)
    close_db(cursor)
def get_page_dict(page_no):
    """Return all pdf_book rows for the given page number, or None when the
    page has no rows."""
    conn, cursor = open_db()
    result = exec_query(cursor,
                        f"SELECT * FROM pdf_book WHERE page_no = {page_no}")
    close_db(cursor)
    return result if result else None
def load_book(first_page=3, last_page=74):
    """OCR book pages first_page..last_page (inclusive) and insert every
    word into pdf_book tagged with its page number.

    Defaults reproduce the original hard-coded range(3, 75); the range is
    now a parameter so other page spans can be loaded.
    """
    page_path = r"C:\Yahia\Home\Yahia-Dev\Python\training\pdf\data\books\ABH\pages"
    conn, cursor = open_db()
    for page_no in range(first_page, last_page + 1):
        # page files are zero-padded to three digits, e.g. ABH-007.png
        page = os.path.join(page_path, f"ABH-{format(page_no, '03')}.png")
        print(page_no)
        words = str(pytesseract.image_to_string(Image.open(page), lang='ara'))
        for w in words.split():
            insert_row_list(conn, cursor, 'pdf_book', [w, w, page_no, None])
        conn.commit()  # commit per page so a crash loses at most one page
    close_db(cursor)
def query_to_excel(cmd, file_name, header=None):
    """Run a SQL query and save its result set to an Excel workbook.

    header: optional list of column titles; when omitted (or empty) the
    names are derived from the query itself.
    """
    conn, cursor = open_db()
    records = exec_query(cursor, cmd)
    close_db(cursor)
    if not header:
        header = get_col_names(conn, cmd)
    workbook = Workbook()
    sheet = workbook.active
    sheet.append(header)
    for record in records:
        sheet.append(record)
    workbook.save(file_name)
def rebuild_page_text(page_no):
    """Re-run the dictionary-based edit pass on one page's raw OCR text,
    store the edited text back into page_text and refresh the dictionary.

    Returns the edited text, or None when the page does not exist.
    """
    conn, cursor = open_db()
    rows = exec_query(
        cursor, f"SELECT page_raw FROM page_text WHERE page_no = {page_no}")
    if len(rows) == 0:
        # fix: tk.messagebox('...') called a module object (TypeError)
        messagebox.showinfo("Warning", "Page not found")
        close_db(cursor)
        return
    edited_page = edit_page_text_details(rows[0][0], cursor, page_no)
    # fix: double embedded single quotes (standard SQL escaping) so the
    # UPDATE no longer breaks on pages containing apostrophes
    safe_text = edited_page.replace("'", "''")
    cmd = (f"UPDATE page_text SET page_edited_txt = '{safe_text}' "
           f"WHERE page_no = {page_no}")
    if exec_db_cmd_conn(cmd, cursor) == 0:  # 0 == success
        update_dict(page_no, conn, cursor)
        conn.commit()
    close_db(cursor)
    return edited_page
def load_db(data_folder, output):
    """Walk data_folder, load every .csv vulnerability scan into the DB
    (skipping 'old' sub-folders), then rebuild the baseline statistics.

    output: tkinter Text-like widget used as a progress log.
    """
    if os.path.isdir(data_folder):
        # fix: message typo 'Loadind' -> 'Loading'
        output.insert(END, f'Loading the files: {data_folder}\n')
    else:
        output.insert(END, f'Data folder does not exist: {data_folder}\n')
        return
    conn, cursor = open_db()
    create_tables(cursor)
    for folder, subs, files in os.walk(data_folder):
        if folder.split('\\')[-1] == 'old':
            # 'old' folders hold superseded scans — log and skip them
            output.insert(
                END, "*** skip folder ***: " + folder.split('\\')[-2] +
                "-->" + folder.split('\\')[-1] + "-->old")
            continue
        for f in files:
            filename, file_extension = os.path.splitext(f)
            if file_extension != ".csv":
                continue
            output.insert(END, folder.split('\\')[-1] + "-->" + f + '\n')
            # fix: removed a redundant nested os.path.join
            with open(os.path.join(folder, f)) as csv_file:
                csv_reader = csv.reader(csv_file, delimiter=',')
                for row in csv_reader:
                    if len(row) != 28:
                        # fix: stray '"' removed from the message
                        output.insert(
                            END,
                            f'filename skipped, # col are not 28, it is: {len(row)} \n'
                        )
                        break
                    if row[0] != 'name':  # skip the CSV header row
                        insert_row_vulner(conn, cursor, row)
    conn.commit()
    # NOTE(review): hard-coded cleanup of one known-bad record — confirm
    # this is still needed.
    exec_db_cmd(
        "delete from vulnerability WHERE ipaddress = '172.17.90.200' AND name = 'HQ-VLAN-90-Windows-Server-FULL'"
    )
    close_db(cursor)
    insert_base_stats(output)
def stats_attendees_graph():
    """Plot attendance per meeting date over all meetings (matched Academy
    students vs. external attendees) and save to a fixed PNG path."""
    # fix: removed the needless f-prefix (the string has no placeholders)
    sql = """SELECT meeting_date, meeting_type, topic, COUNT(name),
                    COUNT(firstname), COUNT(name) - COUNT(firstname)
             FROM (SELECT DATE(join_time) as meeting_date, type as meeting_type,
                          topic, name, firstname
                   FROM attendees a
                   LEFT Join students ON a.user_email = students.email
                   LEFT Join meetings m ON a.meeting_uuid = m.uuid
                   GROUP BY meeting_date, name)
             GROUP BY meeting_date ORDER BY 1, 2, 3"""
    conn, cursor = open_db()  # fix: locals were misspelled comm/curspr
    rows = exec_query(cursor, sql)
    close_db(cursor)
    bars1, bars2, names = [], [], []
    for row in rows:
        names.append(row[0])  # meeting date
        bars1.append(row[4])  # COUNT(firstname): matched student records
        bars2.append(row[5])  # difference: attendees with no student match
    plot_stacked_bar(bars1, bars2, names, r".\data\attendess by date.png")
def pdf2db(page_no):
    """OCR one ABH book page, add its words to pdf_dict, and store the raw
    page text in page_text together with the first new dictionary row id."""
    page = f".\\data\\books\\ABH\\pages\\ABH-{format(page_no, '03')}.png"
    words = str(pytesseract.image_to_string(Image.open(page), lang='ara'))
    conn, cursor = open_db()
    # the first dictionary id this page occupies = current row count + 1
    count_rows = exec_query(cursor, "select count(*) FROM pdf_dict")
    st_dict_no = 1 if not count_rows else count_rows[0][0] + 1
    for word in words.split():
        insert_row_list(conn, cursor, 'pdf_dict', [word, word, page_no],
                        ignoreUnique=True)
    conn.commit()
    close_db(cursor)
    # neutralize quotes so the interpolated SQL below stays well-formed
    words = words.replace("'", "*").replace('"', "*")
    exec_db_cmd(
        f"""INSERT INTO page_text VALUES ({page_no}, '{words}', '', {st_dict_no})"""
    )