Пример #1
0
def test():
    """OCR one Arabic book-page image with Tesseract, dump the raw text
    plus one word per line to a .txt file, and upsert each word into the
    pdf_dict table.

    NOTE(review): the page path is hard-coded to a local machine — adjust
    before reuse.
    """
    page = r"C:\Yahia\Home\Yahia-Dev\Python\training\pdf\data\ABH pages\ABH-025.png"

    # Output file is named after the image stem (e.g. ABH-025.txt).
    outfile = f".\\out\\{os.path.split(page)[1].split('.')[0]}.txt"

    # 'ara' selects Tesseract's Arabic language model.
    words = str(pytesseract.image_to_string(Image.open(page), lang='ara'))

    # `with` guarantees the file is closed even if a write fails
    # (the original left the handle open on error).
    with open(outfile, "w", encoding='utf-8') as f:
        f.write(words)
        f.write('----------------------------\n')
        for w in words.split():
            f.write(w + "\n")

    # Insert every recognized word into the dictionary, skipping
    # duplicates via ignoreUnique.
    conn, cursor = open_db()
    for w in words.split():
        insert_row_list(conn, cursor, 'pdf_dict', [w, w], ignoreUnique=True)

    conn.commit()
    close_db(cursor)
Пример #2
0
def load_ACH_folder():
    """Ask the user for a folder, clear the ACH staging tables, then parse
    every .XML file under that folder (recursively) into SQLite via
    xml_to_sqlite.
    """
    data_folder = filedialog.askdirectory(initialdir=".",
                                          title="select data folder",
                                          mustexist=True)
    if not data_folder:
        return  # dialog cancelled — nothing to load

    _, cursor = open_db()

    # Clear previously loaded data; each table may not exist yet, so the
    # existence check guards every delete.
    for table in ("GrpHdr", "pacs_008_001_01", "pacs_002_001_02",
                  "pacs_004_001_01"):
        if table_exists(table, cursor) > 0:
            exec_db_cmd(f'delete from {table}')

    close_db(cursor)

    # Walk the tree; the extension check is case-insensitive so both
    # .xml and .XML files are loaded.
    for folder, _subs, files in os.walk(data_folder):
        for f in files:
            _, file_extension = os.path.splitext(f)
            if file_extension.upper() != ".XML":
                continue
            # (fixed: the original wrapped os.path.join twice)
            xml_to_sqlite(os.path.join(folder, f))
Пример #3
0
def xml_to_sqlite(file_name):
    """Parse one pacs XML file and insert its records into SQLite.

    The GrpHdr child becomes a row in the GrpHdr table (tagged with the
    pacs message type and source filename); every other child becomes a
    row in a table named after the message type, linked back via MsgId.
    """
    print(file_name)
    tree = ET.parse(file_name)
    root = tree.getroot()
    conn, cursor = open_db()

    # BUG FIX: MsgId was only assigned inside the GrpHdr branch; if a
    # detail record appeared first it raised NameError.  Initialize it.
    MsgId = None

    for i, L0 in enumerate(root):
        print(f"{i} - len(L0): {len(L0)}, {local_name(L0.tag)}")
        for L1 in L0:
            L1_rec = dict_to_1L_dict(xml_to_dict(L1))
            if not L1_rec:
                continue

            if local_name(L1.tag) == "GrpHdr":
                MsgId = L1_rec.get('GrpHdr_MsgId')
                rec = {
                    'pacsId': local_name(root[0].tag),
                    "filename": file_name
                }
                rec.update(L1_rec)
                insert_db_rec(conn, cursor, rec, 'GrpHdr')
            else:
                # Table name derives from the message type,
                # e.g. pacs.008.001.01 -> pacs_008_001_01.
                table_name = local_name(L0.tag).replace('.', '_')
                L1_rec.update({'MsgId': MsgId})
                insert_db_rec(conn, cursor, L1_rec, table_name)
            conn.commit()

    close_db(cursor)
Пример #4
0
def attendees_last_2_month(file_name):
    """Plot a stacked bar chart (saved to file_name) of attendee counts
    per meeting date over the last two months, split into total attendees
    and external (non-Academy) attendees.
    """
    # Inner query de-duplicates to one row per (date, attendee name);
    # COUNT(firstname) counts only matched Academy students because the
    # LEFT JOIN leaves firstname NULL for non-matches.
    sql = """SELECT meeting_date, COUNT(name), COUNT(firstname), COUNT(name) - COUNT(firstname) 
                    FROM (
                        SELECT DATE(join_time) as meeting_date, type as meeting_type, topic,  name, firstname
                        FROM attendees a 
                        LEFT Join students ON a.user_email = students.email
                        LEFT Join meetings m ON a.meeting_uuid = m.uuid
                        WHERE DATE(join_time) > DATE('now', '-2 Month')
                        GROUP BY meeting_date, name 
                        )
                    GROUP BY meeting_date
                    ORDER BY 1"""
    conn, cursor = open_db()
    rows = exec_query(cursor, sql)
    close_db(cursor)

    bars1 = []  # total attendees per date
    bars2 = []  # external attendees per date (COUNT(name) - COUNT(firstname))
    names = []  # meeting dates (x-axis labels)
    for row in rows:
        names.append(row[0])
        bars1.append(row[1])
        bars2.append(row[3])
    plot_stacked_bar(bars1, bars2, names, file_name)
Пример #5
0
def load_ACH():
    """Load ACH pacs XML sample data into SQLite.

    Currently parses one hard-coded sample file; the folder-walk branch
    is kept (disabled by the `if True` toggle) for bulk loading.
    NOTE(review): file/folder paths are hard-coded to a local machine.
    """
    file_name = r"C:\Yahia\Home\Yahia-Dev\Python\training\xml\ACH\tttt\29_PACS008_2021080811485962830.XML"
    data_folder = r"C:\Yahia\Home\Yahia-Dev\Python\training\xml\ACH"

    conn, cursor = open_db()
    # Clear the staging tables before reloading.
    exec_db_cmd('delete from GrpHdr')
    exec_db_cmd('delete from trx')
    exec_db_cmd('delete from pacs_002_004')

    # BUG FIX: the exception log was opened without `with`, so it leaked
    # if parse_pacs_file raised.
    with open(r".\out\exceptions.txt", "wt", encoding="UTF8") as fexception:
        if True:  # toggle: single-file vs whole-folder load
            parse_pacs_file(file_name, conn, cursor, fexception)
        else:
            for folder, subs, files in os.walk(data_folder):
                for f in files:
                    _, file_extension = os.path.splitext(f)
                    if file_extension.upper() != ".XML":
                        continue
                    parse_pacs_file(os.path.join(folder, f), conn, cursor, fexception)

        conn.commit()
        close_db(cursor)
Пример #6
0
def load_increment_scan_result(file_path, output):
    """Load an incremental vulnerability scan into the DB and refresh stats.

    file_path: a folder path (str) whose CSVs are loaded, or a collection
        of file paths.
    output: Tk text widget used for progress messages.
    """
    output.insert(END, 'Loading the files:' + file_path + ' .....\n')
    if isinstance(file_path, str):  # a single folder path?
        scan_data = load_scan_dir_csv(file_path, output)
    else:
        scan_data = load_scan_files(file_path, output)

    scan_data = replace_temp_profile_name(scan_data)

    # Delete the previous results for every IP in this scan so the new
    # rows fully replace them.
    ips = [r[IP_ADDRESS_COL] for r in scan_data]
    output.insert(END, " ------- del_old_vulners ---------------\n")

    ip_list = '"' + '","'.join(ips) + '"'
    # NOTE(review): SQL built by string concatenation — acceptable only
    # while the IPs come from trusted scan files; prefer parameterizing.
    cmd = f"DELETE FROM vulnerability WHERE ipaddress in ({ip_list})"
    exec_db_cmd(cmd)

    output.insert(END, " ---- Insert new scan records \n")
    conn, cursor = open_db()
    for row in scan_data:
        insert_row_vulner(conn, cursor, row)
    conn.commit()
    close_db(cursor)

    output.insert(END, " Add new stats\n")
    # New assets need their profile fixed manually before stats can run.
    if add_new_assets():
        messagebox.showinfo("Warning", "Newly added assets to assets with profile_group '?', please arrange to adjust and then run menu 'Update Stats'")
        return

    output.insert(END, " Update Stats \n")
    update_stats(output)  # refresh 'current_' fields with the new stats
Пример #7
0
def attendees_per_day_of_week(file_name):
    """Plot a stacked bar chart (saved to file_name) of attendee counts
    by day of week, split into total attendees and external attendees.
    """
    # Inner query keeps one row per (date, attendee); the outer query
    # groups by SQLite day-of-week (strftime '%w': 0=Sunday .. 6=Saturday),
    # which matches the order of the `names` labels below.
    sql = """SELECT strftime('%w',meeting_date) as day_of_week, COUNT(name), COUNT(firstname), COUNT(name) - COUNT(firstname) 
                    FROM (
                        SELECT DATE(join_time) as meeting_date,  name, firstname
                        FROM attendees a 
                        LEFT Join students ON a.user_email = students.email
                        LEFT Join meetings m ON a.meeting_uuid = m.uuid
                        GROUP BY meeting_date, name 
                        )
                    GROUP BY day_of_week
                    ORDER BY 1"""
    conn, cursor = open_db()
    rows = exec_query(cursor, sql)
    close_db(cursor)

    bars1 = []  # total attendees per weekday
    bars2 = []  # external attendees per weekday
    # BUG FIX: removed the stray trailing space from 'Wednesday '.
    names = [
        'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
        'Saturday'
    ]

    for row in rows:
        bars1.append(row[1])
        bars2.append(row[3])
    plot_stacked_bar(bars1, bars2, names, file_name)
Пример #8
0
def check_new_assets(scan_data):  # not used anymore
    """Map each scan row's IP address to its asset profile name.

    IPs missing from the assets table are inserted with placeholder
    fields and the temporary profile name from the scan; a warning popup
    is shown when any were added.  Returns a list of profile names, one
    per scan_data row.
    """
    # BUG FIX: profile_list and new_servers were never initialized,
    # raising NameError on first use.
    profile_list = []
    new_servers = False

    conn, cursor = open_db()
    for row in scan_data:
        ip = row[IP_ADDRESS_COL]
        cmd = f"SELECT profile_name FROM assets WHERE ipaddress = '{ip}'"
        header, lst = query_to_list(cmd)
        if not lst:  # IP not found: add it as-is until manually fixed
            print(f"Newly added server: {ip}, tmp profile:{row[0]}")
            new_servers = True
            profile_name = row[0]
            profile_list.append(profile_name)
            rec = [ip, profile_name, row[ASSET_NAME_COL], "?", "?", ""]
            try:
                insert_row_list(conn, cursor, "assets", rec)
                conn.commit()
            except Exception:
                # Record already exists from a previous scan row.
                print("check_new_assets: record already exist")
                continue
        else:  # found: use the asset's real profile name
            profile_list.append(lst[0][0])

    close_db(cursor)
    if new_servers:
        messagebox.showinfo("Warning", "Newly added assets to assets name '(NEW)', please arrange to adjust")
    return profile_list
Пример #9
0
def load_zoom_meetings(from_dt, to_date=""):
    conn, cursor = open_db()
    create_tables(cursor)  # create tables if not exist

    meeting_report_url = f"https://api.zoom.us/v2/report/users/{config.get('USER_ID')}/meetings?from={from_dt}&to={to_date}?page_size=300"
    headers = {"Authorization": "Bearer " + config.get("JWT_TOKEN")}
    r = requests.get(meeting_report_url, headers=headers)

    meetings = json.loads(r.content)
    if meetings.get("code") == 124:
        print(meetings.get("message"))  # error
        return
    page_count = meetings.get('page_count')
    page_size = meetings.get('page_size')
    total_records = meetings.get('total_records')

    for key, value in meetings.items():
        if key == 'meetings':
            for m in value:
                # print (m)
                print(f"meeting-id: {m.get('id')}, "
                      f"type: {m.get('type')}, "
                      f"topic: {m.get('topic')}, "
                      f"start time: {m.get('start_time')}, "
                      f"No of Participants: {m.get('participants_count')}")
                if insert_row_meeting(conn, cursor, m) == -1:
                    print("meeting already loaded, skip it ...")
                    continue  # meeting already exist, skip it

                for k, v in m.items():
                    if k == "uuid":  ## meeting id
                        load_meeting_participants(v, None, conn, cursor)
                        pass
    close_db(cursor)
Пример #10
0
def get_page_dict(page_no):
    """Return all pdf_book rows for the given page number, or None when
    the page has no rows.
    """
    conn, cursor = open_db()
    # NOTE(review): page_no is interpolated into the SQL — safe only
    # while callers pass integers; parameterize if exec_query allows it.
    rows = exec_query(cursor, f"SELECT * FROM pdf_book WHERE page_no = {page_no}")
    close_db(cursor)
    return rows if rows else None
Пример #11
0
def load_book(first_page=3, last_page=74):
    """OCR book pages ABH-<nnn>.png for first_page..last_page (inclusive)
    and insert every recognized word into the pdf_book table.

    Generalized: the page range was hard-coded as range(3, 75); the
    defaults preserve the original behavior for existing callers.
    NOTE(review): page_path is hard-coded to a local machine.
    """
    page_path = r"C:\Yahia\Home\Yahia-Dev\Python\training\pdf\data\books\ABH\pages"
    conn, cursor = open_db()
    for page_no in range(first_page, last_page + 1):
        # Page files are zero-padded to three digits, e.g. ABH-003.png.
        page = os.path.join(page_path, f"ABH-{format(page_no, '03')}.png")
        print(page_no)
        words = str(pytesseract.image_to_string(Image.open(page), lang='ara'))
        for w in words.split():
            insert_row_list(conn, cursor, 'pdf_book', [w, w, page_no, None])

    conn.commit()
    close_db(cursor)
Пример #12
0
def query_to_excel(cmd, file_name, header=None):
    """Run a SQL query and save its result set to an Excel workbook.

    cmd: SQL SELECT statement to execute.
    file_name: destination .xlsx path.
    header: optional column-title row; derived from the query when omitted.
    """
    conn, cursor = open_db()
    rows = exec_query(cursor, cmd)
    close_db(cursor)

    # Derive column titles from the query when the caller gave none.
    # NOTE(review): conn is used after close_db(cursor) — confirm close_db
    # leaves the connection itself usable.
    column_titles = header if header else get_col_names(conn, cmd)

    workbook = Workbook()
    sheet = workbook.active
    sheet.append(column_titles)
    for record in rows:
        sheet.append(record)
    workbook.save(file_name)
Пример #13
0
def rebuild_page_text(page_no):
    """Re-edit the raw OCR text of one page, persist the edited text and
    refresh the page's dictionary entries.

    Returns the edited text, or None when the page does not exist.
    """
    conn, cursor = open_db()
    rows = exec_query(cursor, f"SELECT page_raw FROM page_text WHERE page_no = {page_no}")

    if not rows:
        # BUG FIX: tk.messagebox is a module and is not callable — the
        # original raised TypeError here instead of showing a dialog.
        tk.messagebox.showinfo("Error", 'Page not found')
        close_db(cursor)
        return None

    edited_page = edit_page_text_details(rows[0][0], cursor, page_no)

    # BUG FIX: escape single quotes so edited text containing ' does not
    # break the interpolated SQL string literal.
    escaped = edited_page.replace("'", "''")
    cmd = f"UPDATE page_text SET page_edited_txt = '{escaped}' WHERE page_no = {page_no}"
    r = exec_db_cmd_conn(cmd, cursor)
    if r == 0:  # success
        update_dict(page_no, conn, cursor)
        conn.commit()

    close_db(cursor)
    return edited_page
Пример #14
0
def load_db(data_folder, output):
    """Load every vulnerability-scan CSV under data_folder into SQLite and
    rebuild the baseline statistics.

    Folders named 'old' are skipped; CSV rows must have exactly 28
    columns, and the 'name' header row is ignored.
    output: Tk text widget for progress messages.
    """
    if os.path.isdir(data_folder):
        output.insert(END, f'Loading the files: {data_folder}\n')
    else:
        output.insert(END, f'Data folder does not exist: {data_folder}\n')
        return

    conn, cursor = open_db()
    create_tables(cursor)
    for folder, subs, files in os.walk(data_folder):
        # Skip archived data kept in 'old' sub-folders.
        if folder.split('\\')[-1] == 'old':
            output.insert(
                END, "*** skip folder ***: " + folder.split('\\')[-2] + "-->" +
                folder.split('\\')[-1] + "-->old")
            continue
        for f in files:
            _, file_extension = os.path.splitext(f)
            if file_extension != ".csv":
                continue
            output.insert(END, folder.split('\\')[-1] + "-->" + f + '\n')
            with open(os.path.join(folder, f)) as csv_file:
                csv_reader = csv.reader(csv_file, delimiter=',')
                for row in csv_reader:
                    if len(row) != 28:
                        # BUG FIX: removed the stray quote from the message.
                        output.insert(
                            END,
                            f'filename skipped, # col are not 28, it is: {len(row)} \n'
                        )
                        break
                    if row[0] != 'name':  # skip the CSV header row
                        insert_row_vulner(conn, cursor, row)
    conn.commit()
    # Known bad record from one specific scan — removed explicitly.
    exec_db_cmd(
        "delete from vulnerability WHERE ipaddress = '172.17.90.200' AND name = 'HQ-VLAN-90-Windows-Server-FULL'"
    )

    close_db(cursor)
    insert_base_stats(output)
Пример #15
0
def stats_attendees_graph():
    """Plot attendee counts per meeting date (Academy students stacked
    with external attendees) and save the chart under .\\data.
    """
    # Inner query de-duplicates to one row per (date, attendee);
    # COUNT(firstname) counts only matched Academy students because the
    # LEFT JOIN leaves firstname NULL for non-matches.
    sql = """SELECT meeting_date, meeting_type, topic, COUNT(name), COUNT(firstname), COUNT(name) - COUNT(firstname) 
                    FROM (
                        SELECT DATE(join_time) as meeting_date, type as meeting_type, topic,  name, firstname
                        FROM attendees a 
                        LEFT Join students ON a.user_email = students.email
                        LEFT Join meetings m ON a.meeting_uuid = m.uuid
                        GROUP BY meeting_date, name 
                        )
                    GROUP BY meeting_date
                    ORDER BY 1, 2, 3"""
    conn, cursor = open_db()
    rows = exec_query(cursor, sql)
    close_db(cursor)

    bars1 = []  # matched (Academy) students per date
    bars2 = []  # external attendees per date
    names = []  # meeting dates (x-axis labels)
    for row in rows:
        names.append(row[0])
        bars1.append(row[4])
        bars2.append(row[5])
    plot_stacked_bar(bars1, bars2, names, r".\data\attendess by date.png")
Пример #16
0
def pdf2db(page_no):
    """OCR one Arabic book page, add its words to pdf_dict (starting at
    the next free dictionary number) and store the raw page text in the
    page_text table.
    """
    # Page files are zero-padded to three digits, e.g. ABH-025.png.
    page = f".\\data\\books\\ABH\\pages\\ABH-{format(page_no, '03')}.png"

    words = str(pytesseract.image_to_string(Image.open(page), lang='ara'))

    # Next dictionary record number = current row count + 1.
    conn, cursor = open_db()
    rows = exec_query(cursor, "select count(*) FROM pdf_dict")
    if not rows:  # defensive: COUNT(*) normally always yields one row
        st_dict_no = 1
    else:
        st_dict_no = rows[0][0] + 1

    for w in words.split():
        insert_row_list(conn, cursor, 'pdf_dict', [w, w, page_no], ignoreUnique=True)
    conn.commit()
    close_db(cursor)

    # Neutralize quotes so the text survives embedding in the SQL literal.
    words = words.replace("'", "*")  # replace single quote
    words = words.replace('"', "*")  # replace double quote
    cmd = f"""INSERT INTO page_text VALUES ({page_no}, '{words}', '', {st_dict_no})"""
    exec_db_cmd(cmd)