def __init__(self, logdir, experimental_name, *, save_model_with_input=None):
    """ Create one VisualDL log writer for training and one for evaluation.

    :param logdir: root directory under which both log folders live
    :param experimental_name: experiment name, used as the folder-name prefix
    :param save_model_with_input: stored on the instance unchanged and not
        used in this constructor. The original note only says the model
        structure is not saved by default.
        TODO: document what enabling this option does.
    """
    from pyxllib.prog.pupil import check_install_package
    check_install_package('visualdl')
    from visualdl import LogWriter

    super().__init__()

    # The '_train' / '_val' suffixes are chosen so that, in lexicographic
    # order, each experiment's train folder is listed before its eval folder.
    # Existing folders are left in place (a former rmtree step is disabled).
    train_dir = XlPath(logdir) / (experimental_name + '_train')
    self.write = LogWriter(logdir=str(train_dir))

    val_dir = XlPath(logdir) / (experimental_name + '_val')
    self.eval_writer = LogWriter(logdir=str(val_dir))

    self.eval_times = 0
    self.save_model_with_input = save_model_with_input
def ensure_content(ob=None, encoding=None):
    """ Return the text behind several kinds of input.

    :param ob:
        None: read everything from stdin
        object with a callable ``read`` member: return ``ob.read()``
        name of an existing file: read the file and return its content
            .docx is extracted through textract
            .doc and .pdf are not supported yet
        anything else: returned unchanged
    :param encoding: encoding passed on to ``readtext`` for plain text files
    :return: the content, or ``ob`` itself when it is not recognised
    :raises NotImplementedError: for an existing .doc or .pdf file
    """
    if ob is None:
        return sys.stdin.read()  # interactive input is finished with Ctrl + D

    # File-like objects are handled before any path logic, as the
    # documented contract promises.
    reader = getattr(ob, 'read', None)
    if callable(reader):
        return reader()

    # Anything that is not an existing file is treated as literal content.
    # Known issue kept from the original: a directory name may end in a
    # PermissionError further down.
    if not File(ob):
        return ob

    if ob.endswith('.docx'):
        # Installation notes: https://blog.csdn.net/code4101/article/details/79328636
        check_install_package('textract')
        import textract  # imported lazily, only once the package is ensured
        text = textract.process(ob)
        return text.decode('utf8', errors='ignore')
    if ob.endswith(('.doc', '.pdf')):
        raise NotImplementedError

    # Everything else is read as an ordinary text file.
    return readtext(ob, encoding)
def __init__(self, file, mode=None):
    """ Open an archive with the engine that matches its format.

    :param file: the archive to process
    :param mode: archive format, 'zip' or 'rar'. When empty it is inferred
        from the file extension: .docx and .zip use 'zip', .rar uses 'rar'.
    :raises ValueError: when the format cannot be inferred from the
        extension, or when an unsupported ``mode`` is given
    """
    # 1 Decide the archive format. The extension is only inspected when the
    #   caller did not state the format explicitly.
    if not mode:
        ext = os.path.splitext(file)[1].lower()
        mode = {'.docx': 'zip', '.zip': 'zip', '.rar': 'rar'}.get(ext)
        if mode is None:
            raise ValueError(
                f'cannot infer the archive format from extension {ext!r}')

    # 2 Pick the extraction engine.
    if mode == 'zip':
        proc = zipfile.ZipFile(file)
    elif mode == 'rar':
        # Installation notes: https://blog.csdn.net/code4101/article/details/79328636
        check_install_package('unrar')
        from unrar.rarfile import RarFile
        proc = RarFile(file)
    else:
        # Fail here instead of leaving the instance without ``proc``.
        raise ValueError(f'unsupported archive mode {mode!r}')

    self.mode = mode
    self.proc = proc

    # 3 Extraction folder; None means nothing has been extracted yet.
    self.tempfolder = None
def to_pdf(cls, docx_file, pdf_file=None):
    """ Convert a docx file to pdf with docx2pdf.

    :param docx_file: source document, either a path object or a plain string
    :param pdf_file: target file; defaults to ``docx_file`` with its suffix
        replaced by '.pdf'
    :return: the target pdf path (the value of ``pdf_file`` after defaulting)
    """
    # If the installation fails, adding the pip option --user may help.
    check_install_package('docx2pdf')
    import docx2pdf

    if pdf_file is None:
        source = docx_file
        if not hasattr(source, 'with_suffix'):
            # Plain strings have no with_suffix; path objects are kept as-is
            # so the returned type is unchanged for existing callers.
            import pathlib
            source = pathlib.Path(source)
        pdf_file = source.with_suffix('.pdf')

    docx2pdf.convert(str(docx_file), str(pdf_file))
    return pdf_file
def type_text(text):
    """ Type out ``text`` through a pynput keyboard controller.

    Unlike pyautogui.write, this also handles Chinese and other unicode text.
    The clipboard can usually serve the same purpose, but per the original
    note it is not unobtrusive enough.

    :param text: the text to type
    """
    check_install_package('pynput')
    from pynput.keyboard import Controller as KeyboardController

    KeyboardController().type(text)
def to_docx(self, docx_file=None):
    """ Convert the pdf at ``self.src_file`` to docx with pdf2docx.

    :param docx_file: target file; defaults to ``self.src_file`` with its
        suffix replaced by '.docx'
    :return: the target docx path (the value of ``docx_file`` after defaulting)
    """
    check_install_package('pdf2docx')
    from pdf2docx import parse

    pdf_file = self.src_file
    if docx_file is None:
        docx_file = pdf_file.with_suffix('.docx')

    # Progress is reported through logging, not through printed output.
    parse(str(pdf_file), str(docx_file))
    return docx_file
def f1_score(self, average='weighted'):
    """ F1 score for a multi-class task, computed from self.gt and self.pred.

    Background: https://zhuanlan.zhihu.com/p/64315175

    :param average:
        weighted: F1 per class, then averaged weighted by sample count
        macro: F1 per class, then a plain mean (with unbalanced samples a
            class that appears only once still has a large influence)
        micro: F1 over all samples in binary form, equivalent to accuracy
        all: project-specific extension, returns a dict with all three
    :return: the score rounded to 4 decimals, or for 'all' a dict with the
        keys f1_weighted, f1_macro and f1_micro
    """
    check_install_package('sklearn', 'scikit-learn')
    from sklearn.metrics import f1_score as sk_f1_score

    if average != 'all':
        return round(sk_f1_score(self.gt, self.pred, average=average), 4)

    scores = {}
    for mode in ('weighted', 'macro', 'micro'):
        scores[f'f1_{mode}'] = self.f1_score(mode)
    return scores
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2020/06/15 """ oss2 · PyPI: https://pypi.org/project/oss2/ """ from pyxllib.prog.pupil import check_install_package check_install_package('oss2') import oss2 from pyxllib.file.specialist import File class OssBucket: def __init__(self, bucket_name, endpoint, access_key_id, access_key_secret): self.bucket = oss2.Bucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name) def upload(self, key, localfile, if_exists='replace', force=False): """ 如果云端已存在,默认会进行覆盖 :param key: 上传后存储的文件名 :param localfile: 本地文件 :param if_exists: replace, 如果oss上已存在也替换掉
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2021/06/06 17:00 from pyxllib.prog.pupil import check_install_package # 拼写检查库,即词汇库 # spellchecker模块主要有两个类,SpellChecker和WordFrequency # WordFrequency是一个词频类 # 一般导入SpellChecker就行了:from spellchecker import SpellChecker check_install_package('pyspellchecker') from spellchecker import SpellChecker from pyxllib.debug.pupil import dprint class MySpellChecker(SpellChecker): """ 拼写检查 190923周一21:54,源自 完形填空ocr 识别项目 """ def __init__(self, language="en", local_dictionary=None, distance=2, tokenizer=None, case_sensitive=False, df=None):
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2021/04/04 17:03 """ 专门给utools的快捷命令扩展的一系列python工具库 """ from pyxllib.prog.pupil import check_install_package check_install_package('fire') check_install_package('humanfriendly') check_install_package('pandas') check_install_package('pyautogui', 'PyAutoGui') # 其实pip install不区分大小写,不过官方这里安装是驼峰名 import pathlib import pyperclip import re import datetime import json import os import fire from humanfriendly import format_timespan import pandas as pd import pyautogui from pyxllib.robot.autogui import type_text, clipboard_decorator from pyxllib.file.specialist import File, Dir from pyxllib.debug.specialist import browser, TicToc, parse_datetime
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2021/08/31 09:56 from pyxllib.prog.pupil import check_install_package check_install_package('win32com', 'pypiwin32') import json import os import re import pythoncom from win32com.client import constants import win32com.client as win32 from pyxllib.prog.newbie import RunOnlyOnce from pyxllib.prog.pupil import DictTool, EnchantBase, EnchantCvt from pyxllib.text.pupil import strwidth from pyxllib.debug.specialist import File, Dir, get_etag, browser def __docx(): """ python-docx 相关封装 """ pass class DocxTools:
代码中,gt指ground truth,真实标注 dt指detection,模型检测出来的结果 除了 label.py 中定义的 CocoGtData 专门处理 gt 格式数据 CocoData 同时处理 gt dt 格式数据 这里对外有两个类 CocoEval 计算coco指标 CocoMatch 进行一些高级的结果分析 生成的结果可以用 xllabelme 打开 (pip install xllabelme) """ from pyxllib.prog.pupil import check_install_package check_install_package('xlcocotools') from collections import ChainMap, defaultdict, Counter import copy import json import os import pathlib import random import sys import pandas as pd from PIL import Image from tqdm import tqdm from pyxllib.stdlib.zipfile import ZipFile from pyxllib.prog.newbie import round_int
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2020/06/02 16:06 from pyxllib.prog.pupil import check_install_package check_install_package('fitz', 'PyMuPdf>=1.18.17') import json import os import pprint import re import fitz from pyxllib.prog.newbie import round_int, RunOnlyOnce, decode_bitflags from pyxllib.prog.pupil import DictTool, EnchantBase, EnchantCvt from pyxllib.algo.newbie import round_unit from pyxllib.algo.pupil import get_number_width from pyxllib.file.specialist import File, Dir, writefile, get_etag from pyxllib.debug.pupil import dprint from pyxllib.debug.specialist import browser from pyxllib.cv.expert import xlcv, xlpil from pyxllib.data.labelme import LabelmeDict def __fitz(): print(fitz.__doc__)
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2021/06/03 14:26 """ 并查集相关功能 """ from pyxllib.prog.pupil import check_install_package check_install_package('disjoint_set', 'disjoint-set==0.6.3') from itertools import combinations from disjoint_set import DisjointSet def disjoint_set(items, join_checker): """ 按照一定的相连规则分组 :param items: 项目清单 :param join_checker: 检查任意两个对象是否相连,进行分组 :return: 算法:因为会转成下标,按照下标进行分组合并,所以支持items里有重复值,或者unhashable对象 >>> disjoint_set([-1, -2, 2, 0, 0, 1], lambda x, y: x*y>0) [[-1, -2], [2, 1], [0], [0]] """
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2021/05/26 17:24 from pyxllib.prog.pupil import check_install_package check_install_package('qtpy', 'QtPy') import json import os.path as osp import sys import time from PyQt5.QtCore import pyqtSignal from qtpy import QtWidgets from qtpy import QtGui from qtpy.QtWidgets import QFrame, QInputDialog, QApplication from pyxllib.prog.newbie import CvtType here = osp.dirname(osp.abspath(__file__)) class QHLine(QFrame): """ https://stackoverflow.com/questions/5671354/how-to-programmatically-make-a-horizontal-line-in-qt """ def __init__(self): super(QHLine, self).__init__() self.setFrameShape(QFrame.HLine) self.setFrameShadow(QFrame.Sunken)
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2020/06/06 from pyxllib.prog.pupil import check_install_package check_install_package('pyautogui') check_install_package('keyboard') check_install_package('klembord') from collections import defaultdict import json import os import time import numpy as np from pandas.api.types import is_list_like import pyautogui import pyscreeze # NOQA pyautogui安装的时候会自动安装依赖的pyscreeze from pyxllib.prog.newbie import first_nonnone, round_int from pyxllib.prog.pupil import xlwait, DictTool, check_install_package from pyxllib.algo.geo import ComputeIou, ltrb2xywh, xywh2ltrb from pyxllib.algo.shapelylib import ShapelyPolygon from pyxllib.file.specialist import File, Dir from pyxllib.debug.specialist import TicToc from pyxllib.cv.expert import xlcv, xlpil from pyxllib.data.labelme import LabelmeDict
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2021/06/06 17:01 from pyxllib.prog.pupil import check_install_package # 这个需要C++14编译器 https://download.microsoft.com/download/5/f/7/5f7acaeb-8363-451f-9425-68a90f98b238/visualcppbuildtools_full.exe # 在需要的时候安装,防止只是想用pyxllib很简单的功能,但是在pip install阶段处理过于麻烦 # MatchSimString计算编辑距离需要 check_install_package('Levenshtein', 'python-Levenshtein') import Levenshtein import pandas as pd from pyxllib.text.pupil import briefstr from pyxllib.debug.specialist.common import dataframe_str class MatchSimString: """匹配近似字符串 mss = MatchSimString() # 1 添加候选对象 mss.append_candidate('福州+厦门2018初数暑假讲义-请录入-快乐学习\初一福厦培优-测试用') mss.append_candidate('2018_快乐数学_六年级_秋季_第01讲_圆柱与圆锥_教案(教师版)') mss.append_candidate('删除所有标签中间多余的空白') # 2 需要匹配的对象1
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2021/06/08 22:53 """ TODO 写一些图片相似度相关功能 """ from pyxllib.prog.pupil import check_install_package check_install_package('imagehash', 'ImageHash') import imagehash
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2020/05/30 21:14 """ 百度人工智能API接口 """ from pyxllib.prog.pupil import check_install_package check_install_package('aip', 'baidu-aip') import aip import base64 import cv2 from pyxllib.prog.pupil import is_url from pyxllib.prog.specialist import XlOsEnv from pyxllib.debug.specialist import TicToc from pyxllib.cv.expert import xlcv class AipOcr(aip.AipOcr): """ 封装该类 目的1:合并输入文件和url的识别 目的2:带透明底的png百度api识别不了,要先转成RGB格式 """
使用gitpython库,在python调用git进行一些版本分析的功能 Git list_commits,输出仓库的commit历史记录 bcompare,对比一个文件在不同版本的内容,也会输出这个文件的历史commit清单 show,获得一个文件某个版本的文本 TODO 清单 1、输入一个sha,分析某一次commit的细节(GUI有相应功能,不紧急) 2、按照周几、24小时制、时间轴等判断提交频率,结合files_changed、insertions、deletions判断工作量(不紧急) 3、将数据以图片的直观形式展现 """ from pyxllib.prog.pupil import check_install_package check_install_package('git', 'gitpython') import os import re import git import pandas as pd from pyxllib.prog.newbie import swap_rowcol from pyxllib.text.pupil import digit2weektag from pyxllib.file.specialist import Dir, File, filesmatch from pyxllib.debug.pupil import dprint from pyxllib.debug.specialist import dataframe_str, bcompare class Git:
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2021/06/06 16:57 from pyxllib.prog.pupil import check_install_package check_install_package('ahocorasick', 'pyahocorasick') from collections import Counter import re import ahocorasick def make_automaton(words): """ 根据输入的一串words模式,生成一个AC自动机 """ a = ahocorasick.Automaton() for index, word in enumerate(words): a.add_word(word, (index, word)) a.make_automaton() return a def count_words(content, word, scope=2, exclude=None): # 1 统计所有词汇出现次数 c = Counter() c += Counter(re.findall(f'.{{,{scope}}}{word}.{{,{scope}}}', content)) # 2 排除掉不处理的词 (注意因为这里每句话都已经是被筛选过的,所以处理比较简单,并不需要复杂到用区间集处理) if exclude:
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2020/06/03 09:52 from pyxllib.prog.pupil import check_install_package check_install_package('bidict') check_install_package('sqlalchemy') check_install_package('mysqlclient') import math from bidict import bidict import pandas as pd import sqlalchemy from pyxllib.file.specialist import File SQL_LIB_ACCOUNT_FILE = File(__file__).parent / 'sqllibaccount.pkl' def create_account_df(file='sqllibaccount.pkl'): """请在这里设置您个人的账户密码,并在运行完后,销毁明文信息""" df = pd.DataFrame.from_records( [ ['ckz', 'rm.sbsql.rds.aliyuncs.com', '', '', 'dddddd'], ['ckzlocal', '0.0.0.0', '', '', 'eeeeee'], ], columns=['index_name', 'host', 'port', 'user', 'passwd'])
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2020/06/02 """ 扩展了些自己的openpyxl工具 """ from pyxllib.prog.pupil import check_install_package check_install_package('openpyxl') check_install_package('premailer') check_install_package('xlrd2') check_install_package('yattag') import re import openpyxl from openpyxl import Workbook from openpyxl.cell.cell import MergedCell from openpyxl.styles import Font from openpyxl.utils.cell import get_column_letter import pandas as pd from pyxllib.prog.newbie import RunOnlyOnce from pyxllib.prog.pupil import EnchantBase, EnchantCvt from pyxllib.algo.specialist import product from pyxllib.debug.pupil import dprint from pyxllib.debug.specialist import browser
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author : 陈坤泽 # @Email : [email protected] # @Date : 2021/06/03 20:41 from pyxllib.prog.pupil import check_install_package check_install_package('paramiko') check_install_package('scp') # 对 paramiko 进一步封装的库 # check_install_package('fabric') import os import re import pathlib import paramiko from tqdm import tqdm import scp as scplib import humanfriendly from pyxllib.algo.pupil import natural_sort from pyxllib.file.specialist import XlPath from pyxllib.debug.specialist import get_xllog logger = get_xllog('location') class SshCommandError(Exception): pass