def write_csv(self, data, name, **kwargs):
    """
    :param data: data, dict
    :param name: CSV file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('CSV_DIR')
    :return: None
    """
    path = kwargs.get('path', DIR_dict.get('CSV_DIR'))
    file = os.path.join(path, name + '.csv')
    try:
        df = pd.DataFrame.from_dict(data,
                                    orient=kwargs.get('orient', 'columns'),
                                    dtype=kwargs.get('dtype'),
                                    columns=kwargs.get('columns'))
        df.to_csv(file,
                  sep=',',
                  index=kwargs.get('index', False),
                  header=kwargs.get('header', True),
                  encoding=kwargs.get('encoding'))
    except Exception as e:
        print(e)
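
# Usage sketch (illustrative only; `handler` is a hypothetical instance of the
# class these methods belong to, and DIR_dict['CSV_DIR'] must point at an
# existing directory):
#
#   handler.write_csv({'city': ['Beijing', 'Shanghai'], 'gdp': [4.0, 4.3]},
#                     'city_gdp')
#   # -> writes <CSV_DIR>/city_gdp.csv with a header row and no index column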
def write_excel(self, data, name, **kwargs):
    """
    :param data: data, dict
    :param name: Excel file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('EXCEL_DIR')
    :return: None
    """
    path = kwargs.get('path', DIR_dict.get('EXCEL_DIR'))
    file = os.path.join(path, name + '.xlsx')
    try:
        df = pd.DataFrame.from_dict(data,
                                    orient=kwargs.get('orient', 'columns'),
                                    dtype=kwargs.get('dtype'),
                                    columns=kwargs.get('columns'))
        # Use a context manager so the writer is saved and closed even on
        # error; ExcelWriter.save() and the `encoding` argument of to_excel
        # are deprecated in recent pandas
        with pd.ExcelWriter(file) as writer:
            df.to_excel(writer,
                        sheet_name=kwargs.get('sheet_name', 'Sheet1'),
                        index=kwargs.get('index', False),
                        header=kwargs.get('header', True))
    except Exception as e:
        print(e)
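
# Note on the pandas kwargs forwarded above: DataFrame.from_dict only accepts
# `columns` together with orient='index'; combining it with the default
# orient='columns' raises a ValueError. A sketch of the two layouts (again
# assuming a hypothetical `handler` instance):
#
#   handler.write_excel({'a': [1, 2], 'b': [3, 4]}, 'by_column')
#   handler.write_excel({'row1': [1, 3], 'row2': [2, 4]}, 'by_row',
#                       orient='index', columns=['a', 'b'])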
def save(self, info, **kwargs):
    save_name = kwargs.get("save_name", "untitled")
    path = kwargs.get("save_path", DIR_dict.get("PICKLE_DIR"))
    try:
        # os.path.join is portable, unlike a hard-coded '\\' separator; the
        # '.pickle' suffix is appended here, so save_name should not carry it
        with open(os.path.join(path, '{0}.pickle'.format(save_name)), 'wb') as file:
            pickle.dump(info, file)
    except Exception as e:
        print(e)
def image_segmentation(self, name="starbucks.jpg", save_name="result-bull-starbucks.jpg"):
    # Load the image data
    path = os.path.join(DIR_dict.get("PICTURE_DIR"), name)
    imgData, row, col = self.import_private_data(image_segmentation=True, path=path)
    # Build the model
    model = self.create_model(X_train=imgData, KMeans=True, n_clusters=4)
    label = model.fit_predict(imgData)
    # Visualise the result: map each cluster label to a grey level
    label = label.reshape([row, col])
    pic_new = image.new("L", (row, col))
    for i in range(row):
        for j in range(col):
            pic_new.putpixel((i, j), int(256 / (label[i][j] + 1)))
    # "JPEG" is the format argument of Image.save, not part of the path
    pic_new.save(os.path.join(DIR_dict.get("PICTURE_DIR"), save_name), "JPEG")
def ImportDatabase(self, db, host, **kwargs):
    path = kwargs.get('path', os.path.join(DIR_dict.get('JSON_DIR'), db))
    cols = [col.split('.json')[0] for col in os.listdir(path)]
    print("Collections to import: {0}\n".format(cols))
    for i, col in enumerate(cols):
        name = os.path.join(db, col)
        self.ImportCollection(col, name, host)
        print("{0}/{1}: collection {2} imported.".format(i + 1, len(cols), col))
    print("All collections imported.")
    if not kwargs.get('save_dir'):
        self.common.rmdir(path)
def read_png(self, name, **kwargs):
    """
    :param name: PNG file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('PNG_DIR')
    """
    from PIL import Image
    path = kwargs.get('path', DIR_dict.get('PNG_DIR'))
    file = os.path.join(path, name + '.png')
    try:
        im = Image.open(file)
        im.show()
    except Exception as e:
        print(e)
def selenium(self):
    executable_path = os.path.join(DIR_dict.get('EXE_DIR'), 'chromedriver.exe')
    chrome_options = webdriver.ChromeOptions()
    # Run Chromedriver headless (in the background)
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    # chrome_options.add_argument('--start-maximized')
    browser = webdriver.Chrome(executable_path=executable_path,
                               options=chrome_options)
    # Maximise the window (no visible effect while headless)
    browser.maximize_window()
    time.sleep(5)
    return browser
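
# Usage sketch (assumes a chromedriver.exe matching the local Chrome version
# sits in DIR_dict['EXE_DIR']; `handler` is a hypothetical instance). If a
# specific viewport matters under --headless, pass --window-size=W,H instead
# of relying on maximize_window():
#
#   browser = handler.selenium()
#   browser.get('https://example.com')
#   print(browser.title)
#   browser.quit()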
def write_png(self, name, **kwargs):
    """
    Save the current figure (self.fig) as a PNG.
    :param name: PNG file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('PNG_DIR')
    :return: None
    """
    path = kwargs.get('path', DIR_dict.get('PNG_DIR'))
    file = os.path.join(path, name + '.png')
    try:
        self.fig.savefig(file, dpi=500, bbox_inches='tight')
    except Exception as e:
        print(e)
def read_txt(self, name, **kwargs):
    """
    Read a txt file.
    :param name: txt file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('TXT_DIR')
    :return: data, list of lines
    """
    path = kwargs.get('path', DIR_dict.get('TXT_DIR'))
    file = os.path.join(path, name + '.txt')
    try:
        with open(file, 'r') as f:
            return f.readlines()
    except Exception as e:
        print(e)
def write_pickle(self, data, name, **kwargs):
    """
    :param data: data, any picklable object
    :param name: pickle file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('PICKLE_DIR')
    :return: None
    """
    path = kwargs.get('path', DIR_dict.get('PICKLE_DIR'))
    file = os.path.join(path, name + '.pickle')
    try:
        with open(file, 'wb') as f:
            pickle.dump(data, f)
    except Exception as e:
        print(e)
def read_pickle(self, name, **kwargs):
    """
    Read a pickle file.
    :param name: pickle file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('PICKLE_DIR')
    :return: data, any format
    """
    path = kwargs.get('path', DIR_dict.get('PICKLE_DIR'))
    file = os.path.join(path, name + '.pickle')
    try:
        with open(file, 'rb') as f:
            return pickle.load(f)
    except Exception as e:
        print(e)
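
# Round-trip sketch for the two pickle helpers (hypothetical `handler`;
# pickle accepts arbitrary Python objects):
#
#   handler.write_pickle({'weights': [0.1, 0.9]}, 'model_state')
#   state = handler.read_pickle('model_state')
#   assert state == {'weights': [0.1, 0.9]}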
def write_json(self, data, name, **kwargs):
    """
    :param data: data, dict
    :param name: JSON file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('JSON_DIR')
    :return: None
    """
    path = kwargs.get('path', DIR_dict.get('JSON_DIR'))
    file = os.path.join(path, name + '.json')
    try:
        with open(file, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False)
    except Exception as e:
        print(e)
def read_csv(self, name, **kwargs):
    """
    Read a CSV file.
    :param name: CSV file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('CSV_DIR')
    :return: data, DataFrame
    """
    path = kwargs.get('path', DIR_dict.get('CSV_DIR'))
    file = os.path.join(path, name + '.csv')
    try:
        return pd.read_csv(file)
    except Exception as e:
        print(e)
def Backup(self, db, host, **kwargs):
    path = kwargs.get('path', DIR_dict.get('BACKUP_DIR'))
    file = os.path.join(path, 'mongo')
    USER = kwargs.get('USER', self.user)
    PASSWORD = kwargs.get('PASSWORD', self.passwd)
    print("Backing up database: {0}\n".format(db))
    try:
        os.system(
            """mongodump -h {0} --authenticationDatabase admin -u {1} -p {2} -d {3} -o {4}"""
            .format(host, USER, PASSWORD, db, file))
    except Exception as e:
        self.logger.warning("Database backup failed: {0}".format(e))
    print("Database {0} backed up.".format(db))
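
# For reference, the shell command Backup() assembles looks like this
# (values are placeholders, not real credentials):
#
#   mongodump -h 127.0.0.1 --authenticationDatabase admin \
#       -u root -p <password> -d mydb -o <BACKUP_DIR>/mongo
#
# mongodump writes one dump per collection under <BACKUP_DIR>/mongo/mydb,
# which is exactly the directory Restore() below points mongorestore at.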
def read_excel(self, name, **kwargs):
    """
    Read an Excel file.
    :param name: Excel file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('EXCEL_DIR')
    :return: data, DataFrame
    """
    path = kwargs.get('path', DIR_dict.get('EXCEL_DIR'))
    file = os.path.join(path, name + '.xlsx')
    try:
        return pd.read_excel(file)
    except Exception as e:
        print(e)
def read_h5(self, name, **kwargs):
    """
    Read an h5 file.
    :param name: h5 file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('H5_DIR')
    :return: an open h5py.File (close it when done)
    """
    import h5py
    path = kwargs.get('path', DIR_dict.get('H5_DIR'))
    file = os.path.join(path, name + '.h5')
    try:
        return h5py.File(file, 'r')
    except Exception as e:
        print(e)
def write_h5(self, data, labels, name, **kwargs):
    """
    :param data: data, array-like
    :param labels: labels, array-like
    :param name: h5 file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('H5_DIR')
    :return: None
    """
    import h5py
    path = kwargs.get('path', DIR_dict.get('H5_DIR'))
    file = os.path.join(path, name + '.h5')
    try:
        with h5py.File(file, 'w') as f:
            f['data'] = data      # store the data under the key 'data'
            f['labels'] = labels  # store the labels under the key 'labels'
    except Exception as e:
        print(e)
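
# Round-trip sketch for the h5 helpers (hypothetical `handler`). read_h5
# returns an open h5py.File, so datasets are materialised with slicing and
# the file should be closed afterwards:
#
#   import numpy as np
#   handler.write_h5(np.zeros((10, 3)), np.arange(10), 'toy')
#   f = handler.read_h5('toy')
#   data, labels = f['data'][:], f['labels'][:]
#   f.close()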
def write_txt(self, data, name, **kwargs):
    """
    :param data: data, iterable of strings
    :param name: txt file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('TXT_DIR')
    :return: None
    """
    path = kwargs.get('path', DIR_dict.get('TXT_DIR'))
    file = os.path.join(path, name + '.txt')
    try:
        with open(file, 'w') as f:
            for line in data:
                f.write(line)
                f.write('\n')
    except Exception as e:
        print(e)
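
# Round-trip sketch for the txt helpers (hypothetical `handler`). write_txt
# appends '\n' after every item, and read_txt's readlines() keeps those
# newlines, so strip them on the way back in:
#
#   handler.write_txt(['line one', 'line two'], 'notes')
#   lines = [l.strip() for l in handler.read_txt('notes')]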
def Restore(self, db, host, **kwargs):
    """
    Example of the underlying command:
    mongorestore -h 172.39.215.213 --authenticationDatabase admin -uroot -p <password> -d epo --dir /root/backup/mongo/epo
    :return: None
    """
    path = kwargs.get('path', DIR_dict.get('BACKUP_DIR'))
    file = os.path.join(path, 'mongo/{0}'.format(db))
    USER = kwargs.get('USER', self.user)
    PASSWORD = kwargs.get('PASSWORD', self.passwd)
    print("Restoring database: {0}\n".format(db))
    try:
        os.system(
            """mongorestore -h {0} --authenticationDatabase admin -u {1} -p {2} -d {3} --dir {4}"""
            .format(host, USER, PASSWORD, db, file))
    except Exception as e:
        self.logger.warning("Database restore failed: {0}".format(e))
    print("Database {0} restored.".format(db))
def read_json(self, name, **kwargs):
    """
    Read a JSON file.
    :param name: JSON file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('JSON_DIR')
    :return: data, list/dict
    """
    path = kwargs.get('path', DIR_dict.get('JSON_DIR'))
    file = os.path.join(path, name + '.json')
    try:
        # json.load expects a file object, not a path string
        with open(file, 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        print(e)
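
# Round-trip sketch for the JSON helpers (hypothetical `handler`;
# ensure_ascii=False in write_json keeps non-ASCII text readable on disk):
#
#   handler.write_json({'名称': 'demo', 'n': 1}, 'config')
#   cfg = handler.read_json('config')
#   assert cfg['n'] == 1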
def ExportCollection(self, collection, name, host='127.0.0.1', **kwargs):
    """
    Export a collection from the current database to a JSON file.
    :param collection: collection name
    :param name: output file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('JSON_DIR')
    :return: None
    """
    path = kwargs.get('path', DIR_dict.get('JSON_DIR'))
    file = os.path.join(path, name + '.json')
    USER = kwargs.get('USER', self.user)
    PASSWORD = kwargs.get('PASSWORD', self.passwd)
    self.logger.warning(
        "Exporting collection {0} from the current database to a JSON file".format(collection))
    try:
        os.system(
            """mongoexport -h {0} --authenticationDatabase admin -u {1} -p {2} -d {3} -c {4} -o {5}"""
            .format(host, USER, PASSWORD, self.db.name, collection, file))
    except Exception as e:
        self.logger.warning("Collection export failed: {0}".format(e))
def ImportCollection(self, collection, name, host, **kwargs):
    """
    Import a JSON file into the current database as a collection.
    :param collection: collection name
    :param name: input file name (without extension)
    :param kwargs: path, defaults to DIR_dict.get('JSON_DIR')
    :return: None
    """
    path = kwargs.get('path', DIR_dict.get('JSON_DIR'))
    file = os.path.join(path, name + '.json')
    # Default to the instance credentials, as in ExportCollection, instead of
    # hard-coded values
    USER = kwargs.get('USER', self.user)
    PASSWORD = kwargs.get('PASSWORD', self.passwd)
    self.logger.warning(
        "Importing JSON file into the current database as collection {0}".format(collection))
    try:
        os.system(
            """mongoimport -h {0} --authenticationDatabase admin -u {1} -p {2} -d {3} -c {4} --file {5}"""
            .format(host, USER, PASSWORD, self.db.name, collection, file))
    except Exception as e:
        self.logger.warning("Collection import failed: {0}".format(e))
def import_private_data(self, **kwargs):
    if kwargs.get("province_expense"):
        def loadData(filePath):
            fr = open(filePath, 'r+', encoding='gbk')
            lines = fr.readlines()
            retData = []
            retCityName = []
            for line in lines:
                items = line.strip().split(",")
                retCityName.append(items[0])
                retData.append([float(items[i]) for i in range(1, len(items))])
            return retData, retCityName

        path = os.path.join(DIR_dict.get("TXT_DIR"), "city.txt")
        return loadData(path)
    elif kwargs.get("online_times"):
        def loadData(filePath):
            mac2id = dict()
            onlinetimes = []
            f = open(filePath, encoding='utf-8')
            for line in f:
                mac = line.split(',')[2]
                onlinetime = int(line.split(',')[6])
                starttime = int(line.split(',')[4].split(' ')[1].split(':')[0])
                if mac not in mac2id:
                    mac2id[mac] = len(onlinetimes)
                    onlinetimes.append((starttime, onlinetime))
                else:
                    onlinetimes[mac2id[mac]] = [(starttime, onlinetime)]
            real_X = np.array(onlinetimes).reshape((-1, 2))
            return real_X

        path = os.path.join(DIR_dict.get("TXT_DIR"), "online_times.txt")
        return loadData(path)
    elif kwargs.get("image_segmentation"):
        def loadData(filePath):
            f = open(filePath, 'rb')
            data = []
            img = image.open(f)
            m, n = img.size
            for i in range(m):
                for j in range(n):
                    x, y, z = img.getpixel((i, j))[:3]
                    data.append([x / 256.0, y / 256.0, z / 256.0])
            f.close()
            return np.mat(data), m, n

        return loadData(filePath=kwargs.get("path"))
    elif kwargs.get("posture"):
        def load_dataset(feature_paths, label_paths):
            """
            Read every file in the feature and label path lists,
            concatenate the contents and return them.
            """
            # Empty label and feature arrays to accumulate into
            feature = np.ndarray(shape=(0, 41))
            label = np.ndarray(shape=(0, 1))
            for file in feature_paths:
                # Comma-separated feature data; '?' marks missing values;
                # the files carry no header row
                df = pd.read_table(file, delimiter=',', na_values='?', header=None)
                # Impute missing values with the column mean
                # (SimpleImputer expects np.nan, not the string "NaN")
                imp = SimpleImputer(missing_values=np.nan, strategy="mean")
                imp.fit(df)
                df = imp.transform(df)
                # Merge the newly read data into the feature set
                feature = np.concatenate((feature, df))
            for file in label_paths:
                # Label files carry no header row either
                df = pd.read_table(file, header=None)
                # Merge the newly read data into the label set
                label = np.concatenate((label, df))
            return feature, label

        # Collect the data paths
        feature_paths = []
        label_paths = []
        paths = [
            os.path.join(DIR_dict.get("TXT_DIR"), "posture", letter)
            for letter in ["A", "B", "C", "D", "E"]
        ]
        for path in paths:
            for file in os.listdir(path):
                if ".feature" in file:
                    feature_paths.append(os.path.join(path, file))
                elif ".label" in file:
                    label_paths.append(os.path.join(path, file))
        # The first four subjects form the training set
        X_train, y_train = load_dataset(feature_paths=feature_paths[:4],
                                        label_paths=label_paths[:4])
        # The last subject forms the test set
        X_test, y_test = load_dataset(feature_paths=feature_paths[4:],
                                      label_paths=label_paths[4:])
        return X_train, X_test, y_train, y_test
    elif kwargs.get("stock"):
        # read_csv: encoding is the file encoding, parse_dates parses column 0
        # as dates, index_col makes column 0 the index;
        # sort_index then sorts ascending by that date index in place
        data = pd.read_csv(os.path.join(DIR_dict.get("CSV_DIR"), '000777.csv'),
                           encoding='gbk', parse_dates=[0], index_col=0)
        data.sort_index(ascending=True, inplace=True)
        # dayfeature: use a 150-day window
        # featurenum: 5 features per day in the window
        # x: all 5 features over each 150-day window, plus the day's opening
        #    price as one extra feature (hence featurenum + 1 columns)
        # y: whether the price rose (1) or fell (0) that day
        # data.shape[0] - dayfeature: each sample needs 150 days of history,
        # so e.g. 200 rows of data yield only 50 usable samples
        dayfeature = 150
        featurenum = 5 * dayfeature
        x = np.zeros((data.shape[0] - dayfeature, featurenum + 1))
        y = np.zeros((data.shape[0] - dayfeature))
        for i in range(0, data.shape[0] - dayfeature):
            x[i, 0:featurenum] = np.array(
                data[i:i + dayfeature]
                [['收盘价', '最高价', '最低价', '开盘价', '成交量']]).reshape((1, featurenum))
            x[i, featurenum] = data.iloc[i + dayfeature]['开盘价']
        for i in range(0, data.shape[0] - dayfeature):
            if data.iloc[i + dayfeature]['收盘价'] >= data.iloc[i + dayfeature]['开盘价']:
                y[i] = 1
            else:
                y[i] = 0
        return x, y
    elif kwargs.get("house_price"):
        X = []
        y = []
        with open(os.path.join(DIR_dict.get("TXT_DIR"), 'prices.txt'), 'r') as file:
            lines = file.readlines()
            for line in lines:
                items = line.strip().split(',')
                X.append(int(items[0]))
                y.append(int(items[1]))
        length = len(X)
        X = np.array(X).reshape([length, 1])
        y = np.array(y)
        return X, y
    elif kwargs.get("traffic"):
        data = np.genfromtxt(os.path.join(DIR_dict.get("CSV_DIR"), 'traffic.csv'),
                             delimiter=',', skip_header=True)
        X = data[:, 1:5]
        y = data[:, 5]
        return X, y
    elif kwargs.get("handwriting"):
        def img2vector(fileName):
            retMat = np.zeros([1024], int)  # returned vector, 1 * 1024
            with open(fileName) as file:
                lines = file.readlines()  # read every line of the file
            for i in range(32):  # walk the 32 x 32 grid
                for j in range(32):  # and store the 0/1 digits in retMat
                    retMat[i * 32 + j] = lines[i][j]
            return retMat

        def readDataSet(path):
            fileList = os.listdir(path)  # every file in the directory
            numFiles = len(fileList)  # number of files to read
            dataSet = np.zeros([numFiles, 1024], int)  # holds all digit files
            hwLabels = np.zeros([numFiles])  # holds the matching labels
            for i in range(numFiles):  # walk the files
                filePath = fileList[i]  # file name
                digit = int(filePath.split('_')[0])  # label comes from the file name
                hwLabels[i] = digit  # stored as a plain digit, not a one-hot vector
                dataSet[i] = img2vector(os.path.join(path, filePath))  # file contents
            return dataSet, hwLabels

        # Read the dataset
        path = os.path.join(DIR_dict.get("TXT_DIR"), "digits")
        train_dataSet, train_hwLabels = readDataSet(
            path=os.path.join(path, 'trainingDigits'))
        test_dataSet, test_hwLabels = readDataSet(
            path=os.path.join(path, 'testDigits'))
        return train_dataSet, test_dataSet, train_hwLabels, test_hwLabels
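
# import_private_data dispatches on boolean-style kwargs, one per bundled
# dataset. A sketch of the calling convention (hypothetical `handler`; the
# referenced data files must already exist under the DIR_dict directories):
#
#   X, y = handler.import_private_data(house_price=True)
#   x, y = handler.import_private_data(stock=True)
#   X_train, X_test, y_train, y_test = handler.import_private_data(posture=True)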
def job(self, urls):
    client = YouGet(path=self.path if self.path else DIR_dict.get('RB_DIR'))
    for url in urls:
        client.download(url)