def lowPrice_data_main(params):
    write_idList(params, cursor)
    utils.delete_before4_localData(params["localFileName_lowPrice_idList"], params)
    if os.path.exists(params["localFileName_org_lowPrice_data"]):
        os.remove(params["localFileName_org_lowPrice_data"])
    columns = ["queryDate", 'price', 'id', 'org', 'dst']
    with open(params["localFileName_org_lowPrice_data"], 'a') as f_write:
        f_write.write(",".join(columns))
        f_write.write("\n")
        f_write.seek(2)
    p = Pool(10)
    counter = 0
    with open(params["localFileName_lowPrice_idList"], 'r') as f_read:
        for line in f_read:
            counter += 1
            L = line.strip().split(',')
            p.apply_async(lowPrice_data, args=(params, L,))
    p.close()
    p.join()
    logger.info("=====\"{}\" finished======".format(
        params["localFileName_org_lowPrice_data"].split('/')[-1]))
    utils.delete_before4_localData(params["localFileName_org_lowPrice_data"], params)
    utils.upload_to_hdfs(params["localFileName_org_lowPrice_data"],
                         params["sparkDirName_org_lowPrice_data"], params)

def infoBase_data(params):
    columns = ["infoBase_id", "departTime", "arriveTime", "isShare"]
    with open(params["localFileName_org_infoBase_data"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        for sample in cursor4.find(
                {'date': {'$gte': datetime.today().strftime("%Y-%m-%d")}}):
            infoBase_id = sample.get("_id")
            departtime = sample.get('origindeparttime')
            arrivetime = sample.get('originarrivetime')
            isShare = sample.get('isshare')
            try:
                content = ','.join(
                    [infoBase_id, departtime, arrivetime, str(isShare)])
                f.write(content + '\n')
            except:
                # Skip records with missing fields
                continue
    logger.info("====\"{}\" finished====".format(
        params["localFileName_org_infoBase_data"].split('/')[-1]))
    utils.delete_before4_localData(params["localFileName_org_infoBase_data"], params)
    utils.upload_to_hdfs(params["localFileName_org_infoBase_data"],
                         params["sparkDirName_org_infoBase_data"], params)

def seatleft_data(params):
    columns = ['queryDatetime', 'seatLeft', 'seatLeft_id']
    with open(params["localFileName_org_seatLeft_data"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        f.seek(2)
    for sample in cursor3.find({}):
        seatLeft_id = sample.get('_id')
        df = pd.DataFrame.from_dict(
            sample.get('fc'),
            orient='index').reset_index().rename(columns={
                'index': 'queryDatetime',
                0: 'seatLeft'
            })
        df['seatLeft_id'] = seatLeft_id
        df.to_csv(params["localFileName_org_seatLeft_data"],
                  header=False, index=False, mode='a')
    logger.info("====\"{}\" finished====".format(
        params["localFileName_org_seatLeft_data"].split('/')[-1]))
    utils.delete_before4_localData(params["localFileName_org_seatLeft_data"], params)
    utils.upload_to_hdfs(params["localFileName_org_seatLeft_data"],
                         params["sparkDirName_org_seatLeft_data"], params)

def orgPrice_data(params):
    columns = ["orgPrice_id", "fc", "orgPrice"]
    with open(params["localFileName_org_orgPrice_data"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        for sample in cursor1.find(
                {'date': {'$gte': datetime.today().strftime("%Y-%m-%d")}}):
            orgPrice_id = sample.get('_id')
            del sample['_id']
            del sample["date"]
            del sample["ut"]
            del sample["src"]
            for key, value in sample.items():
                try:
                    orgPrice = value.get('Y').get('price')
                    content = ','.join([orgPrice_id, key, orgPrice])
                    f.write(content + '\n')
                except:
                    # Skip entries that have no 'Y' cabin price
                    continue
    logger.info("====\"{}\" finished====".format(
        params["localFileName_org_orgPrice_data"].split('/')[-1]))
    utils.delete_before4_localData(params["localFileName_org_orgPrice_data"], params)
    utils.upload_to_hdfs(params["localFileName_org_orgPrice_data"],
                         params["sparkDirName_org_orgPrice_data"], params)

def write_to_HDFS(df_all, params):
    # Rows whose departure date is after generateDate are the online data to be
    # predicted; rows departing before generateDate are training data; rows
    # departing exactly on generateDate are not kept.
    # Randomly sample roughly 60 million rows from the full data set as the
    # training set.
    df_train_DNN = df_all.filter(df_all.departDate < params["generateDate_str2"])\
        .drop(*list(set(params['dropFeatures']) - set(params['baseColumns'])))\
        .sample(False, float("%.4f" % (6e7 / df_all.count())))\
        .orderBy(rand())
    # df_train_DNN.cache()
    df_train_DNN.write.format('parquet').save(
        params["sparkHost"] + params["sparkDirName_trainData"], mode='overwrite')
    logger.info("====\"{}\" write to HDFS finished ====".format(
        params["sparkDirName_trainData"]))
    utils.delete_before4_sparkData(params["sparkDirName_trainData"], params)
    # df_trainSample_DNN = df_train_DNN.sample(False, float("%.4f" % (2e5/df_train_DNN.count())))
    # df_trainSample_DNN.write.format('parquet').save(params["sparkHost"] + params["sparkDirName_trainSampleData"], mode='overwrite')
    # logger.info("====\"{}\" write to HDFS finished ====".format(params["sparkDirName_trainSampleData"]))
    # utils.delete_before4_sparkData(params["sparkDirName_trainSampleData"], params)
    df_online_DNN = df_all.filter(df_all.departDate > params["generateDate_str2"])\
        .drop(*list(set(params['dropFeatures']) - set(params['baseColumns'])))
    df_online_DNN.write.format('parquet').save(
        params["sparkHost"] + params["sparkDirName_onlineData"], mode='overwrite')
    logger.info("====\"{}\" write to HDFS finished ====".format(
        params["sparkDirName_onlineData"]))
    utils.delete_before4_sparkData(params["sparkDirName_onlineData"], params)

def org_ticketHistory_data(params):
    with open(params["localFileName_org_ticketHistory_data"], 'w') as f:
        columns = ['queryDatetime', 'hasTicket', 'id']
        f.write(','.join(columns))
        f.write('\n')
        f.seek(0, 2)
    for sample in cursor_ticket.find({}):
        id = sample.get('_id')
        del sample["_id"]
        try:
            del sample["ut"]
            del sample["noticket"]
            del sample["hasticket"]
        except:
            pass
        df = pd.DataFrame.from_dict(
            sample, orient='index').reset_index().rename(columns={
                'index': 'queryDatetime',
                0: 'hasTicket'
            })
        df['id'] = id
        df.to_csv(params["localFileName_org_ticketHistory_data"],
                  mode='a', header=False, index=False)
    logger.info("====\"{}\" finished====".format(
        params["localFileName_org_ticketHistory_data"].split('/')[-1]))
    utils.delete_before2_localData(
        params["localFileName_org_ticketHistory_data"], params)
    utils.upload_to_hdfs(params["localFileName_org_ticketHistory_data"],
                         params["sparkDirName_org_ticketHistory_data"], params)

def delete_before2_localData(fileName, params):
    before2_dateStr1 = datetime.strftime(
        params["generateDate"] - timedelta(days=2), "%Y%m%d")
    pattern = re.compile(r'\d{8}')
    before2_fileName = re.sub(pattern, before2_dateStr1, fileName)
    if os.path.exists(before2_fileName):
        os.remove(before2_fileName)
        logger.info("====\"{}\" delete finished ====".format(before2_fileName))

def upload_to_hdfs(localFileName, sparkDirName, params):
    clientHdfs = client.InsecureClient(params["hdfsHost"], user="******")
    if sparkDirName.split('/')[-1] in clientHdfs.list(
            os.path.dirname(sparkDirName)):
        clientHdfs.delete(sparkDirName, recursive=True)
    clientHdfs.upload(sparkDirName, localFileName)
    logger.info("====\"{}\" upload to HDFS finished====".format(
        localFileName.split('/')[-1]))
    delete_before2_sparkData(sparkDirName, params)

def delete_before2_sparkData(fileName, params):
    clientHdfs = client.InsecureClient(params["hdfsHost"], user="******")
    before2_dateStr1 = datetime.strftime(
        params["generateDate"] - timedelta(days=2), "%Y%m%d")
    pattern = re.compile(r'\d{8}')
    before2_fileName = re.sub(pattern, before2_dateStr1, fileName)
    # clientHdfs.list() returns base names, so compare against the base name
    if before2_fileName.split('/')[-1] in clientHdfs.list(os.path.dirname(fileName)):
        clientHdfs.delete(before2_fileName, recursive=True)
        logger.info("====\"{}\" delete finished ====".format(before2_fileName))

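# Hedged illustration (not part of the pipeline): the retention helpers above
# assume every artifact name embeds a single YYYYMMDD date stamp, which re.sub
# rewrites to the date two days earlier. The path below is hypothetical and
# only demonstrates that naming convention.
def _demo_before2_fileName():
    import re
    from datetime import datetime, timedelta
    fileName = "/data/org_lowPrice_data_20240601.csv"  # hypothetical example path
    generateDate = datetime(2024, 6, 1)
    before2_dateStr1 = datetime.strftime(generateDate - timedelta(days=2), "%Y%m%d")
    # Returns '/data/org_lowPrice_data_20240530.csv'
    return re.sub(r'\d{8}', before2_dateStr1, fileName)
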
def cronTask():
    logger.info("Starting scheduled task, interval {} minutes".format(interval))
    # Open the repository whose path is passed on the command line
    repo = Repo(sys.argv[1])
    # Stage and commit local changes
    repo.git.add('--all')
    repo.git.commit('-m', 'Auto upload to GitHub')
    # Get the remote repository
    remote = repo.remote()
    # Push the local changes to the remote repository
    remote.push()

def _init_write_to_mongo(self):
    data = json.loads(self.df.to_json(orient='records'))
    requests = []
    for sample in data:
        requests.append(
            UpdateOne({'_id': sample.get('_id')}, {'$set': sample}, upsert=True))
    self.cursor.bulk_write(requests, ordered=False)
    logger.info("====update mongoDB finished====")
    self.cursor.delete_many(
        {'queryDate': {'$lt': self.params["yesterday_str2"]}})

def run_load_model_predict(params, dfm_params):
    clientHdfs = client.InsecureClient(params['hdfsHost'], user="******")
    fileNames = clientHdfs.list(params['sparkDirName_onlineData'])
    fileNames.remove('_SUCCESS')
    featureDict, _, _ = get_featureDict_info(params)
    dataParser = DataParser(params, featureDict)
    df_online = load_data(clientHdfs, params)
    df_online_index, df_online_value = dataParser.data_parser(df_online,
                                                              has_label=False)
    deep_model_predict = DeepFM_model_predict(params, dfm_params)
    deep_model_predict.write_result(params, df_online, df_online_index,
                                    df_online_value)
    deep_model_predict.sess.close()
    logger.info("====\"{}\" write finished====".format(
        params["localFileName_deepFM_result"].split("/")[-1]))

def write_idList(params, cursor):
    stringIO_temp = StringIO()
    count = 0
    with open(params["localFileName_lowPrice_idList"], 'w') as f:
        # Iterate over the ids in MongoDB and write them to the file,
        # flushing one comma-separated line per 10000 ids
        for sample in cursor.find():
            count += 1
            stringIO_temp.write(sample.get('_id') + ',')
            if count % 10000 == 0:
                content = stringIO_temp.getvalue().strip(',') + '\n'
                f.write(content)
                stringIO_temp = StringIO()
        # Write whatever is left in the buffer
        content = stringIO_temp.getvalue().strip(',') + '\n'
        f.write(content)
    logger.info("====\"{}\" finished====".format(
        params["localFileName_lowPrice_idList"].split('/')[-1]))

def early_stop(self, df_index, df_value, y_label, epoch, counter, params):
    valScore, _ = self.evaluate(df_index, df_value, y_label)
    if self.best_valScore < valScore:
        self.best_valScore = valScore
        self.lessScores_container = []
        self.best_valScore_epoch = epoch + 1
        self.best_valScore_counter = counter
        self.saver.save(self.sess,
                        "{}-{}".format(params['localDirName_deepFM_model'],
                                       params["generateDate_str1"]),
                        global_step=0)
    else:
        self.lessScores_container.append(valScore)
        if len(self.lessScores_container) > 10:
            logger.info(
                "====deepFM train Model best_counter is {}-{}".format(
                    self.best_valScore_epoch, self.best_valScore_counter))
            self.earlyStop_info = True
    return self.earlyStop_info

def org_trainStation_data(params):
    with open(params["localFileName_org_trainStation_data"], 'w') as f:
        columns = [
            '_id', 'jianpin', 'code', 'quanpin', 'name', 'citycn', 'latitude',
            'longitude', 'citycode', 'realdistancesmap', 'site', 'distancesmap'
        ]
        f.write(','.join(columns))
        f.write('\n')
        f.seek(0, 2)
        for sample in cursor_station.find({}):
            content = ','.join(map(str, sample.values()))
            f.write(content + '\n')
    logger.info("====\"{}\" finished====".format(
        params["localFileName_org_trainStation_data"].split('/')[-1]))
    utils.delete_before2_localData(
        params["localFileName_org_trainStation_data"], params)
    utils.upload_to_hdfs(params["localFileName_org_trainStation_data"],
                         params["sparkDirName_org_trainStation_data"], params)

def write_DCN_featureDict(df_all, params):
    df = df_all.drop(*params["dropFeatures"]).drop(params['label'])
    featureDict = {}
    tc = 0
    for colName in df.columns:
        if colName in params["numericCols"]:
            continue
        else:  # colName in categoryCols
            uniqueFeature = df.select(
                colName).distinct().toPandas()[colName].astype('float').values
            featureDict[colName] = dict(
                zip(uniqueFeature, range(tc, len(uniqueFeature) + tc)))
            tc = tc + len(uniqueFeature)
    with open(params["featureDict_fileName"], 'wb') as f:
        pickle.dump(featureDict, f)
    logger.info("====\"{}\" finished ====".format(
        params["featureDict_fileName"].split('/')[-1]))
    utils.delete_before4_localData(params["featureDict_fileName"], params)

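# Hedged illustration (not part of the pipeline): write_DCN_featureDict pickles
# a {column: {raw_value: global_index}} mapping, which a parser such as
# DataParser can use to turn each raw categorical value into the feature index
# fed to the embedding layer. The column names, values, and indices below are
# made up for demonstration only.
def _demo_lookup_feature_index():
    # A toy featureDict of the same shape that write_DCN_featureDict pickles
    featureDict = {'org': {10.0: 0, 11.0: 1}, 'dst': {10.0: 2, 11.0: 3}}
    raw_value = 11.0
    # The parser replaces the raw categorical value with its global index
    return featureDict['org'][raw_value]  # -> 1
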
def lowPrice_online_data(params):
    tomorrow_str2 = datetime.strftime(
        params["generateDate"] + timedelta(days=1), '%Y-%m-%d')
    after30_str2 = datetime.strftime(
        params["generateDate"] + timedelta(days=30), '%Y-%m-%d')
    monthDay_list = pd.date_range(
        start=tomorrow_str2, end=after30_str2,
        freq='d').map(lambda x: datetime.strftime(x, '%m-%d'))
    columns = ["queryDate", 'price', 'id', 'org', 'dst']
    with open(params["localFileName_org_lowPrice_onlineData"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        for monthDay in monthDay_list:
            # For the online data, filter by id and keep only the price at the
            # latest query date (one record per id); the remaining checks are
            # done on Spark.
            # If the latest query date is within the last 7 days, i.e. the id
            # has a price record in the last 7 days, the latest query date is
            # replaced with yesterday (the model can only use yesterday's
            # information to predict the price trend from today onwards).
            for sample in cursor.find(
                    {'_id': {"$regex": r'.*{}$'.format(monthDay)}}):
                lowPrice_id = sample.get('_id')
                del sample['_id']
                org = sample.get('dairport')
                del sample['dairport']
                dst = sample.get('aairport')
                del sample['aairport']
                # df = pd.DataFrame.from_dict(sample, orient='index').reset_index().rename(
                #     columns={'index': 'queryDate', 0: 'price'})
                queryDate = max(sample.keys())
                price = sample.get(queryDate)
                # historyLowPrice_fn_domestic contains malformed ids such as
                # _id="3U3100_null_null_09-24"
                try:
                    content = ','.join(
                        [queryDate, price, lowPrice_id, org, dst])
                    f.write(content + '\n')
                except:
                    continue
    logger.info("=====\"{}\" finished======".format(
        params["localFileName_org_lowPrice_onlineData"].split('/')[-1]))
    utils.delete_before4_localData(
        params["localFileName_org_lowPrice_onlineData"], params)
    utils.upload_to_hdfs(params["localFileName_org_lowPrice_onlineData"],
                         params["sparkDirName_org_lowPrice_onlineData"], params)

def globalAirport_data(params):
    columns = ["Airport_code", "latitude", "longitude"]
    with open(params["localFileName_org_Airport_data"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        for sample in cursor2.find({}):
            Airport_code = sample.get("_id")
            latitude = sample.get("latitude")
            longitude = sample.get("longitude")
            try:
                content = ','.join([Airport_code, latitude, longitude])
                f.write(content + '\n')
            except:
                # Skip airports with missing coordinates
                continue
    logger.info("====\"{}\" finished====".format(
        params["localFileName_org_Airport_data"].split('/')[-1]))
    utils.delete_before4_localData(params["localFileName_org_Airport_data"], params)
    utils.upload_to_hdfs(params["localFileName_org_Airport_data"],
                         params["sparkDirName_org_Airport_data"], params)

def initialize():
    """
    Initialization function: every time the project starts, kill the processes
    it already owns and restart them according to the task status. The old
    processes exit and are destroyed after the restart.
    :return:
    """
    logger.info("start init function")
    task_list = list(mongodb.TASK.find())
    if len(task_list) == 0:
        return 'no tasks need init'
    for task in task_list:
        pid = task.get('pid')
        if pid:
            try:
                os.kill(pid, signal.SIGKILL)
                logger.info('initialize function kill pid:{} success'.format(pid))
            except Exception as e:
                logger.info('initialize function kill pid:{} failed'.format(pid))
                logger.info(e)
            mongodb.TASK.find_one_and_update({'_id': task['_id']},
                                             {'$set': {'pid': ''}})
    logger.info("finish init")
    return 'initialize success'

def generate_test_data(params):
    df_train_DNN = spark.read.format('parquet').load(
        params["sparkHost"] + params["sparkDirName_trainData"])
    df_train_DNN_test = df_train_DNN.filter(col('queryDate') < valDate)
    df_train_DNN_test.repartition(200).write.format('parquet').save(
        params["sparkHost"] + params["sparkDirName_trainData_test"],
        mode='overwrite')
    logger.info("====\"{}\" write to HDFS finished ====".format(
        params["sparkDirName_trainData_test"]))
    df_trainSample_DNN_test = df_train_DNN_test.sample(
        False, float("%.4f" % (2e5 / df_train_DNN_test.count())))
    df_trainSample_DNN_test.write.format('parquet').save(
        params["sparkHost"] + params["sparkDirName_trainSampleData_test"],
        mode='overwrite')
    logger.info("====\"{}\" write to HDFS finished ====".format(
        params["sparkDirName_trainSampleData_test"]))
    df_val_DNN_test = df_train_DNN.filter(col('queryDate') == valDate)
    df_val_DNN_test.write.format('parquet').save(
        params["sparkHost"] + params["sparkDirName_valData_test"],
        mode='overwrite')
    logger.info("====\"{}\" write to HDFS finished ====".format(
        params["sparkDirName_valData_test"]))

def seatleft_data_add(params):
    tomorrow_str2 = datetime.strftime(
        params["generateDate"] + timedelta(days=1), '%Y-%m-%d')
    today_monthDay = datetime.strftime(params["generateDate"], '%m-%d')
    yesterday_str2 = (params["generateDate"] - timedelta(days=1)).strftime('%Y-%m-%d')
    after30_str2 = datetime.strftime(
        params["generateDate"] + timedelta(days=30), '%Y-%m-%d')
    monthDay_list = pd.date_range(
        start=yesterday_str2, end=after30_str2,
        freq='d').map(lambda x: datetime.strftime(x, '%m-%d')).to_list()
    monthDay_list.remove(today_monthDay)
    columns = ['queryDatetime', 'seatLeft', 'seatLeft_id']
    with open(params["localFileName_org_seatLeft_data_add"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        f.seek(2)
    for monthDay in monthDay_list:
        for sample in cursor3.find({'_id': {"$regex": r'.*{}$'.format(monthDay)}}):
            seatLeft_id = sample.get('_id')
            df = pd.DataFrame.from_dict(
                sample.get('fc'),
                orient='index').reset_index().rename(columns={
                    'index': 'queryDatetime',
                    0: 'seatLeft'
                })
            df['seatLeft_id'] = seatLeft_id
            df.to_csv(params["localFileName_org_seatLeft_data_add"],
                      header=False, index=False, mode='a')
    logger.info("====\"{}\" finished====".format(
        params["localFileName_org_seatLeft_data_add"].split('/')[-1]))
    utils.delete_before4_localData(
        params["localFileName_org_seatLeft_data_add"], params)
    utils.upload_to_hdfs(params["localFileName_org_seatLeft_data_add"],
                         params["sparkDirName_org_seatLeft_data_add"], params)

def timer(task_id):
    """
    pymongo is thread safe, but passing the client across processes raises a
    warning; setting connect=False is the suggested fix, but it does not seem
    to help much.
    Timer function.
    1. Alert polling task: runs the required logic at a configurable interval
       in minutes.
    2. Periodic statistics task: runs the configured queries according to the
       crontab rule set for the task.
    type == 1 runs the original polling/monitoring task by default.
    :return:
    """
    pid = os.getpid()
    logger.info('task run pid:{}'.format(pid))
    # Record the pid that runs this task so the control process can skip it and
    # let the scheduler below poll periodically.
    db.tasks.find_one_and_update({'_id': task_id}, {'$set': {'pid': pid}})
    # Configuration of the polling task executed in this process (the id may
    # need to be converted to ObjectId)
    task_config = db.tasks.find_one({'_id': task_id})
    type = task_config.get('type')
    # type == 1: periodic alert polling task
    if type == 1:
        timeCell = task_config.get('timeCell')  # polling interval in minutes
        # logger.info('task timeCell:{}'.format(timeCell))
        sched = BlockingScheduler()
        sched.add_job(logAlert_update_info, 'interval',
                      seconds=timeCell * 60, args=(task_id,))
        logger.info('logAlert_update_info function will start, task_id is {}, '
                    'timeCell is {}min'.format(task_id, timeCell))
        sched.start()
    # type == 2: periodic statistics task; a cron trigger decides when to run,
    # all query parameters configured for the project are aggregated and sent
    # in a single e-mail (no SMS option).
    elif type == 2:
        # interval is one of day/week/month and determines the start and end
        # time of the statistics window
        interval = task_config.get('interval')
        crontab = task_config.get('crontab')
        logger.info(crontab)
        # The crontab rule decides the statistics period and other parameters
        sched = BlockingScheduler()
        sched.add_job(count_info_interval, CronTrigger.from_crontab(crontab),
                      args=(task_id,))
        logger.info('count_info_interval will start and interval = {}, '
                    'task_id is {}'.format(interval, task_id))
        sched.start()
    # type == 3: count log errors in a given time range; not a polling task,
    # it runs once and exits after collecting the statistics
    elif type == 3:
        logger.info('count_info_once will start, task_id is {}'.format(task_id))
        count_once_info(task_id)

def lowPrice_train_data_add(params):
    columns = ["queryDate", 'price', 'id', 'org', 'dst']
    yesterday_monthDay_str = (params["generateDate"] -
                              timedelta(days=1)).strftime('%m-%d')
    with open(params["localFileName_org_lowPrice_data_add"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        f.seek(2)
    for sample in cursor.find(
            {'_id': {"$regex": r'.*{}$'.format(yesterday_monthDay_str)}}):
        lowPrice_id = sample.get('_id')
        del sample['_id']
        org = sample.get('dairport')
        del sample['dairport']
        dst = sample.get('aairport')
        del sample['aairport']
        df = pd.DataFrame.from_dict(
            sample, orient='index').reset_index().rename(columns={
                'index': 'queryDate',
                0: 'price'
            })
        df['id'] = lowPrice_id
        df['org'] = org
        df['dst'] = dst
        df.to_csv(params["localFileName_org_lowPrice_data_add"],
                  header=False, index=False, mode='a')
    logger.info("=====\"{}\" finished======".format(
        params["localFileName_org_lowPrice_data_add"].split('/')[-1]))
    utils.delete_before4_localData(
        params["localFileName_org_lowPrice_data_add"], params)
    utils.upload_to_hdfs(params["localFileName_org_lowPrice_data_add"],
                         params["sparkDirName_org_lowPrice_data_add"], params)

def org_TrainPass_data(params):
    with open(params["localFileName_org_TrainPass_data"], 'w') as f:
        columns = [
            'tn', 'arriveTime', 'stationCode', 'departTime', 'stationName',
            'orderNum'
        ]
        f.write(','.join(columns))
        f.write('\n')
        f.seek(0, 2)
        for sample in cursor_pass.find({}):
            tn = sample.get("_id")
            List = sample.get("array")
            counter = 0
            for sub in List:
                counter += 1
                content = ','.join([tn] + list(map(str, sub.values())) +
                                   [str(counter)])
                f.write(content + '\n')
    logger.info("====\"{}\" finished====".format(
        params["localFileName_org_TrainPass_data"].split('/')[-1]))
    utils.delete_before2_localData(params["localFileName_org_TrainPass_data"],
                                   params)
    # Upload the local file to its HDFS directory (the original passed the
    # local file name twice)
    utils.upload_to_hdfs(params["localFileName_org_TrainPass_data"],
                         params["sparkDirName_org_TrainPass_data"], params)

def startCronTask(task, **interval_config):
    # Global scheduler, used to start and stop the scheduled job
    global scheduler
    scheduler = BlockingScheduler()
    scheduler.add_listener(CronTask_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)
    scheduler._logger = logger
    logger.info(
        '==================================== new log section =============================================='
    )
    scheduler.add_job(func=task, trigger='interval', **interval_config,
                      id='push_to_github')
    logger.info('current scheduled jobs: %s', scheduler.get_jobs())
    logger.info('scheduler state: %s', scheduler.state)
    scheduler.start()

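# Hedged usage sketch (an assumption, not shown in the source): startCronTask
# forwards **interval_config to APScheduler's interval trigger, so the
# GitHub auto-push job defined in cronTask would typically be wired up like
# this, with `interval` being the module-level minute count that cronTask logs.
if __name__ == '__main__':
    startCronTask(cronTask, minutes=interval)
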
def judge(self, receiveRequest, userName, pwd):
    try:
        nowUser = MyBaseModel.returnList(
            User.select(User.userName,
                        User.userPwd).where(User.userName == userName))
        if len(nowUser) > 0:
            if nowUser[0]['userPwd'] == pwd:
                my_session = Session(receiveRequest)
                session_id = my_session.getSessionId()
                my_session['name'] = userName
                logger.info('User %s logged in' % userName)
                return {"status": 1, 'session_id': session_id, 'username': userName}
            else:
                logger.info('User %s failed to log in: wrong password' % userName)
                return {"status": 0, "errorInfo": "wrong password"}
        else:
            logger.info('User %s failed to log in: user does not exist' % userName)
            return {"status": 0, "errorInfo": "user name does not exist"}
    except:
        raise

def change(self, user_name, user_oldpwd, user_newpwd1, user_newpwd2):
    try:
        with db.execution_context():
            nowuser = new_users.select().where(
                new_users.username == user_name)
            if len(nowuser) > 0:
                if nowuser[0].userpass == user_oldpwd:
                    # user_roles = getNowUserRole(user_name)
                    if user_newpwd1 == user_newpwd2:
                        nowuser[0].userpass = user_newpwd1
                        nowuser[0].save()
                        logger.info('User %s changed password successfully' % user_name)
                        return json.dumps({"status": 1, 'info': 'password changed'},
                                          ensure_ascii=False)
                    else:
                        return json.dumps(
                            {"status": 0,
                             "errorInfo": "the two new passwords do not match"},
                            ensure_ascii=False)
                else:
                    logger.info('User %s failed to change password: wrong old password'
                                % user_name)
                    return json.dumps({"status": 0,
                                       "errorInfo": "old password is incorrect"},
                                      ensure_ascii=False)
            else:
                logger.info('User %s failed to change password: user does not exist'
                            % user_name)
                return json.dumps({"status": 0,
                                   "errorInfo": "change failed, user does not exist"},
                                  ensure_ascii=False)
    except:
        raise

def judge(self, response_self, user_name, user_pwd):
    try:
        nowuser = MyBaseModel.returnList(
            new_users.select().where(new_users.username == user_name))
        if len(nowuser) > 0:
            if nowuser[0]['userpass'] == user_pwd:
                user_roles = getUserRole(user_name)
                # user_roles = getNowUserRole(user_name)
                my_session = Session(response_self)
                session_id = my_session.get_session_id()
                my_session['name'] = user_name
                logger.info('User %s logged in' % user_name)
                return json.dumps(
                    {"status": 1, 'data': user_roles, 'session_id': session_id},
                    ensure_ascii=False)
            else:
                logger.info('User %s failed to log in: wrong password' % user_name)
                return json.dumps({"status": 0, "errorInfo": "wrong password"},
                                  ensure_ascii=False)
        else:
            logger.info('User %s failed to log in: user does not exist' % user_name)
            return json.dumps({"status": 0, "errorInfo": "user name does not exist"},
                              ensure_ascii=False)
    except:
        raise

dataParser = DataParser(params, featureDict)
gc.collect()
fileNames = clientHdfs.list(params['sparkDirName_trainData'])
# fileNames = os.listdir(params['sparkDirName_trainData'])
fileNames.remove('_SUCCESS')
fileNames_num = len(fileNames)
dfm = deepFM.DeepFM(**dfm_params)
train_scores = []
val_scores = []
train_losses = []
val_losses = []
for epoch in range(params['epoches']):
    counter = 0
    for fileName in fileNames:
        counter += 1
        train_model(fileName, params, dataParser)
        # logger.info("====Train {}-{} finished====".format(epoch, counter))
        if counter % 100 == 0:
            dfm.saver.save(dfm.sess,
                           "{}-{}-{}".format(params['localDirName_deepFM_model'],
                                             params['generateDate_str1'],
                                             epoch + 1),
                           global_step=counter)
dfm.sess.close()
logger.info("====deepFM Train Model finished====")
load_model_to_predict.run_load_model_predict(params, dfm_params)
write_result_to_mongo.Result_To_Mongo(params)

def count_info_interval(task_id):
    """
    Periodically collect statistics for the configured query parameters. Query
    parameters can be added and removed from within the system, and the
    statistics task itself can be switched off. No SMS notification is sent,
    only e-mail.
    :param task_id:
    :return:
    """
    logger.info('count info interval task id is {}'.format(task_id))
    task_config = db.tasks.find_one({'_id': ObjectId(task_id)})
    name = task_config.get('name')
    app = task_config.get('app')
    interval = task_config.get('interval')
    if interval == 1:
        # start and end of the previous day
        start_time, end_time = get_yesterday_timestamp()
    elif interval == 7:
        # start and end of the previous week
        start_time, end_time = get_last_week_times()
    else:
        # start and end of the previous month
        start_time, end_time = get_last_month_times()
    URL = 'http://test.yuxisoft.cn:19200/logstash-{}-*/doc/_search'.format(app)
    person = task_config.get('person')
    params = task_config.get('params')
    # way = task_config.get('way')
    _range = {
        "range": {
            "@timestamp": {
                "gt": "{}".format(start_time),
                "lt": "{}".format(end_time)
            }
        }
    }
    # Aggregate the results of every query rule configured for the project and
    # send them in a single e-mail.
    content = ''
    for param in params:
        filters = convert_params(param)
        filters.insert(0, _range)
        query_params = {
            "size": 1000,
            "sort": {"@timestamp": "desc"},
            "query": {"bool": {"filter": filters}}
        }
        logger.info('-------------')
        logger.info('query params:')
        logger.info('{}'.format(query_params))
        logger.info('-------------')
        headers = {'Content-Type': 'application/json'}
        query_params = json.dumps(query_params)
        result = requests.post(URL, headers=headers, data=query_params)
        resp_str = result.text
        resp_conn = json.loads(resp_str)
        logger.info('-------------')
        logger.info('query from elk result:')
        logger.info('{}'.format(resp_conn))
        logger.info('-------------')
        total = resp_conn['hits']['total']
        content = content + "Query rule: {}, occurrences: {}.\n".format(param, total)
    start_time_str = time.strftime('%Y-%m-%d %H:%M:%S',
                                   time.localtime(start_time / 1000))
    end_time_str = time.strftime('%Y-%m-%d %H:%M:%S',
                                 time.localtime(end_time / 1000))
    logger.info('project:{} count info interval will send email'.format(app))
    result = send_email3(person, name, content, start_time_str, end_time_str, interval)
    logger.info('count info interval send email result is {}'.format(result))