def formatError(dirName, errorDir):
    """Move files whose names break the naming convention into ``errorDir``.

    A name is accepted when the text after its first dot-separated segment
    has the shape ``.<8 digits>.<8 digits>(.<2-3 digits>)+<digits>.EXT`` with
    ``EXT`` one of ``CHECK``, ``VAL`` or ``DAT``, optionally followed by
    ``.gz``. Every other file is handed to ``move``.

    Args:
        dirName: Directory to scan. It is concatenated directly with each
            file name, so it must end with a path separator.
        errorDir: Destination directory passed to ``move`` for rejected files.
    """
    # Raw string: the previous non-raw literal relied on invalid escape
    # sequences ("\d", "\."), which newer Python versions warn about.
    name_tail = re.compile(
        r"([.]\d{8}){2}([.]\d{2,3})+\d+\.(CHECK|VAL|DAT)(\.gz)?$"
    )
    for file in os.listdir(dirName):
        absPath = dirName + file
        spider_say("Helperl.formatError>>>%s" % absPath)
        prefix = file.split(".")[0]
        # The prefix is, by construction, the literal start of the name, so
        # matching the tail right after it is the same as matching
        # "^" + prefix + tail -- without interpreting regex metacharacters
        # that may occur in the prefix (which could mis-match or raise
        # re.error before).
        if name_tail.match(file, len(prefix)) is None:
            move(absPath, errorDir)
def isExist(path):
    """Log whether ``path`` exists and return the result.

    Args:
        path: File system path to probe.

    Returns:
        True when ``os.path.exists(path)`` is true, otherwise False.
    """
    base_name = os.path.basename(path)
    found = os.path.exists(path)
    if found:
        template = "%s文件存在"
    else:
        template = "%s文件不存在"
    spider_say(template % (base_name))
    return found
def deleteEmpty(dirName, errorDir):
    """Move every zero-size file found in ``dirName`` into ``errorDir``.

    Args:
        dirName: Directory to scan. It is concatenated directly with each
            entry name, so it must end with a path separator.
        errorDir: Destination directory passed to ``move``.
    """
    for entry in os.listdir(dirName):
        full_path = dirName + entry
        spider_say("Helperl.deleteEmpty>>>%s" % full_path)
        # Skip entries that vanished between the listing and this check.
        if not isExist(full_path):
            continue
        if get_FileSize(full_path) != 0:
            continue
        spider_say("%s 是空文件,文件大小为0,被移动到错误目录..." % entry)
        move(full_path, errorDir)
def scan_file(path, ftpobj, dtype):
    """Upload the valid files of ``path`` that pass the ``read_log`` check.

    For each name returned by ``fileValid(path, dtype)`` the full path is
    logged; when ``read_log`` returns True for it, the name is recorded with
    ``write_log`` and the file is sent with ``ftp_upload``.

    Args:
        path: Directory prefix; concatenated directly with each file name.
        ftpobj: FTP connection object handed through to ``ftp_upload``.
        dtype: Data type selector handed to ``fileValid`` and ``ftp_upload``.
    """
    log_name = "bigdata_day" + Helper.dateNow(0) + ".log"
    for name in fileValid(path, dtype):
        full_path = path + name
        spider_say(full_path)
        if not (read_log(name, log_name) == True):
            continue
        # NOTE(review): the reviewed source had lost its indentation; the
        # upload is assumed to be guarded by the read_log check together
        # with write_log -- confirm.
        write_log(name, log_name)
        ftp_upload(ftpobj, name, full_path, dtype)
def ftpconnect():
    """Open and authenticate an FTP connection described by ``conStr``.

    Reads ``conStr['host']``, ``conStr['user']`` and ``conStr['password']``.
    Any failure is logged through ``spider_say`` instead of being raised.

    Returns:
        A logged-in ``ftplib.FTP`` object, or None when connecting or
        logging in failed.
    """
    try:
        ftpobj = ftplib.FTP(conStr['host'])
    except Exception as e:
        spider_say(e)
        return None

    try:
        ftpobj.login(conStr['user'], conStr['password'])
    except Exception as e:
        spider_say(e)
        # Previously a connected-but-unauthenticated object was returned and
        # its socket was never released; close it and report failure instead.
        ftpobj.close()
        return None

    try:
        # Logging the banner is best-effort and must not discard a good
        # connection.
        spider_say(ftpobj.getwelcome())
    except Exception as e:
        spider_say(e)
    return ftpobj
def notConf(dirName, errorDir):
    """Move files whose name prefix is not configured into ``errorDir``.

    The prefix is the text before the first dot. A file is kept when that
    prefix is in ``jt.getDAPX("day")`` or in ``jt.getDAPX("hs_data")``.

    Args:
        dirName: Directory to scan. It is concatenated directly with each
            entry name, so it must end with a path separator.
        errorDir: Destination directory passed to ``move``.
    """
    for entry in os.listdir(dirName):
        full_path = dirName + entry
        prefix = entry.split(".")[0]
        spider_say("Helperl.notConf>>>%s" % full_path)
        if prefix in jt.getDAPX("day") or prefix in jt.getDAPX("hs_data"):
            continue
        spider_say("%s 并不属于配置中的文件,可能是错误文件,将被移动到错误目录..." % entry)
        move(full_path, errorDir)
def datDate(path):
    """Move files in ``path`` whose dates are outside the accepted period.

    Each file name is split on ".": field 0 is the data name, fields 1 and 2
    are two date strings. ``Helper.dateOper(d1, d2)`` is evaluated (the
    original comment calls it the "date difference") and drives the branches
    below; rejected files are moved to ``paths["day_err"]``.

    NOTE(review): the reviewed source had lost its indentation. The nesting
    below is a reconstruction -- in particular, whether the final ``else``
    belongs to the outer ``if/elif`` chain or to the ``hs_data`` membership
    test should be confirmed against the original file.

    Args:
        path: Directory to scan; concatenated directly with each file name.
    """
    files = os.listdir(path)
    for f in files:
        absPath = path + f
        spider_say("datDate>>>%s" % absPath)
        dn = f.split('.')[0]
        d1 = f.split('.')[1]
        d2 = f.split(".")[2]
        dc = Helper.dateOper(d1, d2)  # date difference
        if dc == 1 or dc == 0:
            # Only logs; the file stays where it is.
            if Helper.dateNow(-1) == d2:
                spider_say("%s--当前账期是%s,文件的账期是%s ,[%s],文件可以上传" % (f, Helper.dateNow(-1), d2, dc))
                # (disabled) Helper.move(absPath, paths["jt_day"])
        elif dc == 2:
            if dn in jt.getDAPX("hs_data"):
                # (disabled) earlier variant: log that the file can be
                # uploaded and move it to paths["jt_day"].
                d3 = Helper.dateNow(0)  # current date
                if Helper.dateOper(d3, d1) == 1 and Helper.dateOper(d3, d2) == 1:
                    pass  # keep the file: neither moved nor uploaded
                elif Helper.dateOper(d3, d1) == 0 and Helper.dateOper(d3, d2) == 2:
                    pass  # this case is uploaded directly
                else:
                    spider_say("%s--非合理账期内数据,将被移动到错误目录" % f)
                    Helper.move(absPath, paths["day_err"])
        else:
            spider_say("%s--非合理账期内数据,将被移动到错误目录" % f)
            Helper.move(absPath, paths["day_err"])
def readVal(file):
    """Read the first line of a ``.VAL`` file into a ``Fileinfo`` object.

    The line is split into fields that are stored, in order, as data name,
    size, row count, data date and time. Files with any other extension are
    not opened and an unpopulated ``Fileinfo`` is returned.

    NOTE(review): as it appears in the reviewed source, the separator
    passed to ``split`` is an empty string, and ``str.split("")`` raises
    ``ValueError: empty separator``. The real delimiter was possibly a
    non-printable character lost in transit -- confirm against the VAL file
    format and restore it.

    NOTE(review): the reviewed source had lost its indentation; the
    zero-row/zero-size check is assumed to apply to ``.VAL`` files only.

    Args:
        file: Path of the file to inspect.

    Returns:
        The ``Fileinfo`` object (populated only for ``.VAL`` files).
    """
    # Only handle the file when it is a VAL file.
    obj = Fileinfo()
    if os.path.splitext(file)[1] == ".VAL":
        with open(file, "r") as f:
            line = f.readline()
            obj.setDatName(line.split("")[0])
            obj.setSize(line.split("")[1])
            obj.setRow(line.split("")[2])
            obj.setDatDate(line.split("")[3])
            obj.setTime(line.split("")[4])
        # The values are compared as strings, exactly as read from the file.
        if obj.getRow() == "0" or obj.getSize() == "0":
            spider_say("文件有误不能上传--记录数为0或文件大小为0")
    return obj
def formatSize(bytes):
    """Format a byte count as a string in kb, M or G (1024-based).

    Args:
        bytes: Byte count; anything ``float()`` accepts (number or numeric
            string).

    Returns:
        ``"<value>kb"``, ``"<value>M"`` or ``"<value>G"`` with six decimals,
        or ``"Error"`` when ``bytes`` cannot be converted to a float.
    """
    try:
        kb = float(bytes) / 1024
    # Only conversion failures mean "bad input"; a bare except would also
    # swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        spider_say("传入的字节格式不对")
        return "Error"

    if kb >= 1024:
        mb = kb / 1024
        if mb >= 1024:
            return "%fG" % (mb / 1024)
        return "%fM" % mb
    return "%fkb" % kb
def checkRPT(path):
    """Log a summary of today's receipt (RPT/ERR) files found in ``path``.

    A file is considered when ``Helper.dateOper`` of its creation time and
    ``Helper.dateNow(0)`` is 0, its name matches the receipt pattern and its
    first dot-separated segment is in ``jt.getDAPX("day")``. The first file
    seen for each data name decides its status: ``.RPT`` is recorded as
    normal, anything else (``.ERR``) as abnormal.

    NOTE(review): the reviewed source had lost its indentation; the summary
    is assumed to be logged even when the directory is empty -- confirm.

    Args:
        path: Directory to scan; concatenated directly with each file name.
    """
    spider_say("现在开始检查回执文件......")
    # Raw string: the previous non-raw literal relied on invalid escape
    # sequences ("\w", "\d", "\.").
    receipt_name = re.compile(r"\w*([.]\d+)+([.]\d+)+([.]\w+)+\.(RPT|ERR)$")
    narmal = {}
    for file in os.listdir(path):
        absPath = path + file
        # Only receipts created today.
        if Helper.dateOper(Helper.getFileCreateTime(absPath), Helper.dateNow(0)) != 0:
            continue
        # Only RPT and ERR files.
        if receipt_name.match(file) is None:
            continue
        name = file.split(".")[0]
        # Only data names that belong to the configuration; first one wins.
        if name not in jt.getDAPX("day") or name in narmal:
            continue
        narmal[name] = "正常" if os.path.splitext(file)[1] == ".RPT" else "异常"

    for (k, v) in narmal.items():
        spider_say(k + "---" + v)
    ncount = sum(1 for v in narmal.values() if v == "正常")
    fcount = sum(1 for v in narmal.values() if v == "异常")
    spider_say("%d个回执全部收到,其中正常[%d],异常[%d]" % (len(narmal), ncount, fcount))
def arthropoda(path):
    """Clean up ``path`` and process the CHECK files that remain.

    Steps, in order: check today's receipts, move empty files, move files
    not named in the configuration, move files with malformed names, move
    files outside the accepted period (all rejects go to
    ``paths["day_err"]``), then call ``checkFileOper`` on every remaining
    ``.CHECK`` file.

    Args:
        path: Directory to process; concatenated directly with file names.
    """
    # 0. Review the receipt files (the monthly check is currently disabled).
    check.check_day()
    # 1. Remove empty files of size 0.
    Helper.deleteEmpty(path, paths["day_err"])
    # 2. Remove files whose name is not part of the configuration.
    Helper.notConf(path, paths["day_err"])
    # 3. Remove files whose name does not follow the naming convention.
    Helper.formatError(path, paths["day_err"])
    # 4. Remove files outside the accepted period.
    datDate(path)

    files = os.listdir(path)
    # NOTE(review): the reviewed source logged this same line in both the
    # "has files" branch and its else branch (nesting reconstructed), so it
    # is emitted once here.
    spider_say("=====扫描到 %s 个文件" % (len(files)))
    # Raw string: the previous literal relied on the invalid escape "\.".
    handled_name = re.compile(r'^.*?\.(CHECK|VAL|gz)$')
    for f in files:
        absPath = path + f
        if not Helper.isExist(absPath):
            continue
        if handled_name.match(f) is None:
            continue
        if os.path.splitext(f)[1] == ".CHECK":
            checkFileOper(absPath)
def move(file, toPath):
    """Move ``file`` into directory ``toPath`` unless the name is taken there.

    Nothing is moved when ``file`` is not a regular file or when a file of
    the same name already exists in ``toPath``; both cases are only logged.
    ``toPath`` is created when it does not exist.

    Args:
        file: Path of the file to move.
        toPath: Destination directory.
    """
    filename = os.path.split(file)[1]
    if not os.path.isfile(file):
        spider_say("%s 不是一个文件" % (file))
        return

    if not os.path.exists(toPath):
        # makedirs also creates missing parent directories, where mkdir
        # raised an error.
        os.makedirs(toPath)

    # os.path.join gives the same result as plain concatenation when toPath
    # ends with a separator, and the correct one when it does not.
    target = os.path.join(toPath, filename)
    if os.path.exists(target):
        spider_say("%s 文件已经存在,不能再传" % (target))
    else:
        spider_say("%s-->%s" % (file, shutil.move(file, toPath)))
def checkRPT(path, *dM):
    """Summarise the receipt (RPT/ERR) files in ``path`` and log the result.

    ``dM[0]`` selects the mode:

    * ``'day'``: a file counts when ``Helper.dateOper`` of its creation time
      and ``Helper.dateNow(0)`` is 0 and its first dot-separated segment is
      in ``jt.getDAPX("day")``; results are keyed by that segment.
    * ``'month'``: a file counts when its third dot-separated segment equals
      ``Helper.monthNow(-1)`` and its first segment is in
      ``jt.getDAPX("month")``; results are keyed by the full file name.

    ``.RPT`` files are recorded as normal, ``.ERR`` files as abnormal. The
    whole report is also written through ``write2log_m`` to ``check.log``.

    NOTE(review): the reviewed source had lost its indentation; the summary
    and the log write are assumed to run even when the directory is empty.

    Args:
        path: Directory to scan; concatenated directly with each file name.
        *dM: Mode selector; only ``dM[0]`` is read.

    Raises:
        IndexError: If no mode is passed.
    """
    mode = dM[0]
    msg = str(datetime.now()) + "现在开始检查" + mode + "回执文件......\n"
    spider_say(msg)
    # Raw string: the previous non-raw literal relied on invalid escape
    # sequences ("\w", "\d", "\.").
    receipt_name = re.compile(r"\w*([.]\d+)+([.]\d+)+([.]\w+)+\.(RPT|ERR)$")
    narmal = {}
    for file in os.listdir(path):
        absPath = path + file
        # Only RPT and ERR files.
        if receipt_name.match(file) is None:
            continue
        parts = file.split(".")
        name = parts[0]
        status = "正常" if os.path.splitext(file)[1] == ".RPT" else "异常"
        if mode == 'day':
            # Only receipts created today.
            if Helper.dateOper(Helper.getFileCreateTime(absPath), Helper.dateNow(0)) != 0:
                continue
            # Only data names from the configuration; first one wins.
            if name in jt.getDAPX("day") and name not in narmal:
                narmal[name] = status
        elif mode == 'month':
            # Only receipts for the previous month.
            if parts[2] != Helper.monthNow(-1):
                continue
            # NOTE(review): the duplicate test uses the data name but the
            # entry is stored under the full file name, so it does not
            # de-duplicate per data name as the day branch does. Kept as
            # found -- confirm which key is intended.
            if name in jt.getDAPX("month") and name not in narmal:
                narmal[file] = status

    for (key, value) in narmal.items():
        str1 = "{key}:{value}".format(key=key, value=value)
        spider_say(str1)
        msg += str1 + '\n'
    ncount = sum(1 for v in narmal.values() if v == "正常")
    fcount = sum(1 for v in narmal.values() if v == "异常")
    str2 = str(datetime.now()) + "当前收到%d个回执文件,其中正常[%d],异常[%d]" % (len(narmal), ncount, fcount)
    spider_say(str2)
    msg += str2 + '\n'
    write2log_m(msg, "check.log")
#!/usr/bin/env python
#-*- coding=utf-8 -*-
#filename: monitor_dir.py
"""Poll a directory forever and log the file names that appear or vanish."""
import os
import time

from mylog import spider_say

#monitor_dir = "E:/PUT_JT_DATA/The_BWT_Data/DayData/"
monitor_dir = "E:/PUT_JT_DATA/The_BWT_Data/DayRPT/"
# Seconds to wait between two scans. The loop previously had no pause and
# re-listed the directory as fast as the CPU allowed.
# NOTE(review): one second is an assumed value -- tune as needed.
poll_interval = 1

now_file = os.listdir(monitor_dir)
while True:
    new_file = os.listdir(monitor_dir)
    # Sets give O(1) membership tests; the lists keep the listing order for
    # the log output.
    previous_names = set(now_file)
    current_names = set(new_file)
    added = [f for f in new_file if f not in previous_names]
    removed = [f for f in now_file if f not in current_names]
    if added:
        spider_say("\n Added: %s" % (",".join(added)))
    if removed:
        spider_say("\n Removed: %s" % (",".join(removed)))
    now_file = new_file
    time.sleep(poll_interval)
str1="{key}:{value}".format(key=key,value=value) spider_say(str1) msg+=str1+'\n' ncount=0 fcount=0 for v in narmal.values(): if v=="正常": ncount=ncount+1 if v=="异常": fcount=fcount+1 str2=str(datetime.now())+"当前收到%d个回执文件,其中正常[%d],异常[%d]"%(len(narmal),ncount,fcount) spider_say(str2) msg+=str2+'\n' write2log_m(msg,"check.log") def check_day(): checkRPT('E:/PUT_JT_DATA/The_BWT_Data/DayRPT/','day') def check_month(): checkRPT('E:/PUT_JT_DATA/The_BWT_Data/MonthRPT/','month') if __name__=='__main__': try: while True: time.sleep(10*60) check_day() check_month() except Exception as e: spider_say(e)
def log(uploaded=2):
    """Log how many daily files are configured and how many were uploaded.

    Args:
        uploaded: Number reported as already uploaded. Defaults to 2, the
            value that used to be hard-coded in the message, so existing
            ``log()`` calls print exactly what they did before.
    """
    day = jt.getDAPX("day")
    spider_say("|*日文件共%d个 当前已上传%d" % (len(day), uploaded))
if file.split(".")[0] not in narmal.keys(): if os.path.splitext(file)[1] == ".RPT": narmal[file.split(".")[0]] = "正常" else: narmal[file.split(".")[0]] = "异常" #if len(narmal)==len(jt.getDAPX("day")): for (k, v) in narmal.items(): spider_say(k + "---" + v) ncount = 0 fcount = 0 for v in narmal.values(): if v == "正常": ncount = ncount + 1 if v == "异常": fcount = fcount + 1 spider_say("%d个回执全部收到,其中正常[%d],异常[%d]" % (len(narmal), ncount, fcount)) if __name__ == '__main__': try: while True: spider_say("代号蜘蛛:Start Job......") time.sleep(10 * 60) arthropoda(paths["hs_day"]) arthropoda(paths["ods_day"]) spider_say("代号蜘蛛:Finish Job...") except Exception as e: spider_say(e)
def prn_obj(obj):
    """Log every instance attribute of ``obj`` as ``name:value`` lines.

    The attribute dump is framed by a dashed rule above and below.

    Args:
        obj: Any object that has a ``__dict__``.
    """
    rule = "----------------------------------------------------------------"
    spider_say(rule)
    attribute_lines = []
    for pair in obj.__dict__.items():
        attribute_lines.append('%s:%s' % pair)
    spider_say('\n'.join(attribute_lines))
    spider_say(rule)
def getDocSize(path):
    """Log the human-readable size of the file at ``path``.

    The size comes from ``os.path.getsize`` and is formatted by
    ``formatSize``. Any exception is logged instead of being raised.

    Args:
        path: Path of the file to measure.
    """
    try:
        spider_say(formatSize(os.path.getsize(path)))
    except Exception as exc:
        spider_say(exc)