def Run(setLink):
    '''Refresh dayTimeStart, then call GenMergeIC for parts 1 through 4.

    For each part number the three path arguments are built by appending a
    %-formatted DLSet template (link_Feature_I, link_dfUIC_Label,
    link_Feature_IC) to setLink.

    Args:
        setLink: string prefix placed in front of every DLSet template.
    '''
    global dayTimeStart
    dayTimeStart = Tools.InitTimeSet()

    for part in range(1, 5):
        featureIPath = setLink + DLSet.link_Feature_I % part
        labelPath = setLink + DLSet.link_dfUIC_Label % part
        featureICPath = setLink + DLSet.link_Feature_IC % part
        GenMergeIC(featureIPath, labelPath, featureICPath)
def Main(): global dayTimeStart dayTimeStart = Tools.InitTimeSet() print('start') str = 'OrgSet' CalCRTNum(DLSet.link_ChoiceSet % str + DLSet.link_orgData) """
def CutData(dataLink, storeLink):
    '''Split the CSV at dataLink into nine per-day files, chunk by chunk.

    The input is read with pandas in chunks of 100000 rows (no header row;
    column names and dtypes come from DLSet.orgDataHead / DLSet.orgDataType).
    Each row gets the day slot
    ``Tools.UpperBound(dayTimeStart, row['timeStamp']) - 1``; rows whose slot
    is outside 0..8 are skipped. The first five fields of every kept row are
    appended as one comma-separated line to ``storeLink % (slot + 1)``.
    Every day file is opened in append mode for every chunk, even when the
    chunk contributed no rows to it.

    Args:
        dataLink: path of the source CSV file.
        storeLink: %-format template that receives the 1-based day number.

    Reads the module-level dayTimeStart, which must be set before the call.
    '''
    dayCount = 9    # day slots 0..8 map to output files 1..9
    fieldCount = 5  # number of leading columns copied to the output
    batch = 0

    # Context manager so the input handle is closed even when a chunk fails;
    # previously a bare open() was handed to read_csv and never closed.
    with open(dataLink, 'r') as source:
        reader = pd.read_csv(source, header=None, names=DLSet.orgDataHead,
                             dtype=DLSet.orgDataType, chunksize=100000)
        # Iterator exhaustion is handled by the for statement itself and never
        # reaches the loop body, so no per-chunk StopIteration handler is
        # needed.
        for df in reader:
            # Initialize the per-day buckets for this chunk.
            daySet = [[] for _ in range(dayCount)]

            # Distribute the rows into their day buckets.
            for _, row in df.iterrows():
                idx = Tools.UpperBound(dayTimeStart, row['timeStamp']) - 1
                if idx < 0 or idx > dayCount - 1:
                    continue
                # .iloc is explicit positional access; row[i] with an integer
                # key on a label-indexed Series is deprecated in pandas.
                # (assumes DLSet.orgDataHead holds string labels -- TODO confirm)
                daySet[idx].append([row.iloc[k] for k in range(fieldCount)])

            # Append every bucket to its day file (files are numbered from 1).
            for day, records in enumerate(daySet, start=1):
                with open(storeLink % day, 'a') as f:
                    f.writelines(','.join(map(str, rec)) + '\n'
                                 for rec in records)

            batch += 1
            print('chunk %d done.' % batch)
def Run(setLink):
    '''Refresh dayTimeStart, then call GenMergeC for parts 1 through 4.

    Each call receives setLink + DLSet.link_dayX2Y (passed unformatted),
    setLink + DLSet.link_Feature_C formatted with the part number, and the
    part number itself.

    Args:
        setLink: string prefix placed in front of every DLSet template.
    '''
    global dayTimeStart
    dayTimeStart = Tools.InitTimeSet()

    for part in range(1, 5):
        dayTemplate = setLink + DLSet.link_dayX2Y
        featureCPath = setLink + DLSet.link_Feature_C % part
        GenMergeC(dayTemplate, featureCPath, part)
def Run(setLink):
    '''Refresh dayTimeStart, then run CutData for the data set under setLink.

    CutData receives setLink + DLSet.link_orgData as its input path and
    setLink + DLSet.link_dayN as its output template.

    Args:
        setLink: string prefix placed in front of both DLSet paths.
    '''
    global dayTimeStart
    dayTimeStart = Tools.InitTimeSet()

    sourcePath = setLink + DLSet.link_orgData
    dayTemplate = setLink + DLSet.link_dayN
    CutData(sourcePath, dayTemplate)