def get(self):
    """Return the preprocessing action records attached to a file.

    Query args:
        token   -- auth token, checked with tokenValidator (401 on failure)
        fileUid -- uid of the file whose preprocessing actions are requested

    Returns (JSON body, HTTP status): the parsed action JSON on success,
    400 when the file has no preprocessing records or on any error.
    """
    try:
        parser = reqparse.RequestParser()
        parser.add_argument('token', type=str, required=True)
        parser.add_argument('fileUid', type=str, required=True)
        args = parser.parse_args()
        if not tokenValidator(args['token']):
            return {
                "status": "error",
                "msg": "token error",
                "data": {}
            }, 401
        args.pop('token')  # drop the credential before logging args
        logging.info(f'[API_getFileParameter] args:{args}')
        fid = args['fileUid']
        # getFileInfo row layout (see preprocess __init__ unpack):
        # (fid, dataType, path, numFile, status, preprocessActionFile)
        _, _, _, _, _, preprocessActionFile = getFileInfo(fid)[0]
        if preprocessActionFile:
            with open(preprocessActionFile) as file:
                action = json.load(file)
        else:
            return {
                "status": "error",
                "msg": f"file {fid} doesnt have related preprocessing records",
                "data": {}
            }, 400
        return {"status": "success", "msg": "", "data": action}, 200
    except Exception:
        # BUG FIX: log tag previously read [API_getModelParameter] —
        # a copy-paste slip that mislabeled errors from this endpoint.
        logging.error(f"[API_getFileParameter]{traceback.format_exc()}")
        return {
            "status": "error",
            "msg": str(traceback.format_exc()),
            "data": {}
        }, 400
def post(self):
    """Return the column descriptors of an uploaded file.

    @ fileUid: file id
    @ token: auth token
    """
    fName = '[API_getCol]'
    parser = reqparse.RequestParser()
    parser.add_argument('fileUid', type=str, required=True)
    parser.add_argument('token', type=str, required=True)
    args = parser.parse_args()
    fid = args['fileUid']
    # reject requests with an invalid token before touching the file store
    if not tokenValidator(args['token']):
        return {"status": "error", "msg": "token error", "data": {}}, 401
    args.pop('token')
    logging.debug(f"[API_getCol] args: {args}")
    try:
        records = getFileInfo(fid)
    except Exception as e:
        logging.error(f'{fName}{e}')
        return {'status': 'error', 'msg': str(e), 'data': {}}, 400
    record = records[0]
    numFile, dtype = record[3], record[1]
    try:
        cols = getColType(numFile, dtype).get()
    except Exception as e:
        logging.error(f'{fName}{e}')
        return {'status': 'error', 'msg': str(e), 'data': {}}, 400
    return {'status': 'success', 'msg': '', 'data': {"cols": cols}}, 200
def post(self):
    """Delete an uploaded file, its action file, and its database row.

    @ fileUid: file id
    @ token: auth token

    Returns 401 on bad token, 400 when the file is in use or lookup fails,
    201 on successful deletion.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('fileUid', type=str, required=True)
    parser.add_argument('token', type=str, required=True)
    args = parser.parse_args()
    fid = args['fileUid']
    token = args['token']
    # check token
    if not tokenValidator(token):
        return {"status": "error", "msg": "token error", "data": {}}, 401
    try:
        fileInfo = getFileInfo(fid)
    except Exception as e:
        logging.error(f'[Delfile]{e}')
        return {'status': 'error', 'msg': str(e), 'data': {}}, 400
    fileInfo = fileInfo[0]
    # NOTE(review): index 3 is treated as an "in use" flag here, while other
    # handlers unpack index 4 as status — confirm the files-table schema.
    if fileInfo[3] == 1:
        logging.warning(f'[API_DelFile] file {fid} in use')
        return {
            "status": "error",
            "msg": "The file is in-used",
            "data": {}
        }, 400
    filePath = fileInfo[2]
    dataType = fileInfo[1]
    actionFile = fileInfo[5]
    # cv datasets are stored as directories; everything else is a single file
    if dataType == 'cv':
        shutil.rmtree(filePath)
    else:
        os.remove(filePath)
    if actionFile:
        os.remove(actionFile)
    db = None
    try:
        db = sql()
        # NOTE(review): fid originates from the request — switch to a
        # parameterized query if the driver supports placeholders
        # (SQL injection risk as written).
        db.cursor.execute(f"delete from files where fid='{fid}'")
        db.conn.commit()
    except Exception as e:
        logging.error(f"[API_DelFile] {e}")
    finally:
        # BUG FIX: previously `db.conn.close()` ran unconditionally, so if
        # sql() itself raised, `db` was unbound and the finally block raised
        # NameError, masking the original error.
        if db is not None:
            db.conn.close()
    logging.info(f"[API_DelFile] OK with file uid {fid}")
    return {"status": "success", "msg": "", "data": {}}, 201
def get(self):
    """Download a file by uid as an octet stream.

    cv datasets (stored as directories) are zipped on the fly and the
    temporary archive is removed after being read.

    Returns the raw bytes with Content-Type application/octet-stream,
    404 when the uid is unknown, 400 on any other error.
    """
    try:
        parser = reqparse.RequestParser()
        parser.add_argument('fileUid', type=str, required=True)
        args = parser.parse_args()
        logging.debug(f"[API_Download] args: {args}")
        fileUid = args['fileUid']
        try:
            fileInfo = getFileInfo(fileUid)
        except Exception as e:
            logging.error(f'[API_Download]{e}')
            return {'status': 'error', 'msg': str(e), 'data': {}}, 400
        logging.debug(f'[API_Download] FileInfo: {fileInfo}')
        if len(fileInfo) == 0:
            logging.debug("[API_Download] file not found")
            abort(404)
        table = fileInfo[0]
        if table[1] == 'cv':
            # directory-backed dataset: archive it and serve the zip
            filepath = table[2] + '.zip'
            shutil.make_archive(table[2], 'zip', table[2])
            filetype = '.zip'
        else:
            filepath = table[2]
            # BUG FIX: the old rfind('.')-based slice returned the file's
            # last character when the name had no extension (rfind == -1);
            # splitext correctly yields '' in that case.
            filetype = os.path.splitext(filepath)[1]
        with open(filepath, 'rb') as file:
            data = file.read()
        if filetype == '.zip':
            os.remove(filepath)  # the archive was temporary
        headers = {}
        headers['Content-Type'] = 'application/octet-stream'
        return make_response(data, 200, headers)
    except Exception as e:
        logging.error(f'[API_Download]{e}')
        return {"status": "error", "msg": str(e), "data": {}}, 400
def __init__(self, fid, action):
    """Build per-column preprocessing state for file `fid`.

    @ fid: file uid
    @ action: list of per-column dicts carrying 'col', 'missingFiltering',
      'outlierFiltering', 'normalize', 'stringCleaning'

    Raises Exception when initialization fails or when no action column
    matches any file column.
    """
    try:
        self.params = params()
        self.fid = fid
        self.action = action
        # getFileInfo row: (fid, dataType, path, numFile, status, actionFile)
        fid, self.dataType, self.path, self.numFile, status, actionFile = getFileInfo(
            self.fid)[0]
        self.colType = getColType(self.numFile, self.dataType).get()
        self.df = getDf(self.numFile, self.dataType).get()
        self.data = {}
        # Seed every file column; 'do' flips to True when an action targets it.
        # (Removed a duplicated 'do' = False assignment and a dead
        # `if 'data' not in ...: pass` loop from the original.)
        for c in self.colType:
            self.data[c['name']] = {
                'colType': c['type'],
                'classifiable': c['classifiable'],
                'do': False,
                'data': np.asarray(self.df[c['name']]),
            }
        for c in self.action:
            if c['col'] in self.data:
                entry = self.data[c['col']]
                entry['missingFiltering'] = c['missingFiltering']
                entry['outlierFiltering'] = c['outlierFiltering']
                entry['normalize'] = c['normalize']
                entry['stringCleaning'] = c['stringCleaning']
                entry['do'] = True
        # every seeded column carries 'data'; fail only when nothing was seeded
        if not any('data' in v for v in self.data.values()):
            raise Exception(
                f"[Preprocess Init] actionCol and fileCol not corresponed at all"
            )
    except Exception as e:
        raise Exception(f"[Preprocess Init]{traceback.format_exc()}")
def __init__(self, fid, algoName, friendlyName):
    """Load the numeric columns of file `fid` for correlation analysis.

    Raises NoDataException when the file has no int/float columns.
    """
    self.fid = fid
    self.algoName = algoName
    self.friendlyName = friendlyName
    _, self.dataType, self.path, self.numFile, _, _ = getFileInfo(
        self.fid)[0]
    # map column name -> declared type
    self.colType = {
        entry['name']: entry['type']
        for entry in getColType(self.numFile, self.dataType).get()
    }
    self.df = getDf(self.numFile, self.dataType).get()
    # correlation only makes sense on numeric data: drop everything else
    nonNumeric = [
        name for name, kind in self.colType.items()
        if kind != 'int' and kind != 'float'
    ]
    self.df = self.df.drop(columns=nonNumeric)
    if len(self.df.columns.tolist()) == 0:
        raise NoDataException(
            "[Correlation] No numerical columns in this file")
    self.corr = None
    self.component = None
def post(self):
    """Predict with a trained model, optionally re-applying the training
    file's preprocessing actions to the incoming file first."""
    try:
        parser = reqparse.RequestParser()
        parser.add_argument('token', type=str, required=True)
        parser.add_argument('modelUid', type=str, required=True)
        parser.add_argument('fileUid', type=str, required=True)
        parser.add_argument('preprocess', type=int, required=True)
        args = parser.parse_args()
        if not tokenValidator(args['token']):
            return {"status": "error", "msg": "token error", "data": {}}, 401
        args.pop('token')
        logging.info(f'[API_doModelPredict] args:{args}')
        mid = args['modelUid']
        fid = args['fileUid']
        _, modelFid, _, _, _, status, _, _ = getModelInfo(mid)[0]
        if status != 'success':
            return {"status": "error", "msg": f"model {mid} is still training or failed. Can't predict", "data": {}}, 400
        preprocessedFid = "None"
        if args['preprocess'] == 1:
            # replay the training file's preprocessing on the new file
            _, _, _, _, _, preprocessActionFile = getFileInfo(modelFid)[0]
            if preprocessActionFile:
                with open(preprocessActionFile) as fh:
                    recordedActions = json.load(fh)
                preprocessedFid = preprocessCore(fid, recordedActions).do()
                fid = preprocessedFid
        with open(os.path.join(param.modelpath, mid, 'algoInfo.pkl'), 'rb') as fh:
            algoInfo = pickle.load(fh)
        # dynamically load the algorithm class named by the stored algoInfo
        algoModule = importlib.import_module(
            f"service.analyticService.core.analyticCore.{algoInfo['dataType']}.{algoInfo['projectType']}.{algoInfo['algoName']}")
        importlib.reload(algoModule)
        algoClass = getattr(algoModule, algoInfo['algoName'])
        runner = algoClass(algoInfo, fid, 'predict', mid=mid)
        runner.predictWrapper()
        predictedFid = runner.predict()
        return {"status": "success", "msg": "", "data": {"preprocessedFileUid": preprocessedFid, "predictedFileUid": predictedFid}}, 200
    except Exception as e:
        logging.error(f"[API_doModelPredict]{traceback.format_exc()}")
        return {"status": "error", "msg": str(traceback.format_exc()), "data": {}}, 400
def post(self):
    """Return the status field for each file in `fileUids`.

    @ fileUids: JSON-encoded list of file uids
    @ token: auth token
    """
    fName = 'getFileStatus'
    parser = reqparse.RequestParser()
    parser.add_argument('fileUids', type=str, required=True)
    parser.add_argument('token', type=str, required=True)
    args = parser.parse_args()
    token = args['token']
    uidList = json.loads(args['fileUids'])
    # check token
    if not tokenValidator(token):
        return {"status": "error", "msg": "token error", "data": {}}, 401
    args.pop('token')
    try:
        records = [getFileInfo(uid)[0] for uid in uidList]
    except Exception as e:
        logging.error(f'[API_{fName}]{e}')
        return {'status': 'error', 'msg': str(e), 'data': {}}, 400
    # index 4 of each row is the file's status field
    statuses = [rec[4] for rec in records]
    return {
        "status": "success",
        "msg": "",
        "data": {
            "status": statuses
        }
    }, 200
def __init__(self, algoInfo, fid, action='train', mid=None, testLabel=None):
    """Base initialization for an analytic algorithm run.

    Args:
        algoInfo: dict with 'dataType', 'projectType', 'algoName', and
            JSON-encoded 'input'/'output' column mappings.
        fid: uid of the data file; its dataType must match algoInfo's.
        action: one of 'train' / 'preview' / 'test' / 'predict'.
        mid: existing model uid; a fresh uid is generated when falsy.
        testLabel: required when action == 'test'; becomes the output label.

    Raises:
        Exception: wraps any underlying failure with an [algoName][init] tag.
    """
    try:
        self.action = action  # 'train' / 'preview' / 'test' / 'predict'
        self.algoInfo = algoInfo
        self.sysparam = params()
        self.dataType = self.algoInfo['dataType']  # 'num' / 'cv' / 'nlp'
        self.projectType = self.algoInfo[
            'projectType']  # 'regression' / 'classification' .....
        self.algoName = self.algoInfo['algoName']
        self.fid = fid
        # file row: (fid, dataType, path, numFile, status, preprocessActionFile)
        _, dataType, self.path, self.numFile, _, self.preprocessActionFile = getFileInfo(
            self.fid)[0]
        # the file's stored dataType must match what this algorithm expects
        if dataType != self.dataType:
            raise Exception(
                f'{self.fid} has dataType {dataType} but a {self.dataType} file is required'
            )
        self.thread = None
        if not mid:
            self.mid = modelUidGenerator().uid
        else:
            self.mid = mid
        # parameter definition shipped alongside the algorithm implementation
        self.paramDef = json.load(
            open(
                self.sysparam.analyticServiceRoot +
                f'core/analyticCore/{self.dataType}/{self.projectType}/{self.algoName}.json'
            ))
        self.lib = self.paramDef["lib"]
        self.param = None  # the input parameter
        self.inputDict = json.loads(
            algoInfo['input'])  # input columns mapping
        self.outputDict = json.loads(
            algoInfo['output'])  # output columns mapping
        self.dataDf = None  # raw dataframe
        self.inputData = {}
        self.outputData = {}
        self.d2c = {"label": {"-1": 1, "1": 0}}  # data to category mapping
        self.c2d = {"label": {"0": 1, "1": -1}}  # category to data mapping
        self.model = None  # model
        self.result = {}  # A outputData liked structure
        self.vizRes = {
        }  # {"figname":{"div":"bokehDiv","script":"scriptDiv"}}
        self.txtRes = ""  # "string"
        self.formRes = {}
        self.customObj = {
        }  # other to-saved variable should place here e.g. text tokenization {"objName":obj}
        if action == 'test':
            # abnormal-testing mode needs a caller-supplied label column
            if not testLabel:
                raise Exception(
                    "test label must be given under abnormal testing mode")
            self.outputDict = {"label": testLabel}
            self.paramDef["output"] = [{
                "name": "label",
                "type": "classifiable"
            }]
        if action == 'predict':
            # prediction always writes into a column called "label"
            self.outputDict = {"label": "label"}
            self.paramDef["output"] = [{
                "name": "label",
                "type": "classifiable"
            }]
        self.getParams()
        if action == 'test' or action == 'predict':
            self.loadModel()
        # name -> {type, classifiable} for every column of the data file
        self.colType = {
            c["name"]: {
                "type": c["type"],
                "classifiable": c["classifiable"]
            }
            for c in getColType(self.numFile, self.dataType).get()
        }
        self.getData()
    except Exception as e:
        raise Exception(f'[{self.algoName}][init]{traceback.format_exc()}')
def getData(self):
    """Load the raw dataframe plus the x/y/value columns named in self.dataCol.

    Returns a dict with key 'all' (the raw dataframe) and, for each role in
    ('x', 'y', 'value') present in self.dataCol, a numpy array of that column.
    Each named column (other than the literal "none") is validated against
    the dtype the algorithm declares in self.algoInfo['data'].

    Raises Exception (tagged [getData]) when the file is missing or a column
    fails validation.
    """
    # BUG FIX: the original called getFileInfo(self.fid) a second time,
    # redundantly, before the try block — that call is removed.
    try:
        fileInfo = getFileInfo(self.fid)
        if len(fileInfo) == 0:
            raise Exception(f'fileUid not found')
        fileInfo = fileInfo[0]
        data = {}
        colType = getColType(fileInfo[3], fileInfo[1]).get()
        colTypes = {c["name"]: c['type'] for c in colType}
        self.colTypes = colTypes
        classifiables = {c["name"]: c['classifiable'] for c in colType}
        rawdata = getDf(fileInfo[3], fileInfo[1]).get()
        data['all'] = rawdata
        # identical handling for each role, previously triplicated inline
        for role in ('x', 'y', 'value'):
            if role not in self.dataCol:
                continue
            col = self.dataCol[role]
            # NOTE: as in the original, the column is fetched before the
            # "none" check, so a literal "none" still indexes rawdata
            data[role] = np.asarray(rawdata[col])
            if col != "none":
                self._checkColType(role, col, colTypes, classifiables)
    except Exception as e:
        raise Exception(f'[getData] {e}')
    return data

def _checkColType(self, role, col, colTypes, classifiables):
    """Validate that column `col` satisfies the dtype required for `role`.

    float requirements accept int columns; 'classifiable' checks the
    classifiable flag instead of the dtype. Raises Exception on mismatch.
    (BUG FIX: the 'value' classifiable error previously said
    "col type of x error".)
    """
    required = self.algoInfo['data'][role]
    if required == 'classifiable':
        if classifiables[col] == 0:
            raise Exception(
                f"col type of {role} error: {col} is not classifiable")
        return
    actual = colTypes[col]
    acceptable = {
        'float': actual == 'float' or actual == 'int',
        'int': actual == 'int',
        'path': actual == 'path',
        'string': actual == 'string',
    }
    if required in acceptable and not acceptable[required]:
        raise Exception(
            f"col type of {role} error: can't convert {actual} to {required}")