Пример #1
0
 def get(self):
     """Return the stored preprocessing actions of an uploaded file.

     Request arguments:
         token: access token, checked with tokenValidator.
         fileUid: uid of the file whose preprocessing record is wanted.

     Returns:
         A (body, status) tuple: 200 with the JSON content of the file's
         preprocessing action file, 401 when the token is rejected, and
         400 when the file is unknown, has no preprocessing record, or
         any other error occurs.
     """
     try:
         parser = reqparse.RequestParser()
         parser.add_argument('token', type=str, required=True)
         parser.add_argument('fileUid', type=str, required=True)
         args = parser.parse_args()
         if not tokenValidator(args['token']):
             return {
                 "status": "error",
                 "msg": "token error",
                 "data": {}
             }, 401
         # Drop the token before the remaining arguments are logged.
         args.pop('token')
         logging.info(f'[API_getFileParameter] args:{args}')
         fid = args['fileUid']
         fileInfo = getFileInfo(fid)
         if len(fileInfo) == 0:
             # Answer with a readable message rather than the traceback of
             # an IndexError raised by indexing an empty result.
             return {
                 "status": "error",
                 "msg": f"file {fid} not found",
                 "data": {}
             }, 400
         _, _, _, _, _, preprocessActionFile = fileInfo[0]
         if not preprocessActionFile:
             return {
                 "status": "error",
                 "msg":
                 f"file {fid} doesnt have related preprocessing records",
                 "data": {}
             }, 400
         with open(preprocessActionFile) as file:
             action = json.load(file)
         return {"status": "success", "msg": "", "data": action}, 200
     except Exception:
         # Tag the log line with this endpoint's name (it used to carry the
         # getModelParameter tag, which made failures hard to attribute).
         trace = traceback.format_exc()
         logging.error(f"[API_getFileParameter]{trace}")
         return {
             "status": "error",
             "msg": str(trace),
             "data": {}
         }, 400
Пример #2
0
    def post(self):
        '''
        Return the column types of an uploaded file.

        @ fileUid: file id
        @ token: access token
        '''
        fName = '[API_getCol]'
        parser = reqparse.RequestParser()
        parser.add_argument('fileUid', type=str, required=True)
        parser.add_argument('token', type=str, required=True)
        args = parser.parse_args()

        fid = args['fileUid']
        token = args['token']

        #check token
        if not tokenValidator(token):
            return {"status": "error", "msg": "token error", "data": {}}, 401
        # Drop the token before the remaining arguments are logged.
        args.pop('token')
        logging.debug(f"{fName} args: {args}")
        try:
            fileInfo = getFileInfo(fid)
        except Exception as e:
            logging.error(f'{fName}{e}')
            return {'status': 'error', 'msg': str(e), 'data': {}}, 400
        if len(fileInfo) == 0:
            # An empty lookup used to escape as an unhandled IndexError.
            msg = f'file {fid} not found'
            logging.error(f'{fName}{msg}')
            return {'status': 'error', 'msg': msg, 'data': {}}, 400
        fileInfo = fileInfo[0]

        filePath = fileInfo[3]
        dataType = fileInfo[1]
        try:
            gct = getColType(filePath, dataType).get()
        except Exception as e:
            logging.error(f'{fName}{e}')
            return {'status': 'error', 'msg': str(e), 'data': {}}, 400
        return {'status': 'success', 'msg': '', 'data': {"cols": gct}}, 200
Пример #3
0
    def post(self):
        '''
        Delete an uploaded file, its preprocessing action file and its
        row in the files table.

        @ fileUid: file id
        @ token: access token

        Returns 201 on success, 401 on a rejected token and 400 when the
        file is unknown, is in use, or the database delete fails.
        '''
        parser = reqparse.RequestParser()
        parser.add_argument('fileUid', type=str, required=True)
        parser.add_argument('token', type=str, required=True)
        args = parser.parse_args()
        fid = args['fileUid']
        token = args['token']

        #check token
        if not tokenValidator(token):
            return {"status": "error", "msg": "token error", "data": {}}, 401
        try:
            fileInfo = getFileInfo(fid)
        except Exception as e:
            logging.error(f'[Delfile]{e}')
            return {'status': 'error', 'msg': str(e), 'data': {}}, 400
        if len(fileInfo) == 0:
            # An empty lookup used to escape as an unhandled IndexError.
            logging.warning(f'[API_DelFile] file {fid} not found')
            return {
                "status": "error",
                "msg": f"file {fid} not found",
                "data": {}
            }, 400

        fileInfo = fileInfo[0]

        # NOTE(review): other users of getFileInfo in this file read index 3
        # as the numeric-file path and index 4 as the status -- confirm that
        # index 3 really is the in-use flag for this handler.
        if fileInfo[3] == 1:
            logging.warning(f'[API_DelFile] file {fid} in use')
            return {
                "status": "error",
                "msg": "The file is in-used",
                "data": {}
            }, 400

        filePath = fileInfo[2]
        dataType = fileInfo[1]
        actionFile = fileInfo[5]
        # 'cv' uploads are removed as a directory tree, everything else as a
        # single file.
        if dataType == 'cv':
            shutil.rmtree(filePath)
        else:
            os.remove(filePath)
        if actionFile:
            os.remove(actionFile)

        # Bind db up front so the finally clause cannot hit an unbound name
        # when sql() itself raises.
        db = None
        try:
            db = sql()
            # NOTE(review): fid is request input interpolated into the SQL
            # text; switch to a parameterized query using the placeholder
            # style of the driver behind sql().
            db.cursor.execute(f"delete from files where fid='{fid}'")
            db.conn.commit()
        except Exception as e:
            # Report the failure instead of answering "success" while the
            # row is still present.
            logging.error(f"[API_DelFile] {e}")
            return {'status': 'error', 'msg': str(e), 'data': {}}, 400
        finally:
            if db is not None:
                db.conn.close()

        logging.info(f"[API_DelFile] OK with file uid {fid}")
        return {"status": "success", "msg": "", "data": {}}, 201
Пример #4
0
    def get(self):
        """Send the stored file back as an octet stream.

        Request arguments:
            fileUid: uid of the file to download.

        For files whose data type is 'cv' the stored path is zipped into a
        temporary archive, the archive is returned and then removed. Any
        other file is returned as stored.

        Returns:
            A 200 response carrying the raw bytes, a 404 error body when
            the uid is unknown, or a 400 error body on any other failure.
        """
        try:
            parser = reqparse.RequestParser()
            parser.add_argument('fileUid', type=str, required=True)
            args = parser.parse_args()
            logging.debug(f"[API_Download] args: {args}")
            fileUid = args['fileUid']
            # NOTE(review): this endpoint performs no token validation --
            # confirm that unauthenticated download is intended.

            try:
                fileInfo = getFileInfo(fileUid)
            except Exception as e:
                logging.error(f'[API_Download]{e}')
                return {'status': 'error', 'msg': str(e), 'data': {}}, 400
            logging.debug(f'[API_Download] FileInfo: {fileInfo}')
            if len(fileInfo) == 0:
                logging.debug("[API_Download] file not found")
                # Return the 404 directly: abort() signals through an
                # exception, which the outer handler turned into a 400.
                return {
                    "status": "error",
                    "msg": "file not found",
                    "data": {}
                }, 404

            table = fileInfo[0]
            # Only an archive built here may be deleted afterwards; a stored
            # file that merely ends in ".zip" must survive the download.
            isTempArchive = table[1] == 'cv'
            if isTempArchive:
                filepath = table[2] + '.zip'
                shutil.make_archive(table[2], 'zip', table[2])
            else:
                filepath = table[2]

            try:
                with open(filepath, 'rb') as file:
                    data = file.read()
            finally:
                # Clean up the temporary archive even when reading fails.
                if isTempArchive and os.path.exists(filepath):
                    os.remove(filepath)

            headers = {'Content-Type': 'application/octet-stream'}
            return make_response(data, 200, headers)
        except Exception as e:
            logging.error(f'[API_Download]{e}')
            return {"status": "error", "msg": str(e), "data": {}}, 400
Пример #5
0
 def __init__(self, fid, action):
     """Load a file's columns and attach the requested actions to them.

     Args:
         fid: uid of the file to preprocess.
         action: iterable of dicts, each with a 'col' key plus
             'missingFiltering', 'outlierFiltering', 'normalize' and
             'stringCleaning' settings for that column.

     After construction self.data maps every column name to a dict with
     its 'colType', 'classifiable', 'data' (numpy array) and a 'do' flag
     that is True only for columns named by an action entry.

     Raises:
         Exception: any failure, re-raised with a '[Preprocess Init]'
             prefix followed by the formatted traceback.
     """
     try:
         self.params = params()
         self.fid = fid
         self.action = action
         fileRow = getFileInfo(self.fid)[0]
         _, self.dataType, self.path, self.numFile, _, _ = fileRow
         self.colType = getColType(self.numFile, self.dataType).get()
         self.df = getDf(self.numFile, self.dataType).get()

         # One entry per column of the file; nothing is processed by default.
         self.data = {}
         for column in self.colType:
             entry = {
                 'colType': column['type'],
                 'classifiable': column['classifiable'],
                 'do': False,
             }
             name = column['name']
             self.data[name] = entry
             entry['data'] = np.asarray(self.df[name])

         # Copy the settings of each action onto its column, ignoring
         # actions that name a column the file does not have.
         settingKeys = ('missingFiltering', 'outlierFiltering', 'normalize',
                        'stringCleaning')
         for spec in self.action:
             target = self.data.get(spec['col'])
             if target is None:
                 continue
             for key in settingKeys:
                 target[key] = spec[key]
             target['do'] = True

         # NOTE(review): every entry receives 'data' above, so this only
         # fires when the file has no columns at all.
         if not any('data' in info for info in self.data.values()):
             raise Exception(
                 f"[Preprocess Init] actionCol and fileCol not corresponed at all"
             )
     except Exception:
         raise Exception(f"[Preprocess Init]{traceback.format_exc()}")
Пример #6
0
 def __init__(self, fid, algoName, friendlyName):
     """Load a file and keep only its 'int' and 'float' columns.

     Args:
         fid: uid of the file to analyse.
         algoName: stored as self.algoName.
         friendlyName: stored as self.friendlyName.

     Raises:
         NoDataException: when no column of type 'int' or 'float' remains.
     """
     self.fid = fid
     self.algoName = algoName
     self.friendlyName = friendlyName

     fileRow = getFileInfo(self.fid)[0]
     _, self.dataType, self.path, self.numFile, _, _ = fileRow

     # Map column name -> declared type.
     columnSpecs = getColType(self.numFile, self.dataType).get()
     self.colType = {spec['name']: spec['type'] for spec in columnSpecs}

     self.df = getDf(self.numFile, self.dataType).get()
     nonNumeric = [
         name for name, kind in self.colType.items()
         if kind not in ('int', 'float')
     ]
     self.df = self.df.drop(columns=nonNumeric)
     if not self.df.columns.tolist():
         raise NoDataException(
             "[Correlation] No numerical columns in this file")

     # Initialised to None here; nothing in this constructor fills them.
     self.corr = None
     self.component = None
Пример #7
0
 def post(self):
     """Run a trained model on a file and return the resulting file uids.

     Request arguments:
         token: access token, checked with tokenValidator.
         modelUid: uid of the model to use.
         fileUid: uid of the file to predict on.
         preprocess: 1 to first apply the preprocessing actions recorded
             for the model's own file (when it has any).

     Returns:
         A (body, status) tuple: 200 with 'preprocessedFileUid' (the
         string "None" when no preprocessing ran) and 'predictedFileUid',
         401 on a rejected token, 400 when the model status is not
         'success' or anything fails.
     """
     try:
         parser = reqparse.RequestParser()
         requiredArgs = (
             ('token', str),
             ('modelUid', str),
             ('fileUid', str),
             ('preprocess', int),
         )
         for argName, argType in requiredArgs:
             parser.add_argument(argName, type=argType, required=True)
         args = parser.parse_args()

         if not tokenValidator(args['token']):
             return {"status": "error", "msg": "token error", "data": {}}, 401
         # Drop the token before the remaining arguments are logged.
         args.pop('token')
         logging.info(f'[API_doModelPredict] args:{args}')

         mid = args['modelUid']
         fid = args['fileUid']
         modelRow = getModelInfo(mid)[0]
         _, modelFid, _, _, _, status, _, _ = modelRow
         if status != 'success':
             return {
                 "status": "error",
                 "msg": f"model {mid} is still training or failed. Can't predict",
                 "data": {}
             }, 400

         preprocessedFid = "None"
         if args['preprocess'] == 1:
             trainingFileRow = getFileInfo(modelFid)[0]
             _, _, _, _, _, preprocessActionFile = trainingFileRow
             if preprocessActionFile:
                 with open(preprocessActionFile) as file:
                     action = json.load(file)
                 # Prediction then runs on the preprocessed copy.
                 preprocessedFid = preprocessCore(fid, action).do()
                 fid = preprocessedFid

         # NOTE(review): pickle.load executes whatever the file contains;
         # make sure algoInfo.pkl can only be written by this service.
         algoInfoPath = os.path.join(param.modelpath, mid, 'algoInfo.pkl')
         with open(algoInfoPath, 'rb') as file:
             algoInfo = pickle.load(file)

         moduleName = f"service.analyticService.core.analyticCore.{algoInfo['dataType']}.{algoInfo['projectType']}.{algoInfo['algoName']}"
         module = importlib.import_module(moduleName)
         importlib.reload(module)
         algoClass = getattr(module, algoInfo['algoName'])
         algo = algoClass(algoInfo, fid, 'predict', mid=mid)
         algo.predictWrapper()
         predictedFid = algo.predict()

         resultData = {
             "preprocessedFileUid": preprocessedFid,
             "predictedFileUid": predictedFid
         }
         return {"status": "success", "msg": "", "data": resultData}, 200
     except Exception:
         trace = traceback.format_exc()
         logging.error(f"[API_doModelPredict]{trace}")
         return {"status": "error", "msg": str(trace), "data": {}}, 400
Пример #8
0
    def post(self):
        '''
        Return the status field of every requested file.

        @ fileUids: JSON-encoded list of file ids
        @ token: access token

        Returns 200 with {"status": [...]} in request order, 401 on a
        rejected token and 400 when fileUids is not a JSON list or a
        lookup fails.
        '''
        fName = 'getFileStatus'
        parser = reqparse.RequestParser()
        parser.add_argument('fileUids', type=str, required=True)
        parser.add_argument('token', type=str, required=True)
        args = parser.parse_args()
        token = args['token']

        #check token before any other input is interpreted
        if not tokenValidator(token):
            return {"status": "error", "msg": "token error", "data": {}}, 401
        args.pop('token')

        # Malformed JSON used to escape as an unhandled exception.
        try:
            fids = json.loads(args['fileUids'])
        except ValueError as e:
            logging.error(f'[API_{fName}]{e}')
            return {
                'status': 'error',
                'msg': f'fileUids is not valid JSON: {e}',
                'data': {}
            }, 400
        if not isinstance(fids, list):
            msg = 'fileUids must be a JSON list'
            logging.error(f'[API_{fName}]{msg}')
            return {'status': 'error', 'msg': msg, 'data': {}}, 400

        try:
            fileInfo = [getFileInfo(fid)[0] for fid in fids]
        except Exception as e:
            logging.error(f'[API_{fName}]{e}')
            return {'status': 'error', 'msg': str(e), 'data': {}}, 400
        # Index 4 of each row is what this endpoint reports as the status.
        statuses = [f[4] for f in fileInfo]
        return {
            "status": "success",
            "msg": "",
            "data": {
                "status": statuses
            }
        }, 200
Пример #9
0
 def __init__(self,
              algoInfo,
              fid,
              action='train',
              mid=None,
              testLabel=None):
     """Prepare an algorithm run: file info, parameter definition, data.

     Args:
         algoInfo: dict with at least 'dataType', 'projectType',
             'algoName' and JSON strings under 'input' and 'output'.
         fid: uid of the file to work on; its data type must equal
             algoInfo['dataType'].
         action: 'train' / 'preview' / 'test' / 'predict'.
         mid: model uid; a new one is generated when it is falsy.
         testLabel: label column, required when action is 'test'.

     Raises:
         Exception: any failure, re-raised with an
             '[<algoName>][init]' prefix followed by the traceback.
     """
     try:
         self.action = action  # 'train' / 'preview' / 'test' / 'predict'
         self.algoInfo = algoInfo
         self.sysparam = params()
         self.dataType = self.algoInfo['dataType']  # 'num' / 'cv' / 'nlp'
         self.projectType = self.algoInfo[
             'projectType']  # 'regression' / 'classification' .....
         self.algoName = self.algoInfo['algoName']
         self.fid = fid
         _, dataType, self.path, self.numFile, _, self.preprocessActionFile = getFileInfo(
             self.fid)[0]
         if dataType != self.dataType:
             raise Exception(
                 f'{self.fid} has dataType {dataType} but a {self.dataType} file is required'
             )
         self.thread = None
         if not mid:
             self.mid = modelUidGenerator().uid
         else:
             self.mid = mid
         # Read the parameter definition through a context manager so the
         # file handle is closed even when the JSON is invalid.
         paramDefPath = (
             self.sysparam.analyticServiceRoot +
             f'core/analyticCore/{self.dataType}/{self.projectType}/{self.algoName}.json'
         )
         with open(paramDefPath) as paramDefFile:
             self.paramDef = json.load(paramDefFile)
         self.lib = self.paramDef["lib"]
         self.param = None  # the input parameter
         self.inputDict = json.loads(
             algoInfo['input'])  # input columns mapping
         self.outputDict = json.loads(
             algoInfo['output'])  # output columns mapping
         self.dataDf = None  # raw dataframe
         self.inputData = {}
         self.outputData = {}
         self.d2c = {"label": {"-1": 1, "1": 0}}  # data to category mapping
         self.c2d = {"label": {"0": 1, "1": -1}}  # category to data mapping
         self.model = None  #model
         self.result = {}  # A outputData liked structure
         self.vizRes = {
         }  # {"figname":{"div":"bokehDiv","script":"scriptDiv"}}
         self.txtRes = ""  # "string"
         self.formRes = {}
         self.customObj = {
         }  #other to-saved variable should place here e.g. text tokenization {"objName":obj}
         if action == 'test':
             if not testLabel:
                 raise Exception(
                     "test label must be given under abnormal testing mode")
             self.outputDict = {"label": testLabel}
             self.paramDef["output"] = [{
                 "name": "label",
                 "type": "classifiable"
             }]
         if action == 'predict':
             self.outputDict = {"label": "label"}
             self.paramDef["output"] = [{
                 "name": "label",
                 "type": "classifiable"
             }]
         self.getParams()
         # Testing and predicting both work on an already stored model.
         if action in ('test', 'predict'):
             self.loadModel()
         self.colType = {
             c["name"]: {
                 "type": c["type"],
                 "classifiable": c["classifiable"]
             }
             for c in getColType(self.numFile, self.dataType).get()
         }
         self.getData()
     except Exception as e:
         # algoName may not be assigned yet when an early statement fails;
         # fall back to a placeholder so the original error is not masked
         # by an AttributeError raised here.
         algoName = getattr(self, 'algoName', 'unknown')
         raise Exception(f'[{algoName}][init]{traceback.format_exc()}') from e
Пример #10
0
    def getData(self):
        """Load the file and return the columns bound to x / y / value.

        For each of the roles 'x', 'y' and 'value' present in
        self.dataCol, the mapped column is read as a numpy array and its
        declared type is checked against self.algoInfo['data'][role].
        Also stores the name -> type mapping on self.colTypes.

        Returns:
            dict with 'all' (the full dataframe) plus one numpy array per
            role found in self.dataCol.

        Raises:
            Exception: prefixed with '[getData]' when the file is
                unknown, a column cannot be read, or a type check fails.
        """
        try:
            fileInfo = getFileInfo(self.fid)
            if len(fileInfo) == 0:
                raise Exception(f'fileUid not found')
            fileInfo = fileInfo[0]
            data = {}
            colType = getColType(fileInfo[3], fileInfo[1]).get()
            colTypes = {c["name"]: c['type'] for c in colType}
            self.colTypes = colTypes
            classifiables = {c["name"]: c['classifiable'] for c in colType}
            rawdata = getDf(fileInfo[3], fileInfo[1]).get()

            data['all'] = rawdata
            # The three roles share one validation routine.
            for role in ('x', 'y', 'value'):
                if role not in self.dataCol:
                    continue
                colName = self.dataCol[role]
                # NOTE(review): the column is read before the "none" check,
                # as in the original code -- confirm whether "none" should
                # skip the read as well.
                data[role] = np.asarray(rawdata[colName])
                if colName == "none":
                    continue

                expected = self.algoInfo['data'][role]
                # A float slot also accepts int columns; int, path and
                # string slots need an exact match.
                if expected == "float":
                    accepted = ("float", "int")
                elif expected in ("int", "path", "string"):
                    accepted = (expected,)
                else:
                    accepted = None
                if accepted is not None and colTypes[colName] not in accepted:
                    raise Exception(
                        f"col type of {role} error: can't convert {colTypes[colName]} to {expected}"
                    )
                if expected == 'classifiable' and classifiables[colName] == 0:
                    # Name the failing role (the 'value' branch used to
                    # report itself as 'x').
                    raise Exception(
                        f"col type of {role} error: {colName} is not classifiable"
                    )
        except Exception as e:
            raise Exception(f'[getData] {e}') from e

        return data