def set_pend(self, nn_id):
    """
    Set the stop flag on the currently-ready job ('1') for a network:
    status -> '7', progress reset, end time stamped.
    :param nn_id: neural network id
    :return: None
    :raises Exception: re-raised after logging on any failure
    """
    try:
        stamp = datetime.now()
        job = models.JobManagement.objects.get(nn_id=str(nn_id),
                                               status__in=['1'])
        job.end = stamp
        job.status = '7'
        job.progress = '0'
        job.save()
    except Exception as e:
        tfmsa_logger(e)
        raise Exception(e)
def get_next(self):
    """
    Fetch the highest-priority waiting job (status '1'), ordered by
    request timestamp.
    :return: first matching JobManagement row, or None when queue is empty
    :raises Exception: re-raised after logging on any failure
    """
    try:
        tfmsa_logger("get_next Task Job!")
        pending = models.JobManagement.objects.filter(
            status__contains="1").order_by('request')
        return pending[0] if len(pending) > 0 else None
    except Exception as e:
        tfmsa_logger(e)
        raise Exception(e)
def delete_table(self, db_name, table_name):
    """
    Delete a table directory from the local data store.
    :param db_name: target database name
    :param table_name: target table name
    :return: deleted table name
    :raises Exception: when the table directory does not exist or removal fails
    """
    try:
        target = "{0}{1}/{2}".format(self.root, db_name, table_name)
        if not os.path.exists(target):
            raise Exception(
                "request table : {0} not exist".format(table_name))
        shutil.rmtree(target)
        return table_name
    except Exception as e:
        tfmsa_logger("Error : {0}".format(e))
        raise Exception(e)
def init_job_info(self, nn_id):
    """
    Reset a job's data pointers from its table metadata: endpointer is
    set to the table row count, datapointer back to zero.
    :param nn_id: neural network id
    :return: updated JobManagement row, or False on failure
    """
    try:
        net_info = netconf.get_network_config(nn_id)
        table_info = self.get_table_info(net_info['dir'], net_info['table'])
        job = models.JobManagement.objects.get(nn_id=str(nn_id))
        job.endpointer = str(table_info.row_len)
        job.datapointer = '0'
        job.save()
        return job
    except Exception as e:
        tfmsa_logger(e)
        return False
def remove_trained_data(nn_id):
    """
    Remove the stored trained weights/bias model file for a network.
    Creates the model directory if it does not exist yet.
    :param nn_id: neural network id
    :return: None (errors are logged, not raised)
    """
    try:
        directory = settings.HDFS_MODEL_ROOT + "/" + nn_id + "/"
        if not os.path.exists(directory):
            os.makedirs(directory)
        model_path = directory + "/" + nn_id + "_model"
        if os.path.isfile(model_path):
            os.remove(model_path)
        tfmsa_logger("remove model completed for [ " + nn_id + "]")
    except Exception as e:
        tfmsa_logger("remove trained error : {0}".format(e))
def remove_conf(net_id):
    """
    Delete the stored JSON configuration file for a network.
    :param net_id: neural network id
    :return: None
    :raises Exception: re-raised after logging when removal fails
    --------------------------------------------------------------
    16.10.22 jh100 bugfix add "/" make folder od nnid
    """
    # Bug fix by jh100 16.10.22
    directory = settings.HDFS_CONF_ROOT + "/" + net_id + "/"
    conf_file = net_id + "_conf.json"
    if not os.path.exists(directory):
        os.makedirs(directory)
    try:
        if os.path.isfile(directory + conf_file):
            os.remove(directory + conf_file)
    except Exception as e:
        tfmsa_logger("removing conf fail : {0}".format(e))
        raise Exception(e)
def test_gete(self):
    """Smoke-test the category and subcategory GET endpoints."""
    host_name = "{0}:{1}".format(os.environ['HOSTNAME'], "8989")
    urls = (
        'http://' + host_name + '/api/v1/type/common/item/category//',
        'http://' + host_name + '/api/v1/type/common/item/subactegory/mes/',
    )
    for url in urls:
        resp = requests.get(url)
        data = json.loads(resp.json())
        if data['status'] == "200":
            tfmsa_logger("==========PASS==========")
        else:
            raise Exception(resp.json())
def get_selected_job_info(self, nn_id):
    """
    Load the job parameters for a network, filling empty fields with
    defaults (batchsize=1000, epoch=10, datapointer=0) and persisting.
    :param nn_id: neural network id
    :return: JobManagement row, or False on failure
    """
    try:
        job = models.JobManagement.objects.get(nn_id=str(nn_id))
        for field, default in (('batchsize', '1000'),
                               ('epoch', '10'),
                               ('datapointer', '0')):
            if getattr(job, field) == '':
                setattr(job, field, default)
        job.save()
        return job
    except Exception as e:
        tfmsa_logger(e)
        return False
def set_request_time(self, nn_id, time):
    """
    Update the request timestamp used for job execution priority.
    :param nn_id: neural network id
    :param time: dict with 'year','month','day','hour','min','sec' keys
    :return: the datetime that was stored
    :raises Exception: re-raised after logging on any failure
    """
    try:
        # example: datetime(2013, 6, 5, 23, 59, 59, 999999)
        parts = [int(time[k]) for k in
                 ('year', 'month', 'day', 'hour', 'min', 'sec')]
        set_time = datetime(parts[0], parts[1], parts[2],
                            parts[3], parts[4], parts[5], 999999)
        job = models.JobManagement.objects.get(nn_id=str(nn_id),
                                               status__in=['1'])
        job.request = set_time
        job.save()
        return set_time
    except Exception as e:
        tfmsa_logger(e)
        raise Exception(e)
def post(self, request, nnid):
    """ - desc : predict result with given data
    - Request json data example \n
    <textfield> <font size = 1>
    [{"pclass": "1st","survived": "tag","sex": "female","age": "30","embarked": "Southampton","boat": "2"},
    {"pclass": "1st","survived": "tag","sex": "female","age": "30","embarked": "Southampton","boat": "2"},
    {"pclass": "1st","survived": "tag","sex": "female","age": "30","embarked": "Southampton","boat": "2"}]
    </textfield>
    ---
    parameters:
    - name: body
      paramType: body
      pytype: json
    """
    try:
        logger.tfmsa_logger("[Predict] start uploading csv on file system")
        if len(request.FILES.keys()) > 0:
            # loop files: persist each uploaded file under FILE_ROOT/predict/<nnid>/
            for key, requestSingileFile in request.FILES.items():
                file = requestSingileFile
                # NOTE(review): relies on Django's private attribute _name
                filename = file._name
                directory = "{0}/{1}/{2}".format(settings.FILE_ROOT,
                                                 "predict", nnid)
                if not os.path.exists(directory):
                    os.makedirs(directory)
                fp = open(
                    "{0}/{1}/{2}/{3}".format(settings.FILE_ROOT, "predict",
                                             nnid, filename), 'wb')
                for chunk in file.chunks():
                    fp.write(chunk)
                fp.close()
        # NOTE(review): predicts using `filename` from the last loop iteration;
        # if the request carried no files, `filename` is unbound and the
        # resulting NameError falls through to the 404 handler — confirm intended
        result = predict.wdnn_predict().wdd_predict(nnid, filename)
        # NOTE(review): result is JSON-encoded twice (dumps of a dumps string)
        # — the client receives a doubly-quoted payload; verify consumers expect this
        return_data = json.dumps(result)
        return Response(json.dumps(return_data))
    except Exception as e:
        # any failure (including missing upload) is reported as a 404 payload
        return_data = {"status": "404", "result": str(e)}
        return Response(json.dumps(return_data))
def inc_job_data_pointer(self, nn_id):
    """
    Advance the job's data pointer by one batch, clamping at endpointer.
    :param nn_id: neural network id
    :return: updated JobManagement row, or False on failure
    """
    try:
        job = models.JobManagement.objects.get(nn_id=str(nn_id))
        next_pointer = int(job.datapointer) + int(job.batchsize)
        if int(job.endpointer) >= next_pointer:
            job.datapointer = str(next_pointer)
        else:
            # past the end of the data: pin the pointer to the endpointer
            job.datapointer = str(job.endpointer)
        job.save()
        return job
    except Exception as e:
        tfmsa_logger(e)
        return False
def check_exist(self, nn_id, job_type):
    """
    Ensure a JobManagement row exists for the network; a newly created
    row is initialized to the ready state ('1').
    :param nn_id: neural network id
    :param job_type: job type to record on a newly created row
    :return: None (errors are logged and swallowed)
    """
    try:
        now = datetime.now()
        job, created = models.JobManagement.objects.get_or_create(
            nn_id=nn_id)
        if not created:
            return
        job.type = job_type
        job.status = "1"
        job.request = now
        job.start = None
        job.end = None
        job.progress = "0"
        job.save()
    except Exception as e:
        tfmsa_logger(e)
def test_create_database(self):
    """
    Verify create_database / delete_database round-trip correctly.
    :return: None
    """
    if "test" in ImageManager().search_all_database():
        # leftover from a previous run: clean it up first
        self.assertEqual(ImageManager().delete_database("test"), "test")
    self.assertEqual(ImageManager().create_database("test"), "test")
    if "test" not in ImageManager().search_all_database():
        raise Exception("creation fail ")
    self.assertEqual(ImageManager().delete_database("test"), "test")
    if "test" in ImageManager().search_all_database():
        raise Exception("deletion fail ")
    tfmsa_logger("==========PASS==========")
def query_random_sample(self, data_frame, table_name, query_str, sample_per=0.1):
    """
    Run a SQL query over a parquet-backed table and return a random
    sample (~sample_per fraction, fixed seed) of the result.
    :param data_frame: data frame (database) folder name under HDFS_DF_ROOT
    :param table_name: name of table you want to get data
    :param query_str: sql string
    :param sample_per: sampling fraction passed to DataFrame.sample
    :return: sampled query result rows
    :raises Exception: re-raised after logging on any failure
    """
    # fix: pre-bind the Spark handles so the finally block cannot hit a
    # NameError (which would mask the real exception) when setup fails early
    sc = None
    sqlContext = None
    df = None
    try:
        sc = self.spark_session_create("query_radom_sample")
        tfmsa_logger("start query data !")
        hdfs_path = settings.HDFS_DF_ROOT + "/" + data_frame + "/" + table_name
        sqlContext = SQLContext(sc)
        df = sqlContext.read.load(hdfs_path, "parquet")
        df.registerTempTable(table_name)
        result = sqlContext.sql(str(query_str)).sample(
            False, float(sample_per), seed=0).collect()
        return result
    except Exception as e:
        tfmsa_logger(e)
        raise Exception(e)
    finally:
        # only clean up the resources that were actually created
        if df is not None:
            df.unpersist()
        if sqlContext is not None:
            sqlContext.clearCache()
            sqlContext.dropTempTable(table_name)
        if sc is not None:
            sc.clearFiles()
            sc.stop()
        tfmsa_logger("stop context")
def set_table_info(self, base, table, col_len, row_len):
    """
    Create or update the DataTableInfo row for "base:table" with the
    given column/row counts.
    :param base: database name
    :param table: table name
    :param col_len: number of columns
    :param row_len: number of rows
    :return: True on success, False on failure
    """
    try:
        obj, created = models.DataTableInfo.objects.get_or_create(
            table_name=base + ":" + table)
        if created:
            tfmsa_logger("create new with state ready")
        else:
            # fix: get_or_create already returned the existing row; the
            # extra objects.get() lookup the original did here was redundant
            tfmsa_logger("update finished state to ready")
        obj.col_len = col_len
        obj.row_len = row_len
        obj.save()
        return True
    except Exception as e:
        tfmsa_logger(e)
        return False
def load_data(self, data_frame, table_name, st_pnt="0", end_pnt="10"):
    """
    Load image rows (raw bytes plus metadata) from the HBase table
    backing data_frame/table_name.
    :param data_frame: database name
    :param table_name: table name
    :param st_pnt: start row key (currently unused - full scan is performed)
    :param end_pnt: end row key (currently unused - full scan is performed)
    :return: list of dicts with keys 'bt', 'label', 'decoder', 'width', 'height'
    :raises Exception: re-raised after logging on any failure
    """
    results = []
    try:
        tfmsa_logger("[1]start - load image data")
        conn, table = self.get_target_table(data_frame, table_name)
        # rows = table.scan(row_start=st_pnt, row_stop=end_pnt)
        for scanned in table.scan():
            # scan yields (row_key, cell_dict) pairs; we only need the cells
            cells = scanned[1]
            results.append({
                'bt': cells[b'data:filebyte'],
                'label': cells[b'data:label'],
                'decoder': cells[b'data:decoder'],
                'width': cells[b'data:width'],
                'height': cells[b'data:height']
            })
        tfmsa_logger("[2]Finish - load image data ")
        return results
    except Exception as e:
        tfmsa_logger("Error : {0}".format(e))
        raise Exception(e)
def post(self, req):
    """
    Register a new server configuration: mark every currently Alive
    conf row Dead, then persist the new conf from the request data.
    :param req: serializable server-conf payload
    :return: "success"
    :raises Exception: re-raised after logging on any failure
    """
    try:
        # set state Dead to current Alive conf
        for alive in models.ServerConf.objects.filter(state__contains="A"):
            alive.state = "D"
            alive.save()
        # set new conf with request data
        serializer = serializers.ServerConfSerializer(data=req)
        if serializer.is_valid():
            serializer.save()
        return "success"
    except Exception as e:
        tfmsa_logger(e)
        raise Exception(e)
def test_create_format(self):
    """Create a network info row, then post an image format for it."""
    host_name = "{0}:{1}".format(os.environ['HOSTNAME'], "8989")
    requests.post('http://' + host_name + '/api/v1/type/common/nninfo/',
                  json={
                      "nn_id": self.__class__.rand_name,
                      "category": "img",
                      "subcate": "img_test",
                      "name": "img_cnn",
                      "desc": "img_cnn"
                  })
    format_url = ('http://' + host_name +
                  '/api/v1/type/imagefile/base/mes/table/' +
                  self.__class__.rand_name + '/format/' +
                  self.__class__.rand_name + '/')
    resp = requests.post(format_url, json={"x_size": 100, "y_size": 100})
    data = json.loads(resp.json())
    if data['status'] == "200":
        tfmsa_logger("==========PASS==========")
    else:
        raise Exception(data['result'])
def test_get_train_data(self):
    """
    Upload one temporary image file into test/test_table under label "1"
    and assert exactly one row was stored.
    :return: None
    """
    if "test" not in ImageManager().search_all_database():
        self.assertEqual(ImageManager().create_database("test"), "test")
    if "test_table" not in ImageManager().search_database("test"):
        ImageManager().create_table("test", "test_table")
    if "1" not in ImageManager().search_table("test", "test_table"):
        ImageManager().create_label("test", "test_table", "1")
    temp_file = TemporaryUploadedFile("img_test_data", "byte", 66666,
                                      "xxxxxxxxxxxxxxxxxx")
    self.assertEqual(ImageManager().put_data("test", "1", [temp_file]), 1)
    # ImageManager().load_data("test", "test_table", "1")
    tfmsa_logger("==========PASS==========")
def get_distinct_dataframe(self, data_frame, table_name, columns):
    """
    Collect the distinct values of the requested columns from a
    parquet-backed table.
    :param data_frame: data frame (database) folder name under HDFS_DF_ROOT
    :param table_name: name of table you want to get data
    :param columns: iterable of column names to compute distinct values for
    :return: dict mapping encoded column name -> list of encoded distinct values
    :raises Exception: re-raised after logging on any failure
    """
    # fix: pre-bind the Spark handles so the finally block cannot hit a
    # NameError (which would mask the real exception) when setup fails early
    sc = None
    sqlContext = None
    df = None
    try:
        sc = self.spark_session_create("get_distinct_dataframe")
        tfmsa_logger("start find distinct column !")
        hdfs_path = settings.HDFS_DF_ROOT + "/" + data_frame + "/" + table_name
        query_str = "select * from " + table_name
        sqlContext = SQLContext(sc)
        df = sqlContext.read.load(hdfs_path, "parquet")
        df.registerTempTable(table_name)
        result = sqlContext.sql(str(query_str))
        return_data = {}
        for column in columns:
            return_data[column.encode("UTF8")] = result.select(column).map(
                lambda x: str(x[0]).encode("UTF8")).distinct().collect()
        tfmsa_logger("End find distinct column !")
        return return_data
    except Exception as e:
        tfmsa_logger(e)
        raise Exception(e)
    finally:
        # only clean up the resources that were actually created
        if df is not None:
            df.unpersist()
        if sqlContext is not None:
            sqlContext.clearCache()
            sqlContext.dropTempTable(table_name)
        if sc is not None:
            sc.clearFiles()
            sc.stop()
        tfmsa_logger("stop context")
def test_image_predict(self):
    """
    End-to-end predict test: update image format, update network info,
    then run a CNN predict on one resized test image.
    """
    host_name = "{0}:{1}".format(os.environ['HOSTNAME'], "8989")

    tfmsa_logger("[1] Image file format update")
    resp = requests.post(
        'http://' + host_name +
        '/api/v1/type/imagefile/base/mes/table/testtable2/format/nn0000090/',
        json={"x_size": 32, "y_size": 32})
    if json.loads(resp.json())['status'] != "200":
        raise Exception("RESI Service Fail")

    tfmsa_logger("[2] Network info update")
    resp = requests.post(
        'http://' + host_name + '/api/v1/type/common/nninfo/',
        json={"nn_id": "nn0000090",
              "category": "SCM",
              "subcate": "csv",
              "name": "CENSUS_INCOME",
              "desc": "INCOME PREDICT"})
    if json.loads(resp.json())['status'] != "200":
        raise Exception("RESI Service Fail")

    tfmsa_logger("[3] Predict Neural Network")
    img = self.simple_resize(
        "/home/dev/TensorMSA/tfmsacore/resources/test.png", 32, 32)
    resp = requests.put(
        'http://' + host_name + '/api/v1/type/cnn/predict/nn0000090/',
        json=[img])
    if json.loads(resp.json())['status'] != "200":
        raise Exception("RESI Service Fail")

    tfmsa_logger("[4] PASS TEST")
def create(self, nn_id, job_type):
    """
    Register (or re-arm) a job for the network in the ready state ('1').
    Existing rows are re-armed only when their status is finished/
    stopped/failed ('5', '7', '9').
    :param nn_id: neural network id
    :param job_type: job type to record
    :return: number of currently running jobs (status '3')
    """
    try:
        now = datetime.now()
        job, created = models.JobManagement.objects.get_or_create(
            nn_id=nn_id)
        if created:
            tfmsa_logger("create new with state ready")
            # obj.nn_id = nn_id
        else:
            tfmsa_logger("update finished state to ready")
            job = models.JobManagement.objects.get(
                nn_id__contains=nn_id, status__in=['5', '7', '9'])
        job.type = job_type
        job.status = "1"
        job.request = now
        job.start = None
        job.end = None
        job.progress = "0"
        job.save()
        return len(models.JobManagement.objects.filter(status__in=['3']))
    except Exception as e:
        tfmsa_logger(e)
        return len(models.JobManagement.objects.filter(status__in=['3']))
def setUp(self):
    """Log a start marker before each test case runs."""
    tfmsa_logger('####### START ########')
def tearDown(self):
    """Log a finish marker after each test case runs."""
    tfmsa_logger('####### FINISH ########')
def put_data(self, data_frame, table_name, label, file_set, nnid):
    """
    Upload a set of image files into the HBase train/test tables for a
    network: preprocess each image per the network's format, split keys
    into train/test samples, and batch-write the rows.
    :param data_frame: database name
    :param table_name: table name
    :param label: label value to attach to every uploaded image
    :param file_set: mapping of file name -> uploaded file object
    :param nnid: neural network id
    :return: list of uploaded file names
    :raises Exception: re-raised after logging on any failure
    """
    # fix: pre-bind conn so the finally block cannot raise NameError
    # (masking the real exception) when the HBase connection fails early
    conn = None
    try:
        tfmsa_logger("[1]Start upload images...")
        self.make_inital_path(nnid)

        # get network base info
        tfmsa_logger("[2]get network base info")
        net_info = netconf.get_network_config(nnid)

        # get data format info
        tfmsa_logger("[3]get network format info")
        format_info = json.loads(netconf.load_ori_format(nnid))

        # get hbase trasaction table
        tfmsa_logger("[4]get hbase trasaction table")
        conn, train_table, test_table = self.get_divided_target_table(
            data_frame, table_name)
        train_buffer = train_table.batch(transaction=True)
        test_buffer = test_table.batch(transaction=True)

        # get Label list
        tfmsa_logger("[5]Updata Label List ")
        self.label_info_update(net_info, label)

        # upload images, routing each key to the train or test batch
        tfmsa_logger("[6]upload image on Hbase - start ")
        file_list = []
        train_key_set, test_key_set = self.divide_train_sample(
            file_set.keys())
        for key in file_set.keys():
            file = file_set[key]
            row_value = dict()
            row_key = table_name + ":" + self.make_hbasekey()
            byte_buffer, width, height = self.image_preprocess(
                file, net_info, format_info, label)
            row_value[':'.join(('data', 'filebyte'))] = str(list(byte_buffer))
            row_value[':'.join(('data', 'label'))] = str(label)
            # decoder is taken from the file extension (text after the dot)
            row_value[':'.join(('data', 'decoder'))] = str(key).split(".")[1]
            row_value[':'.join(('data', 'width'))] = str(width)
            row_value[':'.join(('data', 'height'))] = str(height)
            file_list.append(file._name)
            if (key in train_key_set):
                train_buffer.put(row_key, row_value)
            if (key in test_key_set):
                test_buffer.put(row_key, row_value)
        train_buffer.send()
        test_buffer.send()
        tfmsa_logger("[7]upload image on Hbase - finish")
        return file_list
    except Exception as e:
        tfmsa_logger("Error : {0}".format(e))
        raise Exception(e)
    finally:
        # close the connection only if it was actually opened
        if conn is not None:
            conn.close()
        tfmsa_logger("Finish upload image...")
def __init__(self):
    """Log construction, then delegate initialization to WdnnCommonManager."""
    tfmsa_logger("[1] Create wdnn_train ")
    WdnnCommonManager.__init__(self)
def run_wdd_train(self, nnid, start_pnt=1, batch_size=1000):
    """
    Wide & Deep Network Training.

    Trains the network batch-by-batch: fetches one batch of rows from
    HBase, fits the model on it, and recurses with the next data pointer
    until the final (short) batch, which is used for evaluation.

    :param nnid: network id in tfmsacore_nninfo
    :param start_pnt: row pointer to start fetching data from
    :param batch_size: default batch size (overridden by the stored job param)
    :return: nnid on completion
    :raises Exception: re-raised after printing on any failure
    """
    try:
        tfmsa_logger("[2] start run wdd_Train " + nnid)

        # make wide & deep model
        wdnn_model = WdnnCommonManager.wdnn_build(self, nnid=nnid)

        # get json from postgres by nnid
        json_string = WdnnCommonManager.get_all_info_json_by_nnid(
            self, nnid=nnid)
        database = json_string["dir"]
        table_name = json_string["table"]

        # Make NetworkConfiguration Json Objct
        json_string = netconf.load_ori_format(nnid)
        json_ob = json.loads(json_string)

        # get label column from hbase nn config json
        t_label = json_ob["label"]
        label_column = list(t_label.keys())[0]

        # get train hyper param (overrides the batch_size argument)
        job_parm = JobStateLoader().get_selected_job_info(nnid)
        batch_size = int(job_parm.batchsize)
        model_lint_cnt = int(job_parm.epoch)

        tfmsa_logger(
            "[3] Get Dataframe from Hbase ##Start## {0},{1},{2},{3} ".
            format(start_pnt, database, table_name, label_column))
        df, pnt = data.DataMaster().query_data(database,
                                               table_name,
                                               "a",
                                               use_df=True,
                                               limit_cnt=batch_size,
                                               with_label=label_column,
                                               start_pnt=start_pnt)
        # copy kept for evaluation; fit() below consumes df via input_fn
        df_eval = df.copy()
        tfmsa_logger("[4] Get Dataframe from Hbase ##End## (" +
                     str(batch_size) + ")")

        ##MAKE MONITOR
        tfmsa_logger("[5] Make Monitor Class")
        customsMonitor = Monitors.MonitorCommon(p_nn_id=nnid,
                                                p_max_steps=model_lint_cnt,
                                                p_every_n_steps=1000)

        tfmsa_logger("[6] start fitting")
        wdnn_model.fit(
            input_fn=lambda: WdnnCommonManager.input_fn(self, df, nnid),
            steps=model_lint_cnt,
            monitors=[customsMonitor])

        # NOTE(review): a batch shorter than 10 rows is treated as the final
        # batch and triggers evaluation instead of recursion — confirm the
        # threshold of 10 is intentional
        if (len(df_eval) < 10):
            tfmsa_logger("[7] Train Result")
            results = wdnn_model.evaluate(
                input_fn=lambda: WdnnCommonManager.input_fn(
                    self, df_eval, nnid),
                steps=1)
            for key in sorted(results):
                tfmsa_logger("%s: %s" % (key, results[key]))
            return nnid
        else:
            # advance the stored data pointer and train on the next batch
            JobStateLoader().inc_job_data_pointer(nnid)
            self.run_wdd_train(nnid=nnid, start_pnt=pnt)
            return nnid
    except Exception as e:
        print("Error Message : {0}".format(e))
        raise Exception(e)
def test_image_train(self):
    """
    End-to-end train test: update image format, network info and CNN
    configuration for nn0000090, then kick off training.
    """
    host_name = "{0}:{1}".format(os.environ['HOSTNAME'], "8989")

    tfmsa_logger("[1] Image file format update")
    resp = requests.post(
        'http://' + host_name +
        '/api/v1/type/imagefile/base/mes/table/testtable2/format/nn0000090/',
        json={"x_size": 32, "y_size": 32})
    if (json.loads(resp.json())['status'] != "200"):
        raise Exception("RESI Service Fail")

    tfmsa_logger("[2] Network info update")
    resp = requests.post('http://' + host_name +
                         '/api/v1/type/common/nninfo/',
                         json={
                             "nn_id": "nn0000090",
                             "category": "SCM",
                             "subcate": "csv",
                             "name": "CENSUS_INCOME",
                             "desc": "INCOME PREDICT"
                         })
    if (json.loads(resp.json())['status'] != "200"):
        raise Exception("RESI Service Fail")

    tfmsa_logger("[3] Network configuration update")
    resp = requests.post('http://' + host_name +
                         '/api/v1/type/cnn/conf/nn0000090/',
                         json={
                             "data": {
                                 "datalen": 1024,
                                 "taglen": 2,
                                 "matrix": [32, 32],
                                 "learnrate": 0.01,
                                 "epoch": 10
                             },
                             "layer": [{
                                 "type": "input",
                                 "active": "relu",
                                 "cnnfilter": [2, 2],
                                 "cnnstride": [2, 2],
                                 "maxpoolmatrix": [2, 2],
                                 "maxpoolstride": [2, 2],
                                 "node_in_out": [1, 16],
                                 "regualizer": "",
                                 "padding": "SAME",
                                 "droprate": ""
                             }, {
                                 "type": "cnn",
                                 "active": "relu",
                                 "cnnfilter": [2, 2],
                                 "cnnstride": [2, 2],
                                 "maxpoolmatrix": [2, 2],
                                 "maxpoolstride": [2, 2],
                                 "node_in_out": [16, 32],
                                 "regualizer": "",
                                 "padding": "SAME",
                                 "droprate": ""
                             }, {
                                 "type": "reshape",
                             }, {
                                 "type": "drop",
                                 "active": "relu",
                                 "regualizer": "",
                                 "droprate": "0.5"
                             }, {
                                 "type": "out",
                                 "active": "softmax",
                                 "cnnfilter": "",
                                 "cnnstride": "",
                                 "maxpoolmatrix": "",
                                 "maxpoolstride": "",
                                 "node_in_out": [32, 2],
                                 "regualizer": "",
                                 "padding": "SAME",
                                 "droprate": ""
                             }]
                         })
    if (json.loads(resp.json())['status'] != "200"):
        raise Exception("RESI Service Fail")

    tfmsa_logger("[4] Train Neural Network")
    resp = requests.post('http://' + host_name +
                         '/api/v1/type/cnn/train/nn0000090/',
                         json={
                             "epoch": "10",
                             "testset": "10"
                         })
    if (json.loads(resp.json())['status'] != "200"):
        raise Exception("RESI Service Fail")

    # fix: the final log literal was broken across two physical lines in
    # the captured source (a syntax error); restored as a single-line
    # string matching the sibling test's "[4] PASS TEST" pattern
    tfmsa_logger("[5] PASS TEST")
def __init__(self):
    """Log creation of the JobManager instance."""
    tfmsa_logger("initialize JobManager!!")
def __init__(self):
    """Log creation of the server state checker instance."""
    tfmsa_logger("initialize serverStateChecker!!")