def _scale(self, train_df):
    '''
    Centers the data around 0 and scales it.

    :param train_df: dataframe that contains the training data
    :return: dataframe with an additional column scaled_FEATURE_X per feature,
             containing the scaled values
    :return: trained_scalers: dictionary keyed by column name - per feature it
             stores the fitted scaler object needed in the testing phase to
             perform identical scaling
    '''
    Logging().log("Scaling Features...")

    trained_scalers = {}
    for col in train_df.columns:

        # 1. consider only relevant columns
        if not col.startswith("FEATURE"):
            continue

        # 2. standard scaler (fit/transform expect a 2D input, hence the double brackets)
        scaler = preprocessing.StandardScaler()
        scaler = scaler.fit(train_df[[col]])
        train_df['scaled_' + col] = scaler.transform(train_df[[col]])
        trained_scalers[col] = copy.deepcopy(scaler)

    return train_df, trained_scalers
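# The scalers returned above are intended to be reused on unseen data so that
# train and test share identical scaling. A minimal sketch of how they might be
# applied at test time; the helper name _apply_scalers is illustrative and not
# part of the original class.
def _apply_scalers(self, test_df, trained_scalers):
    for col, scaler in trained_scalers.items():
        # transform expects a 2D input, hence the double brackets
        test_df['scaled_' + col] = scaler.transform(test_df[[col]])
    return test_df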
class gHumans():

    def __init__(self):

        ###############################################################
        #
        # Sets up all default requirements and placeholders
        # needed for the NLU engine to run.
        #
        # - Helpers: Useful global functions
        # - JumpWay/jumpWayClient: iotJumpWay class and connection
        # - Logging: Logging class
        #
        ###############################################################

        self.Helpers = Helpers()
        self._confs = self.Helpers.loadConfigs()

        self.JumpWay = JumpWay()
        self.MySql = MySql()
        self.MySql.setMysqlCursorRows()

        self.Logging = Logging()
        self.LogFile = self.Logging.setLogFile(self._confs["aiCore"]["Logs"] + "Client/")

    def getHumanByFace(self, response, entities=None):

        ###############################################################
        #
        # Checks to see who was seen in the system camera within the
        # last few seconds
        #
        ###############################################################

        results = None
        resultsLength = None

        try:
            self.MySql.mysqlDbCur.execute(
                "SELECT users.id, users.name, users.zone FROM a7fh46_users_logs logs INNER JOIN a7fh46_users users ON logs.uid = users.id WHERE logs.timeSeen > (NOW() - INTERVAL 10 SECOND) "
            )
            results = self.MySql.mysqlDbCur.fetchall()
            resultsLength = len(results)

            if resultsLength > 0:
                if resultsLength == 1:
                    message = "I detected " + str(resultsLength) + " human, #" + str(
                        results[0]["id"]) + " " + results[0]["name"]
                else:
                    message = "I detected " + str(resultsLength) + " humans"
            else:
                message = "I didn't detect any humans in the system camera feed, please stand in front of the camera"

            return message

        except Exception as errorz:
            print('FAILED1')
            print(errorz)
            return results

    def getCurrentHuman(self, responses, entities=None):

        ###############################################################
        #
        # Checks to see who was seen in the system camera within the
        # last few seconds
        #
        ###############################################################

        results = None

        try:
            self.MySql.mysqlDbCur.execute(
                "SELECT users.id, users.name, users.zone FROM a7fh46_user_current currentH INNER JOIN a7fh46_users users ON currentH.uid = users.id WHERE currentH.timeSeen > (NOW() - INTERVAL 1 MINUTE) ORDER BY id DESC LIMIT 1"
            )
            results = self.MySql.mysqlDbCur.fetchone()

            if results is not None:
                return random.choice(responses).replace("%%HUMAN%%", results["name"])
            else:
                return "Sorry I could not identify you, this system will now self destruct! You have 5 seconds..."

        except Exception as errorz:
            print('FAILED1')
            print(errorz)
            return results

    def updateHuman(self, responses, entities):

        results = None

        try:
            # Parameterized query avoids SQL injection via the entity value
            self.MySql.mysqlDbCur.execute(
                "SELECT id, name FROM a7fh46_users users WHERE name = %s",
                (entities[0],))
            results = self.MySql.mysqlDbCur.fetchone()

            if results is not None:
                timeSeen = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
                self.MySql.mysqlDbCur.execute(
                    """
                        INSERT INTO a7fh46_user_current
                            (uid, lid, fid, zid, did, timeSeen)
                        VALUES
                            (%s, %s, %s, %s, %s, %s);
                    """,
                    (results["id"], self._confs["iotJumpWay"]["Location"], 0,
                     self._confs["iotJumpWay"]["Zone"],
                     self._confs["iotJumpWay"]["Device"], timeSeen[:-3]))
                self.MySql.mysqlDbConn.commit()

                return random.choice(responses).replace("%%HUMAN%%", results["name"])
            else:
                return "Sorry I could not identify you, this system will now self destruct! You have 5 seconds..."

        except Exception as errorz:
            print('FAILED2')
            print(errorz)
            return results
class NLU():

    def __init__(self):

        self.Helpers = Helpers()
        self.Logging = Logging()
        self._confs = self.Helpers.loadConfigs()

        self.LogFile = self.Logging.setLogFile(self._confs["AI"]["Logs"] + "NLU/")
        self.ChatLogFile = self.Logging.setLogFile(self._confs["AI"]["Logs"] + "Chat/")

        self.Logging.logMessage(self.LogFile, "NLU", "INFO",
                                "NLU Classifier LogFile Set")

        self.startMQTT()

    def commandsCallback(self, topic, payload):

        self.Logging.logMessage(
            self.LogFile, "iotJumpWay", "INFO",
            "Received iotJumpWay Command Data : " + str(payload))
        commandData = json.loads(payload.decode("utf-8"))

    def startMQTT(self):

        try:
            self.jumpwayClient = jumpWayDevice.DeviceConnection({
                "locationID": self._confs["iotJumpWay"]["Location"],
                "zoneID": self._confs["iotJumpWay"]["Zone"],
                "deviceId": self._confs["iotJumpWay"]["Device"],
                "deviceName": self._confs["iotJumpWay"]["DeviceName"],
                "username": self._confs["iotJumpWay"]["MQTT"]["Username"],
                "password": self._confs["iotJumpWay"]["MQTT"]["Password"]
            })
            self.jumpwayClient.connectToDevice()
            self.jumpwayClient.subscribeToDeviceChannel("Commands")
            self.jumpwayClient.deviceCommandsCallback = self.commandsCallback

            self.Logging.logMessage(self.LogFile, "iotJumpWay", "INFO",
                                    "iotJumpWay Client Ready")

        except Exception as e:
            self.Logging.logMessage(self.LogFile, "iotJumpWay", "INFO",
                                    "iotJumpWay Client Initiation Failed")
            print(str(e))
            sys.exit()

    def setup(self):

        self.Logging.logMessage(self.LogFile, "NLU", "INFO",
                                "NLU Classifier Initiating")

        self.Data = Data(self.Logging, self.LogFile)
        self.Model = Model()
        self.Context = Context()

        self.user = {}
        self.ner = None

        self.trainingData = self.Data.loadTrainingData()
        self.trainedData = self.Data.loadTrainedData()

        self.trainedWords = self.trainedData["words"]
        self.trainedClasses = self.trainedData["classes"]
        self.x = self.trainedData["x"]
        self.y = self.trainedData["y"]
        self.intentMap = self.trainedData["iMap"][0]

        self.restoreEntitiesModel()
        self.restoreModel()

        self.Logging.logMessage(self.LogFile, "NLU", "INFO", "NLU Ready")

    def restoreEntitiesModel(self):

        if os.path.exists(self._confs["ClassifierSettings"]["EntitiesDat"]):
            self.ner = named_entity_extractor(
                self._confs["ClassifierSettings"]["EntitiesDat"])
            self.Logging.logMessage(self.LogFile, "NER", "OK",
                                    "Restored NLU NER Model")

    def restoreModel(self):

        self.tmodel = self.Model.buildDNN(self.x, self.y)
        self.Logging.logMessage(self.LogFile, "NLU", "INFO",
                                "Restored NLU Model")

    def setupEntities(self):

        if self._confs["ClassifierSettings"]["Entities"] == "Mitie":
            self.entityExtractor = Entities()
            self.Logging.logMessage(self.LogFile, "NER", "INFO",
                                    "NLU Entity Extractor Initiated")

    def initiateSession(self, userID):

        self.userID = userID
        if self.userID not in self.user:
            self.user[self.userID] = {}
            self.user[self.userID]["history"] = {}

        self.Logging.logMessage(self.LogFile, "Session", "INFO",
                                "NLU Session Ready For User #" + self.userID)

    def setThresholds(self, threshold):

        self.threshold = float(threshold)
        self.entityThrshld = self._confs["ClassifierSettings"]["Mitie"]["Threshold"]

    def predict(self, parsedSentence):

        predictions = [[index, confidence] for index, confidence in enumerate(
            self.tmodel.predict([
                self.Data.makeInferenceBag(parsedSentence, self.trainedWords)
            ])[0]) if confidence > self.threshold]
        predictions.sort(key=lambda x: x[1], reverse=True)

        classification = []
        for prediction in predictions:
            classification.append(
                (self.trainedClasses[prediction[0]], prediction[1]))

        return classification

    def talk(self, sentence, debug=False):

        self.Logging.logMessage(self.LogFile, "GeniSys", "STATUS", "Processing")

        parsed, fallback, entityHolder, parsedSentence = self.entityExtractor.parseEntities(
            sentence, self.ner, self.trainingData)

        classification = self.predict(parsedSentence)

        if len(classification) > 0:

            clearEntities = False
            theIntent = self.trainingData["intents"][self.intentMap[classification[0][0]]]

            if len(entityHolder) and not len(theIntent["entities"]):
                clearEntities = True

            if self.Context.checkSessionContext(self.user[self.userID], theIntent):

                if self.Context.checkClearContext(theIntent, 0):
                    self.user[self.userID]["context"] = ""

                contextIn, contextOut, contextCurrent = self.Context.setContexts(
                    theIntent, self.user[self.userID])

                if fallback and "fallbacks" in theIntent and len(theIntent["fallbacks"]):
                    response = self.entityExtractor.replaceResponseEntities(
                        random.choice(theIntent["fallbacks"]), entityHolder)
                    action, actionResponses = self.Helpers.setAction(theIntent)

                elif "entityType" in theIntent and theIntent["entityType"] == "Numbers":
                    response = random.choice(theIntent["responses"])
                    action, actionResponses = self.Helpers.setAction(theIntent)

                elif not len(entityHolder) and len(theIntent["entities"]):
                    response = self.entityExtractor.replaceResponseEntities(
                        random.choice(theIntent["fallbacks"]), entityHolder)
                    action, actionResponses = self.Helpers.setAction(theIntent)

                elif clearEntities:
                    entityHolder = []
                    response = random.choice(theIntent["responses"])
                    action, actionResponses = self.Helpers.setAction(theIntent)

                else:
                    response = self.entityExtractor.replaceResponseEntities(
                        random.choice(theIntent["responses"]), entityHolder)
                    action, actionResponses = self.Helpers.setAction(theIntent)

                if action != None:
                    classParts = action.split(".")
                    classFolder = classParts[0]
                    className = classParts[1]

                    module = __import__(classParts[0] + "." + classParts[1],
                                        globals(), locals(), [className])
                    actionClass = getattr(module, className)()
                    response = getattr(actionClass, classParts[2])(
                        random.choice(actionResponses))

                return {
                    "Response": "OK",
                    "ResponseData": [{
                        "Received": sentence,
                        "Intent": classification[0][0],
                        "Confidence": str(classification[0][1]),
                        "Response": response,
                        "ContextIn": contextIn,
                        "ContextOut": contextOut,
                        "Context": contextCurrent,
                        "Action": action,
                        "Entities": entityHolder
                    }]
                }

            else:

                self.user[self.userID]["context"] = ""
                contextIn, contextOut, contextCurrent = self.Context.setContexts(
                    theIntent, self.user[self.userID])

                if fallback and "fallbacks" in theIntent and len(theIntent["fallbacks"]):
                    response = self.entityExtractor.replaceResponseEntities(
                        random.choice(theIntent["fallbacks"]), entityHolder)
                    action, actionResponses = None, []
                else:
                    response = self.entityExtractor.replaceResponseEntities(
                        random.choice(theIntent["responses"]), entityHolder)
                    action, actionResponses = self.Helpers.setAction(theIntent)

                if action != None:
                    classParts = action.split(".")
                    classFolder = classParts[0]
                    className = classParts[1]

                    module = __import__(classParts[0] + "." + classParts[1],
                                        globals(), locals(), [className])
                    actionClass = getattr(module, className)()
                    response = getattr(actionClass, classParts[2])(
                        random.choice(actionResponses))
                else:
                    response = self.entityExtractor.replaceResponseEntities(
                        random.choice(theIntent["responses"]), entityHolder)

                return {
                    "Response": "OK",
                    "ResponseData": [{
                        "Received": sentence,
                        "Intent": classification[0][0],
                        "Confidence": str(classification[0][1]),
                        "Response": response,
                        "ContextIn": contextIn,
                        "ContextOut": contextOut,
                        "ContextCurrent": contextCurrent,
                        "Action": action,
                        "Entities": entityHolder
                    }]
                }

        else:

            contextCurrent = self.Context.getCurrentContext(self.user[self.userID])

            return {
                "Response": "FAILED",
                "ResponseData": [{
                    "Received": sentence,
                    "Intent": "UNKNOWN",
                    "Confidence": "NA",
                    "Responses": [],
                    "Response": random.choice(
                        self._confs["ClassifierSettings"]["defaultResponses"]),
                    "ContextIn": "NA",
                    "ContextOut": "NA",
                    "ContextCurrent": contextCurrent,
                    "Action": "NA",
                    "Entities": entityHolder
                }]
            }
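# Illustrative usage of the NLU class above, assuming a valid configuration and
# a previously trained model; the user ID, threshold, and sentence are
# placeholder values, not part of the original code.
if __name__ == "__main__":
    nlu = NLU()
    nlu.setup()
    nlu.setupEntities()
    nlu.initiateSession("1")            # hypothetical user ID
    nlu.setThresholds("0.5")            # confidence threshold, cast to float internally
    print(nlu.talk("Hi, who am I?"))    # returns the Response/ResponseData dictionary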
class Trainer():

    def __init__(self, jumpWay):

        self.Helpers = Helpers()
        self.Logging = Logging()
        self.jumpwayCl = jumpWay
        self._confs = self.Helpers.loadConfigs()

        self.LogFile = self.Logging.setLogFile(self._confs["AI"]["Logs"] + "Train/")
        self.Logging.logMessage(self.LogFile, "LogFile", "INFO",
                                "NLU Trainer LogFile Set")

        self.Model = Model()
        self.Data = Data(self.Logging, self.LogFile)

        self.intentMap = {}
        self.words = []
        self.classes = []
        self.dataCorpus = []

        self.setupData()
        self.setupEntities()

    def setupData(self):

        self.trainingData = self.Data.loadTrainingData()
        self.Logging.logMessage(self.LogFile, "Trainer", "INFO",
                                "Loaded NLU Training Data")

        self.words, self.classes, self.dataCorpus, self.intentMap = self.Data.prepareData(
            self.trainingData)
        self.x, self.y = self.Data.finaliseData(self.classes, self.dataCorpus,
                                                self.words)

        self.Logging.logMessage(self.LogFile, "TRAIN", "INFO",
                                "NLU Trainer Data Ready")

    def setupEntities(self):

        if self._confs["ClassifierSettings"]["Entities"] == "Mitie":
            self.entityExtractor = Entities()
            self.Logging.logMessage(self.LogFile, "TRAIN", "OK",
                                    "NLU Trainer Entity Extractor Ready")
            self.entityExtractor.trainEntities(
                self._confs["ClassifierSettings"]["Mitie"]["ModelLocation"],
                self.trainingData)

    def trainModel(self):

        while True:
            self.Logging.logMessage(self.LogFile, "TRAIN", "ACTION",
                                    "Ready To Begin Training ? (Yes/No)")
            userInput = input(">")

            if userInput == 'Yes':
                break
            if userInput == 'No':
                exit()

        humanStart, trainingStart = self.Helpers.timerStart()

        self.Logging.logMessage(self.LogFile, "TRAIN", "INFO",
                                "NLU Model Training At " + humanStart)

        self.jumpwayCl.publishToDeviceChannel(
            "Training", {
                "NeuralNet": "NLU",
                "Start": trainingStart,
                "End": "In Progress",
                "Total": "In Progress",
                "Message": "NLU Model Training At " + humanStart
            })

        self.Model.trainDNN(self.x, self.y, self.words, self.classes,
                            self.intentMap)

        trainingEnd, trainingTime, humanEnd = self.Helpers.timerEnd(trainingStart)

        self.Logging.logMessage(
            self.LogFile, "TRAIN", "OK", "NLU Model Trained At " + humanEnd +
            " In " + str(trainingEnd) + " Seconds")

        self.jumpwayCl.publishToDeviceChannel(
            "Training", {
                "NeuralNet": "NLU",
                "Start": trainingStart,
                "End": trainingEnd,
                "Total": trainingTime,
                "Message": "NLU Model Trained At " + humanEnd + " In " +
                           str(trainingEnd) + " Seconds"
            })
def _outlier_removal(self, train_df, remove_empty, nr_iterations, split_windows, std_threshold):
    '''
    Outliers are removed from the training dataframe per feature by windowing
    and removing, per window, all values that are further away than
    std_threshold times the standard deviation.

    :param train_df: dataframe that contains the training data
    :param remove_empty: boolean - if true, empty features are removed
    :param nr_iterations: number of iterations of outlier removal per window
    :param split_windows: data is split into split_windows equal-length windows
                          between the minimal risk and 1
    :param std_threshold: data that is further away than std_threshold * std of
                          the feature is removed
    :return: output_dfs: list of dataframes, each having a column
             scaled_FEATURE_X that is now outlier-free and a column RISK which
             is the risk for that feature at its row
    '''
    if not self._remove_outliers:
        print("Outlier removal disabled!")

    # 1. Initialize
    output_dfs = []
    iteration = range(nr_iterations)

    # Per feature and window
    for col in train_df.columns:

        # 2. only scaled features are considered
        if not col.startswith("scaled_FEATURE"):
            continue
        #Logging().log("CURRENT -> " + col)

        result_df = train_df.sort_values("RISK")

        # 3. iterate multiple times over the windows,
        #    removing outliers on each iteration
        for i in iteration:
            sub_dfs = []
            rs = 0

            # 4. iterate over windows
            for r in np.linspace(result_df["RISK"].min(), 1, split_windows):
                sub_df = result_df[(rs <= result_df["RISK"]) & (r > result_df["RISK"])]
                if self._remove_outliers:
                    sub_df = sub_df[((sub_df[col] - sub_df[col].mean()) /
                                     sub_df[col].std()).abs() < std_threshold]
                sub_dfs.append(sub_df)
                rs = r

            result_df = pd.concat(sub_dfs)

        # 5. Remove empty features
        if remove_empty and len(result_df[col].unique()) < 2:
            continue

        # 6. Merge result to common dataframe
        output_dfs.append(result_df[["RISK", col]])

        # 7. Plot results
        if self._visualize_outlier:
            Logging().log("Standard deviation before: " + str(train_df[col].std()))
            Visual().plot_scatter(train_df["RISK"], train_df[col])
            Logging().log("Standard deviation after: " + str(result_df[col].std()))
            Visual().plot_scatter(result_df["RISK"], result_df[col])

    return output_dfs
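# To illustrate the windowing idea above in isolation, the following
# self-contained sketch (toy data, hypothetical column names) drops values that
# are more than std_threshold standard deviations away from their window mean.
import numpy as np
import pandas as pd

def windowed_outlier_removal(df, col, risk_col="RISK", split_windows=10, std_threshold=2.0):
    kept = []
    lower = df[risk_col].min()
    for upper in np.linspace(lower, 1, split_windows):
        window = df[(df[risk_col] >= lower) & (df[risk_col] < upper)]
        if len(window) > 1:
            z = (window[col] - window[col].mean()) / window[col].std()
            window = window[z.abs() < std_threshold]
        kept.append(window)
        lower = upper
    return pd.concat(kept)

# toy example
toy_df = pd.DataFrame({"RISK": np.random.uniform(0, 1, 500),
                       "scaled_FEATURE_1": np.random.randn(500)})
clean_df = windowed_outlier_removal(toy_df, "scaled_FEATURE_1")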
def _build_heat_map(self, output_dfs):
    '''
    Using convolution, a heat map is generated per feature from the 2D array
    of risk vs. feature value.

    :param output_dfs: list of dataframes, each having a column
           scaled_FEATURE_X (outlier-free and scaled) and a column RISK which
           is the risk for that feature at its row
    :return: a dictionary that contains the feature name as key and its 2D
             heat map as value
    '''
    dimensions = {}
    for feature_df in output_dfs:  # each output_df has one risk and one value column

        Logging().log("Processing Feature: " + feature_df.columns[1])

        # Test mode
        if self._test_mode and (feature_df.columns[1] == "scaled_FEATURE_5"):
            print("Test mode, stopping early!")
            break

        try:
            values = np.empty(len(feature_df))
            values.fill(1)

            # Assign X, Y, Z
            X = feature_df.RISK.as_matrix()
            Y = feature_df[feature_df.columns[1]].as_matrix()
            Z = values

            # create x-y points of identical size to be used in the heat map
            risk_min = min([
                rm for rm in [df.RISK.min() for df in output_dfs]
                if not math.isnan(rm)
            ])
            risk_max = 1
            feature_min = min([
                rm for rm in [df[df.columns[1]].min() for df in output_dfs]
                if not math.isnan(rm)
            ])
            feature_max = max([
                rm for rm in [df[df.columns[1]].max() for df in output_dfs]
                if not math.isnan(rm)
            ])
            xi = np.linspace(risk_min, risk_max, self._grid_area)
            yi = np.linspace(feature_min, feature_max, self._grid_area)

            # Z is a matrix of interpolated x-y values
            zi = griddata((X, Y), Z, (xi[None, :], yi[:, None]),
                          method=self._interpol_method)
            zmin = 0
            zmax = 1
            zi[(zi < zmin) | (zi > zmax)] = None

            # Convolve each point with a Gaussian kernel, giving the heat value
            # at point (xi, yi). Advantage: keeps horizontal and vertical influence
            grid_cur = np.nan_to_num(zi)

            # Smooth with a Gaussian kernel
            kernel = Gaussian2DKernel(stddev=self._std_gaus,
                                      x_size=self._kernel_size,
                                      y_size=self._kernel_size)
            grad = scipy_convolve(grid_cur, kernel, mode='same', method='direct')

            # Store the model in memory
            dimensions[feature_df.columns[1]] = [
                copy.deepcopy(np.absolute(grad)),
                copy.deepcopy(xi),
                copy.deepcopy(yi)
            ]

            if self._visualize_heatmap:
                # Colormap reference: https://matplotlib.org/examples/color/colormaps_reference.html
                fig, (ax_orig, ax_mag) = plt.subplots(1, 2)
                ax_orig.imshow(grid_cur[::-1, ::-1], cmap='RdYlGn')
                ax_orig.set_title('Original')
                ax_mag.imshow(np.absolute(grad)[::-1, ::-1], cmap='RdYlGn')
                ax_mag.set_title('Heat')
                fig.show()
                plt.show()

        except Exception:
            Logging().log("No heat map created for " + feature_df.columns[1])
            #traceback.print_exc()
            dimensions[feature_df.columns[1]] = None

    return dimensions, xi
def _build_one_heat_map(self, feature_df, risk_min, feature_min, feature_max):

    Logging().log("Processing Feature: " + feature_df.columns[1])

    try:
        values = np.empty(len(feature_df))
        values.fill(1)

        # Assign X, Y, Z
        X = feature_df.RISK.as_matrix()
        Y = feature_df[feature_df.columns[1]].as_matrix()
        Z = values

        # create x-y points of identical size to be used in the heat map
        #risk_min = min([rm for rm in [df.RISK.min() for df in output_dfs] if not math.isnan(rm)])
        risk_max = 1
        xi = np.linspace(risk_min, risk_max, self._grid_area)
        yi = np.linspace(feature_min, feature_max, self._grid_area)

        # Z is a matrix of interpolated x-y values
        zi = griddata((X, Y), Z, (xi[None, :], yi[:, None]),
                      method=self._interpol_method)
        zmin = 0
        zmax = 1
        zi[(zi < zmin) | (zi > zmax)] = None

        # Convolve each point with a Gaussian kernel, giving the heat value
        # at point (xi, yi). Advantage: keeps horizontal and vertical influence
        grid_cur = np.nan_to_num(zi)

        # Smooth with a Gaussian kernel
        kernel = Gaussian2DKernel(stddev=self._std_gaus,
                                  x_size=self._kernel_size,
                                  y_size=self._kernel_size)
        grad = scipy_convolve(grid_cur, kernel, mode='same', method='direct')

        # Store the model in memory
        feature_name = feature_df.columns[1]
        result = [
            feature_name,
            [
                copy.deepcopy(np.absolute(grad)),
                copy.deepcopy(xi),
                copy.deepcopy(yi)
            ],
            grid_cur
        ]

    except Exception:
        #traceback.print_exc()
        feature_name = feature_df.columns[1]
        Logging().log(str(feature_df.columns[1]) + ": No heat map created due to error")
        result = [feature_name, None, None]

    return result
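# The interpolate-then-smooth step used by the two heat map builders above can
# be reproduced in isolation. This sketch substitutes scipy.ndimage's
# gaussian_filter for the astropy kernel convolution (an assumed, roughly
# equivalent smoothing) and uses synthetic (risk, feature value) points.
import numpy as np
from scipy.interpolate import griddata
from scipy.ndimage import gaussian_filter

rng = np.random.default_rng(0)
X = rng.uniform(0.2, 1.0, 200)                        # risk values
Y = np.sin(3 * X) + 0.1 * rng.standard_normal(200)    # feature values
Z = np.ones_like(X)                                   # each observation carries a heat of 1

grid_area = 100
xi = np.linspace(X.min(), 1.0, grid_area)
yi = np.linspace(Y.min(), Y.max(), grid_area)

# interpolate the scattered heat onto a regular grid, then replace NaNs
zi = griddata((X, Y), Z, (xi[None, :], yi[:, None]), method="linear")
grid_cur = np.nan_to_num(zi)

# smooth with a Gaussian so neighbouring grid cells also receive heat
heat = np.absolute(gaussian_filter(grid_cur, sigma=3))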
def _score_feature_quality(self, test_df, whole_df, model_per_feature,
                           cluster_id, data_in, r_min, r_max, finetuner_index):

    # Once computed, store the scores and reload them if needed
    print("_________SCORING: " + str(r_min) + " to " + str(r_max))

    # each model is a heat map
    tester = HeatmapConvolutionTester(smooth_per_feature=True,
                                      enable_all_print=False,
                                      visualize_summed_curve=False,
                                      visualize_per_feature_curve=False)
    abs_max_rul = whole_df["RUL"].max()  # 217
    segment_thrshld = 0.33 * abs_max_rul

    distances = {}  # key: feature name, value: list of distances
    phm_scores = {}
    rmse_scores = {}

    tot = len(list(test_df["id"].unique()))
    oo = 0
    for object_id in list(test_df["id"].unique()):
        oo += 1
        Logging.log(str(oo) + " of " + str(tot) +
                    " - Optimizing based on - OBJECT ID: " + str(object_id))
        cur_df1 = test_df[test_df['id'] == object_id]

        # predict once with a random index from the first 33%, then one between
        # 33% and 66%, and then one between 66% and 100%
        le = int(numpy.ceil(len(cur_df1) / 3))
        z_to_33 = list(range(le))
        random.shuffle(z_to_33)

        if 2 * le > len(cur_df1):
            t_to_66 = []
            s_to_100 = []
            thrshlds = [z_to_33[0]]
        else:
            t_to_66 = list(range(le, 2 * le))
            s_to_100 = list(range(2 * le, len(cur_df1)))
            random.shuffle(t_to_66)
            random.shuffle(s_to_100)
            thrshlds = [z_to_33[0], t_to_66[0], s_to_100[0]]

        cur_df3 = cur_df1.sort_values("RUL", ascending=False)

        for thrshld in thrshlds:
            current_test_df = cur_df3.iloc[:thrshld]
            dist = current_test_df["RUL"].max() - current_test_df["RUL"].min()
            if dist > segment_thrshld:
                print("SHORTENED RUL AREA")
                thrs = current_test_df["RUL"].min() + segment_thrshld
                current_test_df = current_test_df[current_test_df["RUL"] < thrs]

            # do prediction
            try:
                predicted_risk, predicted_rul, m, all_feature_sum, per_feature_sum, feature_favorites = tester._predict_RUL(
                    data_in, current_test_df, cluster_id, None, [], 0,
                    current_test_df["RUL"].min(), test=True, fine=finetuner_index)
            except Exception:
                print("No prediction - segment too short")
                continue

            true_RUL = current_test_df.iloc[-1]["RUL"]
            true_risk = 1 + m * true_RUL

            # post process - assess per_feature_sum
            for col_name in per_feature_sum.keys():
                #print("--- Feature: " + col_name)
                cur = per_feature_sum[col_name]
                cur[1] = cur[1][0]
                #m_idx = numpy.argmax(cur[1])
                #da_risk_found = cur[0][m_idx]
                #predicted_rul_feature = (da_risk_found - 1) / m

                if numpy.count_nonzero(cur[1]) == 0:
                    # this curve did not help at all
                    if col_name in distances:
                        distances[col_name].append([1])
                        phm_scores[col_name].append(["None"])
                        rmse_scores[col_name].append(["None"])
                    else:
                        distances[col_name] = [[1]]
                        phm_scores[col_name] = [["None"]]
                        rmse_scores[col_name] = [["None"]]
                    #print(" - Distance - 1")
                    continue

                ten_perc = math.ceil(0.05 * len(cur[1]))
                subs = int(0.1 * len(cur[1]))
                # indices of the x percent highest values
                ind = sorted(numpy.argpartition(cur[1], -ten_perc)[-ten_perc:])

                # if a gap is bigger than subs indices (== 0.1 risk) -> split into regions
                gaps = numpy.where(numpy.diff(ind) > subs)
                if len(gaps[0]) == 0:
                    runner = [None]
                else:
                    runner = sorted(list(gaps[0]))

                prev = 0
                multi_dist = []
                phm_scores_lst = []
                rmse_scores_lst = []
                for gap_idx in runner:
                    if gap_idx is None:
                        cur_subset_selection = ind
                    else:
                        gap_idx += 1
                        cur_subset_selection = ind[int(prev):int(gap_idx)]

                    values = cur[0][cur_subset_selection]
                    avg = numpy.average(values)
                    # effectively prefers earlier regions, as this does not break up the index as much
                    dist = avg - true_risk
                    multi_dist.append(dist)
                    #print(" - Distance - " + str(dist))

                    # analogously find the phm and rmse scores
                    da_risk_found = avg
                    predicted_rul_feature = (da_risk_found - 1) / m

                    phmScore = self.score_phm(
                        pd.DataFrame([[true_RUL, predicted_rul_feature, -1]],
                                     columns=["RUL", "predicted_RUL", "object_id"]))
                    rmse = self.score_rmse(numpy.array([true_RUL]),
                                           numpy.array([predicted_rul_feature]))
                    phm_scores_lst.append(phmScore)
                    rmse_scores_lst.append(rmse)

                    prev = gap_idx

                '''
                print("\nFeature: " + str(col_name) + "\nplot all - true risk: " + str(true_risk))
                plt.plot(cur[0], cur[1])
                plt.plot(cur[0][cur_subset_selection], cur[1][cur_subset_selection], color='red')
                #plt.plot(cur[0], medfilt(cur[1], 61), color="green")  # KERNEL MUST BE ODD
                plt.xlabel("risk - true risk = " + str(true_risk))
                plt.ylabel("heat - " + str(col_name))
                plt.show()
                '''

                # use this for evaluation
                #phmScore = self.score_phm(pd.DataFrame([[true_RUL, predicted_rul_feature, -1]], columns=["RUL", "predicted_RUL", "object_id"]))
                #rmse = self.score_rmse(numpy.array([true_RUL]), numpy.array([predicted_rul_feature]))

                if col_name in distances:
                    distances[col_name].append(multi_dist)
                    phm_scores[col_name].append(phm_scores_lst)
                    rmse_scores[col_name].append(rmse_scores_lst)
                else:
                    distances[col_name] = [multi_dist]
                    phm_scores[col_name] = [phm_scores_lst]
                    rmse_scores[col_name] = [rmse_scores_lst]

    return distances, phm_scores, rmse_scores
def run(self, data_in):
    super().run(data_in)  # do not remove this!

    Logging.log("Training the heat map...")

    # cross validation params
    iter_idx = 2            # Monte Carlo cross-validation - randomly assign training and test set x times
    percentage_train = 0.8  # percentage of data being the training set

    # 1. transform to df and keep critical
    whole_df = self._extract_critical_data_frame(data_in)
    whole_df[self._field_in_train_cluster_id] = data_in[self._field_in_train_cluster_id]
    lst_of_train_n_test = self._split_to_subsets(whole_df, percentage_train, iter_idx)
    # split frame multiple times, each element being [train_df, test_df]

    # 2. distance - scoring quality of a feature
    dist_score = {}  # key: cluster id, e.g. "c1"; value: list of dicts mapping feature_id to score

    for train_test in lst_of_train_n_test:
        train_df = train_test[0]
        test_df = train_test[1]
        test_df["cluster_id"] = test_df["train_cluster_id"]

        # per cluster its own model
        for cluster_id in list(train_df["train_cluster_id"].unique()):
            #if cluster_id in [1, 5, 4, 0, 3]:  # already done
            #    continue
            if self._test_mode and not (cluster_id == 3 or cluster_id == 1):
                continue

            print("\n\n TRAINING CLUSTER: " + str(cluster_id))
            cur_train_df = train_df[train_df[self._field_in_train_cluster_id] == cluster_id]
            cur_test_df = test_df[test_df["cluster_id"] == cluster_id]

            # 2. scale data and remove outliers
            output_dfs, trained_scalers = self._preprocess_data(
                cur_train_df, self._remove_empty_features,
                self._nr_outlier_iterations, self._outlier_window_size,
                self._outlier_std_threshold)
            data_in["CL_" + str(cluster_id) + "_" +
                    self._field_out_train_model_trained_scalers] = trained_scalers

            # 3. Train the model
            model_per_feature = self._build_heat_map_parallel(output_dfs)
            data_in["CL_" + str(cluster_id) + "_" +
                    self._field_out_train_model_grid_area] = self._grid_area

            # 4. Store the models
            data_in["CL_" + str(cluster_id) + "_" +
                    self._field_out_train_model] = model_per_feature

            # 5. score the feature quality for cross validation
            score_dict = self._score_feature_quality(cur_test_df, whole_df,
                                                     model_per_feature,
                                                     cluster_id, data_in)
            print("Found scores: " + str(score_dict))

            idfr = "c" + str(cluster_id)
            if idfr not in dist_score:
                dist_score[idfr] = [score_dict]
            else:
                dist_score[idfr].append(score_dict)

            try:
                pathh = os.path.join(r"C:\Users\q416435\Desktop\scores",
                                     "cluster_" + str(cluster_id) + ".csv")
                print("Writing file to " + pathh)
                self._csv_file = open(pathh, 'w')
                for ke in score_dict.keys():
                    self._csv_writer = csv.writer(self._csv_file, delimiter=';')
                    self._csv_writer.writerow([ke, str(score_dict[ke])])
                self._csv_file.close()
            except Exception:
                pass

    # 3. Perform training for the whole model now - get final model

    # 4. keep only the optimal models, based on dist_score
    # T.B.D.

    # 5. empty metrics
    metrics = dict()

    return data_in, metrics
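# _split_to_subsets is referenced above but not shown in this section. A
# minimal sketch of what such a Monte Carlo split might look like; the function
# below is an assumption for illustration, not the original implementation.
import pandas as pd

def monte_carlo_splits(df, percentage_train, iterations, id_col="id"):
    splits = []
    ids = pd.Series(df[id_col].unique())
    n_train = int(len(ids) * percentage_train)
    for i in range(iterations):
        train_ids = ids.sample(n_train, random_state=i)
        train_df = df[df[id_col].isin(train_ids)]
        test_df = df[~df[id_col].isin(train_ids)]
        splits.append([train_df, test_df])
    return splits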