def listen():
    RESULT = " "
    print("EİS: Anahtar kelime bekleniyor.")
    with MIC as source:  # listen to the microphone for 2 s
        AUDIO = R.listen(source, phrase_time_limit=2)
    try:  # look for the keyword
        KEYWORD = R.recognize_google(AUDIO, language='tr')  # speech recognition
        KEYWORD_STR = str(KEYWORD)
    except sr.UnknownValueError:  # nothing intelligible was heard
        KEYWORD_STR = " "
    if KEYWORD_STR.lower() == "merhaba":  # keyword matched, so listen for a command
        print("EİS: Komut için dinlemede.")
        with MIC as source:  # listen to the microphone for 3 s
            AUDIO = R.listen(source, phrase_time_limit=3)
        try:  # the actual recognition step
            RESULT = R.recognize_google(AUDIO, language='tr')  # speech recognition
            print("Kullanıcı: " + str(RESULT))
        except sr.UnknownValueError:  # nothing intelligible was heard
            print('EİS: Anlaşılmadı.')
        if RESULT == "kapan":
            sys.exit("Kapanıyor...")
        normalization.normalize(RESULT)  # the "understand and act" step
def bm_25(doc_len_arr, inverted_index, words, l_avg):
    print "EXECUTING BM25"
    result_list = defaultdict(int)
    num_doc_collection = len(doc_len_arr)
    print words
    # normalize the query terms to stay consistent with the index,
    # dropping any term that normalizes to the empty string
    normalized_words = []
    for w in words:
        term = normalize(w)
        if term != "":
            normalized_words.append(term)
    words[:] = normalized_words
    print words
    doc_unranked = get_docs_containing_word(inverted_index, words)
    words_freq_dict = get_frequencies(inverted_index, words)
    for doc in doc_unranked:
        d_length = doc_len_arr[doc]
        res = calculate_rsv(num_doc_collection, words_freq_dict, words, doc,
                            d_length, l_avg)
        # add the result to the dict where the score is the key and the doc the value
        result_list[res] = doc
    return result_list
def get_manual_win_df(window_size):
    manual_features_pt = pd.DataFrame.from_csv(
        './manual/pt_df/unnormalized_pt_features_df.csv')
    manual_labels_pt = pd.DataFrame.from_csv(
        './manual/pt_df/unnormalized_pt_labels_df.csv')['pt_label'].tolist()
    with open("./manual/pt_df/trip_dict.txt", "rb") as fp:  # unpickling
        trip_dict = pickle.load(fp)
    # normalize the point features; manual_features_pt is a DataFrame
    manual_features_pt = normalize(manual_features_pt[DL_FEATURES])
    print "only collect the manually labelled data with user_id = 1"
    labels_win = cal_win_label_special_trip_dict(manual_labels_pt, window_size,
                                                 trip_dict, user_id=1)
    features_win = cal_win_features_special_trip_dict(manual_features_pt,
                                                      window_size, trip_dict,
                                                      user_id=1)
    # normalize the features at window level
    if len(WIN_FEATURES) > 0:
        features_win = win_normalize(features_win)
    # check whether the features match the labels
    if len(features_win) != len(labels_win):
        logging.warning("the window features do not match the labels!")
    manual_win_df = pd.DataFrame(features_win)
    manual_win_df['win_label'] = pd.Series(labels_win)
    # remove windows with a mixed label (5) or an invalid label (-1)
    manual_win_df = manual_win_df[manual_win_df.win_label != 5]
    manual_win_df = manual_win_df[manual_win_df.win_label != -1]
    # the resulting win_df is unbalanced and has 4 labels
    return manual_win_df
def get_app_win_df(window_size):
    app_features_pt = pd.DataFrame.from_csv('./pt_df/unnormalized_pt_features_df.csv')
    app_labels_pt = pd.DataFrame.from_csv(
        './pt_df/unnormalized_pt_labels_df.csv')['pt_label'].tolist()
    with open("./pt_df/trip_dict.txt", "rb") as fp:  # unpickling
        trip_dict = pickle.load(fp)
    # normalize the point features; app_features_pt is a DataFrame
    app_features_pt = normalize(app_features_pt[DL_FEATURES])
    labels_win = cal_win_label(app_labels_pt, window_size, trip_dict)
    features_win = cal_win_features(app_features_pt, window_size, trip_dict)
    # normalize the features at window level
    if len(WIN_FEATURES) > 0:
        features_win = win_normalize(features_win)
    # check whether the features match the labels
    if len(features_win) != len(labels_win):
        logging.warning("the window features do not match the labels!")
    app_win_df = pd.DataFrame(features_win)
    app_win_df['win_label'] = pd.Series(labels_win)
    # remove windows with a mixed label (5)
    app_win_df = app_win_df[app_win_df.win_label != 5]
    # the resulting win_df is unbalanced and has 5 labels
    return app_win_df
def main(cfg):
    try:
        # nltk.download("vader_lexicon")
        # nltk.download('wordnet')
        glbs = GlobalParameters()
        configs = get_cfg_files(cfg)
        total_files = len(configs)
        results = {}
        for i, config in enumerate(configs):
            print_message("Running config {}/{}".format(i + 1, total_files))
            set_global_parameters(config)
            print_run_details()
            dataset_dir = normalize()
            X, y = extract_features(dataset_dir)
            config_result = classify(X, y, glbs.K_FOLDS, glbs.ITERATIONS)
            glbs.RESULTS[glbs.FILE_NAME] = config_result
            glbs.RESULTS = add_results(glbs.RESULTS, glbs)
            if glbs.EXPORT_AS_BASELINE:
                export_as_baseline(config_result, config[1])
        if glbs.WORDCLOUD:
            print_message("Generating word clouds (long processes)")
            generate_word_clouds()
        add_results_glbs(results, glbs)
        write_results(divide_results(glbs.RESULTS))
        send_work_done(glbs.DATASET_DIR)
        print_message("Done!")
    except Exception as e:
        traceback.print_exc()
        send_work_done(glbs.DATASET_DIR, "", error=str(e),
                       traceback=str(traceback.format_exc()))
def recognizeFile(models, file, translate='', rotate='', scale=''):
    """!
    Match a single file and return the resulting scores as well as the
    normalization parameters.

    @param models list: The previously trained HMM models
    @param file String: The file containing the motion.
    @param translate String: The normalization type for correcting translation
    @param rotate String: The normalization type for correcting rotation
    @param scale String: The normalization type for correcting scaling
    @return An array of the model scores, translation, rotation, scaling parameters
    """
    #print(file)
    # read the motion and normalize it
    motion = input.read(file)
    motion, t, r, s = normalization.normalize(motion, translate, rotate, scale)
    plot.addPlot(motion[:, 1:4], file)
    #writePointsToGrapherFile(motion, file)
    scores = []
    # compute the motion score (likelihood) for each trained model
    for i, model in enumerate(models):
        scores.append(float(model.score(motion)))
    return numpy.array(scores), t, r, s
def process(infile):
    # A series of text processing functions can be chained here, with the
    # final output returned to the calling function (see gpio.py's
    # `process_file()` function).
    output = normalization.normalize(infile)
    output = remove_tags.remove(output)
    # The line below does not need to change.
    return output
def run_normalization():
    json_content = request.get_json()
    if "array" in json_content:
        array = json_content["array"]
        norm_array = normalize(array)
        return jsonify({"status": "ok", "result": norm_array})
    return jsonify({"status": "failed", "message": "No array"})
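The view above is a Flask-style JSON endpoint: it expects a request body with an "array" key and returns the normalized values. A minimal client-side sketch, assuming the view is registered under a hypothetical /normalize route on a local development server (the snippet does not show its route decorator):

import requests

# Hypothetical route and port; adjust to wherever run_normalization is registered.
resp = requests.post("http://localhost:5000/normalize",
                     json={"array": [1.0, 2.0, 3.0]})
print(resp.json())  # e.g. {"status": "ok", "result": [...]}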
def main(cfg):
    try:
        glbs = GlobalParameters()
        configs = get_cfg_files(cfg)
        results = {}
        n_test_dir = ""
        total_files = len(configs)
        for i, config in enumerate(configs):
            print_message("Running config {}/{}".format(i + 1, total_files))
            set_global_parameters(config)
            print_run_details()
            n_train_dir = normalize()
            if glbs.TEST_DIR != "":
                n_test_dir = normalize(test=True)
            train, tr_labels, test, ts_labels, all_features = extract_features(
                n_train_dir, n_test_dir)
            for selection in glbs.SELECTION:
                try:
                    train, test = get_selected_features(
                        selection, train, tr_labels, test, ts_labels, all_features)
                except:
                    pass
            results[glbs.FILE_NAME] = classify(train, tr_labels, test, ts_labels,
                                               all_features, model_number=i)
            results = add_results(results)
        if glbs.WORDCLOUD:
            print_message("Generating word clouds (long processes)")
            generate_word_clouds()
        write_results(divide_results(results))
        send_work_done(glbs.TRAIN_DIR)
        print_message("Done!")
        # clean_backup_files()
    except Exception as e:
        traceback.print_exc()
        send_work_done(glbs.TRAIN_DIR, "", error=str(e),
                       traceback=str(traceback.format_exc()))
def score_all_reorderings(references, candidates):
    # Compute the average Kendall's tau over all sentences.
    assert len(candidates) == len(references)
    scores = {}
    for i in range(len(candidates)):
        reference, candidate = normalize(references[i], candidates[i])
        assert reference and candidate, "Normalization failed!"
        scores[i] = normalized_kendalls_tau(reference, candidate)
    print "Average normalized kendalls tau was %1.3f on %d sentences." % (
        float(sum(scores.values())) / len(scores.keys()), len(scores.keys()))
def get_ngrams(self, _w, gram_size=2):
    if _w != EPS:
        w = '<' + strip_accents(normalize(_w.lower())) + '>'
        grams = set([
            w[i:i + n]
            for n in xrange(gram_size, gram_size + 1)
            for i in xrange(len(w))
            if w[i:i + n] != '>'
        ])
    else:
        grams = set([])
    return grams
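A minimal standalone sketch of the same idea, using a hypothetical helper char_bigrams and assuming plain-ASCII input so that normalize and strip_accents would act as identities:

def char_bigrams(word):
    # pad with '<' and '>' so word-initial and word-final bigrams stay distinct
    w = '<' + word.lower() + '>'
    return {w[i:i + 2] for i in range(len(w)) if w[i:i + 2] != '>'}

print(char_bigrams('cat'))  # {'<c', 'ca', 'at', 't>'}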
def separate_reps(data_file, exercise, key, column_labels, epsilon=0.15,
                  gamma=20, delta=0.5, beta=1):
    front_cut_values = [0, 0, 0, 25, 0, 50, 0, 25, 50, 100, 0, 100]
    back_cut_values = [0, 0, 0, 0, 25, 0, 50, 25, 50, 0, 100, 100]
    epsilon_values = [0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
                      0.25, 0.25, 0.25, 0.25, 0.25, 0.25]

    if exercise == 'pushup':
        delta = 0.2
        beta = 2

    for iteration in range(0, len(front_cut_values)):
        front_cut = front_cut_values[iteration]
        back_cut = back_cut_values[iteration]
        epsilon = epsilon_values[iteration]

        data = []

        #=====[ Format each line of data ]=====
        # os.path.join('data/raw_data/squat_pushupData_10to20',
        with open(data_file) as f:
            for line in f:
                try:
                    if 'Infinity' in line or 'NaN' in line:
                        continue
                    line = [float(x.replace('\r\n', '')) for x in line.split(',')]
                    data.append(line)
                except Exception as e:
                    continue
                    # print e

        #=====[ Make dataframe and readjust indices to account for front and back cuts ]=====
        df = pd.DataFrame(data, columns=column_labels)
        df = df[front_cut:df.shape[0] - back_cut]
        df = df.set_index([range(0, df.shape[0])])

        y_coords = np.array(df.get(key))
        mins = get_local_mins(y_coords, epsilon, gamma, delta, beta)
        reps = []

        #=====[ Get points from DF between each min found -- constitutes a single rep ]=====
        for index, x in enumerate(mins):
            if index == len(mins) - 1:
                continue
            rep = (df.loc[x:mins[index + 1] - 1]).copy(True)
            reps.append(rep.set_index([range(rep.shape[0])]))

        if len(reps) > 1:
            break

    if exercise == 'squat':
        return nz.normalize(df, reps)
    elif exercise == 'pushup':
        return pnz.normalize(df, reps)
def action(self, sensorState):
    norm = normalize(sensorState)
    dis = self.discretizer.discretize(norm)
    self.sensorStateHistory.append(dis)
    n = self.problem.required_state_sequence_length()
    if self.problem.goal(self.sensorStateHistory[-n:]):
        print("stop")
        return "stop"
    behavior, _ = self.search.choose_behavior(self.sensorStateHistory,
                                              self.graph, self.problem)
    print(behavior)
    return behavior
def get_list_of_terms(tokenizedTermList, docId):
    token_list = list()
    # Loop through all the terms in the document and add them to the list
    # together with their associated docId.
    for term in tokenizedTermList:
        term = normalize(term)
        if term != '':
            tokenObj = Token(term, docId)
            token_list.append(tokenObj)
            # To remove duplicates, uncomment the following:
            # term_dict.append(term)
    return token_list
def normalize(f):
    f = normalization.normalize(f)
    f = sent_tokenize(f)
    processed = []
    for u in range(0, len(f)):
        tokens = tokenize(f[u])
        processed.extend(tokens)
    return processed
def select_population(self, points_per_front, population_size,
                      combined_population_representation, combined_evaluations,
                      target_functions, z_min, z_max, reference_points):
    structured_points, last_front_points = self.generate_structured_points(
        points_per_front, population_size)
    num_objs = len(target_functions)

    if len(structured_points) == population_size:
        #print('Next generation: ', structured_points)
        population_representation = combined_population_representation[
            structured_points]
        evaluations = combined_evaluations[structured_points]
        return population_representation, evaluations
    else:
        #print('Normalization needed.')
        #print('Structured points: ', structured_points)
        #print('Last front points: ', last_front_points)
        num_elements = len(structured_points) - len(last_front_points)
        next_generation = structured_points[:num_elements]
        #print('Next generation: ', next_generation)
        last_front_index = len(next_generation)
        num_K = population_size - len(next_generation)
        #print('Num points to be chosen from last front: ', num_K)
        normalized_evaluations = normalize(combined_evaluations, structured_points,
                                           num_objs, target_functions, z_min, z_max)
        reference_points_assignment, association_counts_structured_points, \
            association_counts_next_generation, reference_points_perpencidular_distances = \
            associate_dask(structured_points, normalized_evaluations,
                           reference_points, next_generation)
        selected_points = associate_to_niche(
            structured_points, association_counts_next_generation,
            reference_points_assignment, reference_points_perpencidular_distances,
            last_front_points, last_front_index, num_K)
        population_representation = np.concatenate(
            (combined_population_representation[next_generation],
             combined_population_representation[selected_points]), axis=0)
        evaluations = np.concatenate(
            (combined_evaluations[next_generation],
             combined_evaluations[selected_points]), axis=0)
        return population_representation, evaluations
def clustering(dic):
    df = pd.read_csv('./data/hateb.csv')
    td = []
    with open("./data/stop.txt", "r") as f:
        stop_list = [v.rstrip() for v in f.readlines() if v != '\n']
    # Split each document into words and collect them roughly as
    # [([word1, word2, word3], doc_id), ...]
    # words: the words contained in a document (duplicates allowed)
    # tags: identifiers of the document (a list; one document may carry several tags)
    for i in range(len(df)):
        wordlist = parseText(text=str(df['content'][i]), sysdic=dic)
        # unify character types and absorb spelling / orthographic variation
        normalizedlist = [normalize(word) for word in wordlist]
        # remove stopwords
        stopremovedlist = remove_stopwords(normalizedlist, stop_list)
        td.append(TaggedDocument(words=stopremovedlist, tags=[i]))
    # build the model
    model = Doc2Vec(documents=td, dm=1, vector_size=300, window=8,
                    min_count=10, workers=4)
    # store the document vectors in a list
    vectors_list = [model.docvecs[n] for n in range(len(model.docvecs))]
    # list of document numbers
    doc_nums = range(len(model.docvecs))
    # clustering settings; change n_clusters if you want a different number of clusters
    n_clusters = 8
    kmeans_model = KMeans(n_clusters=n_clusters, verbose=1, random_state=1, n_jobs=-1)
    # run the clustering
    kmeans_model.fit(vectors_list)
    # labels assigned to the clustered data
    labels = kmeans_model.labels_
    # build a mapping from cluster label to document numbers
    cluster_to_docs = defaultdict(list)
    for cluster_id, doc_num in zip(labels, doc_nums):
        cluster_to_docs[cluster_id].append(doc_num)
    # print the clusters
    for docs in cluster_to_docs.values():
        print(docs)
    # add the cluster_id column to the DataFrame
    df['cluster_id'] = labels
    df.to_csv('data/hateb_cluster.csv')
def insert(data):
    db = get_instance()
    cur = db.cursor()
    data = normalization.normalize(data)
    columns = data.keys()
    values = [data[col] for col in columns]
    stmt = 'INSERT INTO crimes (%s) VALUES %s'
    cur.execute(stmt, (AsIs(','.join(columns)), tuple(values)))
    # print(cur.mogrify(stmt, (AsIs(','.join(columns)), tuple(values))))
def main(img, model_core, model_top, model_bot):
    import cv2
    import numpy as np
    import gary_convert as gc
    import binarize as br
    import segment as sg
    import gaussfun as gau
    import normalization as norm

    heighty, widthx, ch = img.shape
    #cv2.imshow("input", img)
    img1 = np.zeros((heighty, widthx), np.uint8)
    print(ch)
    #*********************************************
    gc.gray_con(heighty, widthx, img)
    for i in range(0, heighty):
        for j in range(0, widthx):
            img1[i][j] = img[i, j, 0]
    print("Grayed")
    #cv2.imshow("Gray conversion", img1)
    #*********************************************
    '''nr.noisered(heighty, widthx, img1)
    print("filtered")'''
    #*********************************************
    gau.gauss(heighty, widthx, img1)
    #cv2.imshow("Gaussian Blurring", img1)
    #*********************************************
    norm.normalize(heighty, widthx, img1)
    print("Normalized")
    #cv2.imshow("contrast stretching", img1)
    #*********************************************
    br.bin_(heighty, widthx, img1)
    print("Binarized")
    #cv2.imshow("binarization", img1)
    #*********************************************
    a = sg.segFun(heighty, widthx, img1, model_core, model_top, model_bot)
    print("Segmented")
    return a
def _resolve_participant(self, finalist):
    search_name = normalize(finalist.Name)
    if finalist.Name in self._name_exceptions:
        search_name = self._name_exceptions[finalist.Name]
    for part in self._participants:
        if part.NormalizedName == search_name:
            part.Participations.append(finalist)
            return part
    new_participant = Participant(finalist)
    # Overwrite the normalized name so that exceptions work properly.
    new_participant.NormalizedName = search_name
    self._participants.append(new_participant)
    return new_participant
def plotFile(file, translate='', rotate='', scale=''):
    """!
    Read a single motion from a file and add it to the current plot list.

    @param file String: The file containing the motion.
    @param translate String: The normalization type for correcting translation
    @param rotate String: The normalization type for correcting rotation
    @param scale String: The normalization type for correcting scaling
    """
    # read the motion and normalize it
    motion = input.read(file)
    motion, t, r, s = normalization.normalize(motion, translate, rotate, scale)
    plot.addPlot(motion[:, 1:4], file)
def datingClassTest():
    hoRatio = 0.10  # fraction of the data held out for testing
    k = 7
    datingDataMat, datingLabels = getData.file2Matrix('../datas/datingTestSet2.txt')
    normMat = normalization.normalize(datingDataMat)
    m = normMat.shape[0]  # number of rows
    numTestVecs = int(m * hoRatio)  # number of test samples
    errorCount = 0  # number of misclassified samples
    for i in range(numTestVecs):
        classifierResult = KNN.KNN(normMat[i, :], normMat[numTestVecs:m, :],
                                   datingLabels[numTestVecs:m], k)
        if classifierResult != datingLabels[i]:
            errorCount += 1
    return float(errorCount) / float(numTestVecs)
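For reference, a minimal driver for the hold-out test above, assuming the getData, normalization and KNN helper modules from the snippet are importable and the hard-coded data path exists:

if __name__ == '__main__':
    # the function returns the fraction of misclassified hold-out samples
    error_rate = datingClassTest()
    print("hold-out error rate: %.4f" % error_rate)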
def nextBehavior(self, sensorState):
    print(sensorState, file=self.f)
    normSensorState = normalize(sensorState)
    dis = ",".join(map(str, self.discretizer.discretize(normSensorState)))
    if dis not in self.stateBehaviors:
        self.stateBehaviors[dis] = {}
    behavior = self.selectBehavior(self.stateBehaviors[dis])
    if behavior not in self.stateBehaviors[dis]:
        self.stateBehaviors[dis][behavior] = 0
    self.stateBehaviors[dis][behavior] += 1
    print(self.stateBehaviors)
    print(behavior)
    print(behavior, file=self.f)
    return behavior
def main(dir_path, output_dir):
    '''
    Run the pipeline of processes on the files one by one.
    '''
    files = os.listdir(dir_path)
    for file_name in files:
        file_dataframe = pd.read_csv(os.path.join(dir_path, file_name))
        cols = ['high', 'open', 'low', 'close', 'volume', 'adj_close']
        file_dataframe = interpolate(file_dataframe, cols)
        file_dataframe = normalize(file_dataframe, cols)
        file_dataframe.to_csv(os.path.join(output_dir, file_name), encoding='utf-8')
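A minimal way to invoke the pipeline above from the command line; the script name and directory paths here are placeholders, assuming the input and output directories are passed as arguments:

import sys

if __name__ == '__main__':
    # e.g. python pipeline.py ./raw_csv ./normalized_csv
    main(sys.argv[1], sys.argv[2])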
def createMotionsAndLengths(path, translate='', rotate='', scale=''):
    """!
    Read the motions from a folder and create a concatenated array together
    with the lengths. The given directory 'path' must contain a subdirectory
    'training' containing the motions as individual csv files.

    @param path String: The path to the motion data.
    @param translate: The normalization for translating the motions
    @param rotate: The normalization for rotating the motions
    @param scale: The normalization for scaling the motions
    @return: The concatenated motions and a list of the motion lengths
    """
    # list of motions read from the files
    motions = []
    # lengths of the motions read from the files
    lengths = []
    count = 0
    plot.clearPlot()
    input.logLn('\n- ' + '{:<10}'.format(path + ':'))
    # read all files from the directory associated with one motion
    for file in sorted(glob.glob(path + '/training/*.csv')):
        print(file)
        input.logLn(string.basename(string.splitext(file)[0]))
        count += 1
        # read the motion and normalize it
        motion = input.read(file)
        motion, t, r, s = normalization.normalize(motion, translate, rotate, scale)
        # add the motion to the plot of all training motions
        plot.addPlot(motion[:, 1:4], file)
        # add the motion to the list of motions
        motions.append(motion)
        # add the length (number of poses in the motion) to the list of lengths
        lengths.append(len(motion))
    _, folderName = string.split(string.dirname(path))
    # plot all training motions
    plot.plot('../plots/' + folderName + ' training')
    # the observations are a list of poses
    X = numpy.concatenate(motions)
    return X, lengths
def nextBehavior(self, sensorState):
    self.round += 1
    print("\nRound:", self.round)
    print("Algorithm:", self.algorithmState)
    print(sensorState, file=self.f)
    normSensorState = normalize(sensorState)
    discretized = self.discretizer.discretize(normSensorState)
    dis = self.graph.state_to_key(discretized)
    if self.previous_state is not None and self.previous_behavior is not None:
        self.graph.construct([self.previous_state, discretized],
                             [self.previous_behavior])
    if dis not in self.stateBehaviors:
        self.sinceLastNewState = 0
        self.stateBehaviors[dis] = {}
        self.stateBehaviors[dis]["count"] = 0
        self.stateBehaviors[dis]["origin"] = discretized
    behavior = "explore"
    if self.algorithmState == "random":
        behavior = self.randomBehavior(dis, discretized)
    elif self.algorithmState == "intelligent":
        behavior = self.intelligentBehavior(dis, discretized)
    elif self.algorithmState == "search":
        behavior = self.searchBehavior(dis, discretized)
    if behavior not in self.stateBehaviors[dis]:
        self.stateBehaviors[dis][behavior] = 0
    self.stateBehaviors[dis][behavior] += 1
    self.stateBehaviors[dis]["count"] += 1
    print(self.stateBehaviors)
    print("State:", dis)
    print("Behavior:", behavior)
    print(behavior, file=self.f)
    self.previous_state = discretized
    self.previous_behavior = behavior
    self.graph.visualize("dot/graph.dot", self.initialState)
    self.graph.visualize("dot/graph_%d.dot" % self.round, self.initialState)
    return behavior
def searchAnd(dict, terms):
    tempDict = defaultdict(list)
    result = []
    for term in terms:
        term = normalize(term)
        if term != "":
            # fetch the postings list for each term
            tempDict[term].extend(dict[term])
            if len(tempDict[term]) == 0:
                return "The word you are looking for could not be found."
    # start from the term with the smallest postings list
    smallest_term = findSmallestList(tempDict)
    result.extend(tempDict[smallest_term])
    del tempDict[smallest_term]
    while len(tempDict) > 0:
        smallest_term = findSmallestList(tempDict)
        result = intersect(result, deepcopy(tempDict[smallest_term]))
        del tempDict[smallest_term]
    return result
def main():
    dic = {}
    normalization = []
    bunsho_file = sys.argv[1]
    data, book_list = make_jp_data(bunsho_file)
    for i in range(len(data)):
        tfidf, word_list_i = calc_tfidf(data[i], data)
        word_for_normalization, list_for_normalization = make_normalization_data(data)
        normalization.append(
            normalize(word_list_i, tfidf, word_for_normalization,
                      list_for_normalization))
    print("\n\n===============similarity===============")
    similarity = calc_cosine_similarity(normalization)
    print(similarity)
    print("\n\n===============recommendation================")
    recommendation = recommend(similarity, book_list)
    if len(recommendation) == 0:
        print("There are no recommendations in the data.")
    else:
        for i, v in enumerate(recommendation.values()):
            print(i + 1, v)
def test_normal(self):
    self.assertEqual(normalization.normalize("This is a normal sentence."),
                     "This is a normal sentence.")
def summarize(doc_list):
    stop_words = list(set(stopwords.words("english")))
    f = open('data/text_doc.json', 'r')
    text = json.load(f)
    f.close()
    f = open('data/tfidf_index.json', 'r')
    indices = json.load(f)
    f.close()
    all_summaries = dict()

    def row_normalize(A):
        return (A.transpose() * (1 / A.sum(1))).transpose()

    def idf(word):
        stemmer = PorterStemmer()
        word = stemmer.stem(word)
        #global indices
        if word in indices:
            idf_value = indices[word].values()[0][1]
        else:
            idf_value = 0.0000001
        return idf_value

    def idf_modified_cosine(str1, str2):
        x = TE.tokenize(str1)
        y = TE.tokenize(str2)
        num = den1 = den2 = 0.0
        for w in list(set(x + y)):
            if w not in stop_words:
                num += x.count(w) * y.count(w) * (idf(w)**2)
        for xi in x:
            if xi not in stop_words:
                den1 += (x.count(xi) * idf(xi))**2
        for yi in y:
            if yi not in stop_words:
                den2 += (y.count(yi) * idf(yi))**2
        result = num / np.sqrt(den1 * den2)
        return result

    def create_centrality_matrix(sentences):
        N = len(sentences)
        centrality_matrix = np.zeros([N, N])
        for i, senti in enumerate(sentences):
            for j, sentj in enumerate(sentences):
                centrality_matrix[i][j] = idf_modified_cosine(senti, sentj)
        centrality_matrix = row_normalize(centrality_matrix)
        return centrality_matrix

    def LexRank(str):
        sentences = sent_tokenize(str)
        N = len(sentences)
        M = create_centrality_matrix(sentences)
        d = 0.85
        page_rank = np.ones(N) / N
        degree = np.zeros(N)
        for u in range(N):
            for v in range(N):
                if u != v and M[u][v] > 0:
                    degree[u] += M[u][v]
        for i in range(100):
            for u in range(N):
                rank = 0
                for v in range(N):
                    if M[u][v] > 0 and u != v:
                        rank += (M[u][v] * page_rank[v] / degree[v])
                page_rank[u] = (1 - d) / N + d * rank
        # take the two highest-ranked sentences as the summary
        summary = ''
        for i in heapq.nlargest(2, range(N), page_rank.take):
            summary += sentences[i]
        return summary

    # enumerate so the progress message can report the file number
    for i, doc in enumerate(doc_list):
        txt = text[doc]
        docname = doc
        summary = LexRank(normalization.normalize(txt))
        all_summaries[docname] = summary
        print "File ", i + 1, " : ", docname, " done."
    return all_summaries
    '1.0, 1.0, 0.0': 'facing object',
    '0.0, 0.0, 1.0': 'object grabbed',
    '1.0, 0.0, 1.0': 'object grabbed and object in front',
    '1.0, 1.0, 1.0': 'object grabbed and facing object',
}

if __name__ == '__main__':
    plt.rcParams.update({'figure.autolayout': True})
    filename = sys.argv[1]
    sensorStates, behaviors = dataloader.load(filename)
    discretizer = SimplifyingDiscretizer()
    graph = MarkovChainGraph()
    result = {}
    ss = []
    for sensorState in sensorStates:
        sensorState = normalize(sensorState)
        s = discretizer.discretize(sensorState)
        s = graph.state_to_key(s)
        ss.append(stateTranslate[s])
        if stateTranslate[s] in result:
            result[stateTranslate[s]] += 1
        else:
            result[stateTranslate[s]] = 1
    graph.construct(ss, behaviors)
    graph.visualize(filename + '.dot', ss[0])
    dictionary = plt.figure()
    axes = plt.gca()
    axes.set_ylim([0, 1000])
# -*- coding: utf-8 -*-
import numpy as np
import loaddata as ld
import gradientdescent as GD
import normalization as norm

X, Y, n = ld.load('data.txt')
X = norm.normalize(X, n)
X = X.reshape((n * 2))
tmp = []
for i in xrange(0, 2 * n, 2):  # clumsy reshaping into [1, x1, x2] rows ("rip my hands off")
    tmp.append(1)
    tmp.append(X[i])
    tmp.append(X[i + 1])
X = np.array(tmp).reshape(n, 3)
print X

alpha = 0.01
iterations = 400
theta = np.zeros((3, 1))  # initialize the fitting parameters
def __init__(self, finalist):
    self.Participations = [finalist]
    self.Name = finalist.Name  # assumed: the participant takes its name from the finalist record
    self.NormalizedName = normalize(self.Name)
def process(self, subject, cap, mon, device, gaze_network, por_available=False,
            show=False):
    g_t = None
    data = {'image_a': [], 'gaze_a': [], 'head_a': [],
            'R_gaze_a': [], 'R_head_a': []}
    if por_available:
        f = open('./%s_calib_target.pkl' % subject, 'rb')
        targets = pickle.load(f)

    frames_read = 0
    ret, img = cap.read()
    while ret:
        img = self.undistorter.apply(img)
        if por_available:
            g_t = targets[frames_read]
        frames_read += 1

        # detect face
        face_location = face.detect(img, scale=0.25, use_max='SIZE')

        if len(face_location) > 0:
            # use a Kalman filter to smooth the bounding box position,
            # working with complex numbers
            output_tracked = self.kalman_filters[0].update(
                face_location[0] + 1j * face_location[1])
            face_location[0], face_location[1] = np.real(output_tracked), \
                np.imag(output_tracked)
            output_tracked = self.kalman_filters[1].update(
                face_location[2] + 1j * face_location[3])
            face_location[2], face_location[3] = np.real(output_tracked), \
                np.imag(output_tracked)

            # detect facial points
            pts = self.landmarks_detector.detect(face_location, img)

            # run a Kalman filter on the landmarks to smooth them
            for i in range(68):
                kalman_filters_landm_complex = self.kalman_filters_landm[i].update(
                    pts[i, 0] + 1j * pts[i, 1])
                pts[i, 0], pts[i, 1] = np.real(kalman_filters_landm_complex), \
                    np.imag(kalman_filters_landm_complex)

            # compute the head pose
            fx, _, cx, _, fy, cy, _, _, _ = self.cam_calib['mtx'].flatten()
            camera_parameters = np.asarray([fx, fy, cx, cy])
            rvec, tvec = self.head_pose_estimator.fit_func(pts, camera_parameters)

            ######### GAZE PART #########

            # create the normalized eye patch and gaze and head pose values,
            # if the ground-truth point of regard is given
            head_pose = (rvec, tvec)
            por = None
            if por_available:
                por = np.zeros((3, 1))
                por[0] = g_t[0]
                por[1] = g_t[1]
            entry = {
                'full_frame': img,
                '3d_gaze_target': por,
                'camera_parameters': camera_parameters,
                'full_frame_size': (img.shape[0], img.shape[1]),
                'face_bounding_box': (int(face_location[0]), int(face_location[1]),
                                      int(face_location[2] - face_location[0]),
                                      int(face_location[3] - face_location[1]))
            }
            [patch, h_n, g_n, inverse_M, gaze_cam_origin, gaze_cam_target] = \
                normalize(entry, head_pose)
            # cv2.imshow('raw patch', patch)

            def preprocess_image(image):
                ycrcb = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)
                ycrcb[:, :, 0] = cv2.equalizeHist(ycrcb[:, :, 0])
                image = cv2.cvtColor(ycrcb, cv2.COLOR_YCrCb2RGB)
                # cv2.imshow('processed patch', image)
                image = np.transpose(image, [2, 0, 1])  # CxHxW
                image = 2.0 * image / 255.0 - 1
                return image

            # estimate the PoR using the gaze network
            processed_patch = preprocess_image(patch)
            processed_patch = processed_patch[np.newaxis, :, :, :]

            # functions to calculate the relative rotation matrices for the
            # gaze direction and the head pose
            def R_x(theta):
                sin_ = np.sin(theta)
                cos_ = np.cos(theta)
                return np.array([[1., 0., 0.],
                                 [0., cos_, -sin_],
                                 [0., sin_, cos_]]).astype(np.float32)

            def R_y(phi):
                sin_ = np.sin(phi)
                cos_ = np.cos(phi)
                return np.array([[cos_, 0., sin_],
                                 [0., 1., 0.],
                                 [-sin_, 0., cos_]]).astype(np.float32)

            def calculate_rotation_matrix(e):
                return np.matmul(R_y(e[1]), R_x(e[0]))

            def pitchyaw_to_vector(pitchyaw):
                vector = np.zeros((3, 1))
                vector[0, 0] = np.cos(pitchyaw[0]) * np.sin(pitchyaw[1])
                vector[1, 0] = np.sin(pitchyaw[0])
                vector[2, 0] = np.cos(pitchyaw[0]) * np.cos(pitchyaw[1])
                return vector

            # compute the ground-truth POR if the ground truth is available
            R_head_a = calculate_rotation_matrix(h_n)
            R_gaze_a = np.zeros((1, 3, 3))
            if type(g_n) is np.ndarray:
                R_gaze_a = calculate_rotation_matrix(g_n)

                # verify that g_n can be transformed back to the screen's
                # pixel location shown during calibration
                gaze_n_vector = pitchyaw_to_vector(g_n)
                gaze_n_forward = -gaze_n_vector
                g_cam_forward = inverse_M * gaze_n_forward

                # compute the POR on the z=0 plane
                d = -gaze_cam_origin[2] / g_cam_forward[2]
                por_cam_x = gaze_cam_origin[0] + d * g_cam_forward[0]
                por_cam_y = gaze_cam_origin[1] + d * g_cam_forward[1]
                por_cam_z = 0.0

                x_pixel_gt, y_pixel_gt = mon.camera_to_monitor(por_cam_x, por_cam_y)
                # verified for correctness of calibration targets

            input_dict = {
                'image_a': processed_patch,
                'gaze_a': g_n,
                'head_a': h_n,
                'R_gaze_a': R_gaze_a,
                'R_head_a': R_head_a,
            }
            if por_available:
                data['image_a'].append(processed_patch)
                data['gaze_a'].append(g_n)
                data['head_a'].append(h_n)
                data['R_gaze_a'].append(R_gaze_a)
                data['R_head_a'].append(R_head_a)

            if show:
                # compute the eye gaze and point of regard
                for k, v in input_dict.items():
                    input_dict[k] = torch.FloatTensor(v).to(device).detach()

                gaze_network.eval()
                output_dict = gaze_network(input_dict)
                output = output_dict['gaze_a_hat']
                g_cnn = output.data.cpu().numpy()
                g_cnn = g_cnn.reshape(3, 1)
                g_cnn /= np.linalg.norm(g_cnn)

                # compute the POR on the z=0 plane
                g_n_forward = -g_cnn
                g_cam_forward = inverse_M * g_n_forward
                g_cam_forward = g_cam_forward / np.linalg.norm(g_cam_forward)
                d = -gaze_cam_origin[2] / g_cam_forward[2]
                por_cam_x = gaze_cam_origin[0] + d * g_cam_forward[0]
                por_cam_y = gaze_cam_origin[1] + d * g_cam_forward[1]
                por_cam_z = 0.0

                x_pixel_hat, y_pixel_hat = mon.camera_to_monitor(por_cam_x, por_cam_y)

                output_tracked = self.kalman_filter_gaze[0].update(
                    x_pixel_hat + 1j * y_pixel_hat)
                x_pixel_hat, y_pixel_hat = np.ceil(np.real(output_tracked)), \
                    np.ceil(np.imag(output_tracked))

                # show the point of regard on screen
                display = np.ones((mon.h_pixels, mon.w_pixels, 3), np.float32)
                h, w, c = patch.shape
                display[0:h, int(mon.w_pixels / 2 - w / 2):int(mon.w_pixels / 2 + w / 2), :] = \
                    1.0 * patch / 255.0
                font = cv2.FONT_HERSHEY_SIMPLEX
                if type(g_n) is np.ndarray:
                    cv2.putText(display, '.', (x_pixel_gt, y_pixel_gt), font, 0.5,
                                (0, 0, 0), 10, cv2.LINE_AA)
                cv2.putText(display, '.', (int(x_pixel_hat), int(y_pixel_hat)), font,
                            0.5, (0, 0, 255), 10, cv2.LINE_AA)
                cv2.namedWindow("por", cv2.WINDOW_NORMAL)
                cv2.setWindowProperty("por", cv2.WND_PROP_FULLSCREEN,
                                      cv2.WINDOW_FULLSCREEN)
                cv2.imshow('por', display)

                # also show the face
                cv2.rectangle(img, (int(face_location[0]), int(face_location[1])),
                              (int(face_location[2]), int(face_location[3])),
                              (255, 0, 0), 2)
                self.landmarks_detector.plot_markers(img, pts)
                self.head_pose_estimator.drawPose(img, rvec, tvec,
                                                  self.cam_calib['mtx'],
                                                  np.zeros((1, 4)))
                cv2.imshow('image', img)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    cap.release()
                    break

        # read the next frame
        ret, img = cap.read()

    return data
def evaluate(minority_label, majority_label, training_data, training_target,
             test_data, test_true_target, clf,
             p_synthetic_samples=None, p_majority_samples=None):
    '''
    Parameters
    ----------
    minority_label :
        Label of the minority class.
    majority_label :
        Label of the majority class.
    p_synthetic_samples :
        Sets parameter N for SMOTE, i.e. how many synthetic samples are
        supposed to be generated. If not None, SMOTE is done.
    p_majority_samples :
        Sets how many majority samples should be used.
        n_majority_samples = p_majority_samples / 100 * n_minority_samples.
        If None, no under-sampling is done.
    '''
    # Normalize the training data
    theta, sigma = calculate_mean_and_std_deviation(training_data)
    training_data = normalize(training_data, theta, sigma)

    # Train and test only on the two labels
    minor_mask = (training_target == samples.get_target_number(minority_label))
    major_mask = (training_target == samples.get_target_number(majority_label))
    minority_samples = training_data[minor_mask]
    majority_samples = training_data[major_mask]
    minority_target = training_target[minor_mask]
    majority_target = training_target[major_mask]
    training_sizes = {minority_label: minority_samples.shape[0],
                      majority_label: majority_samples.shape[0]}

    # Under-sampling
    if p_majority_samples is not None:
        logger.info("Under-sample majority class...")
        n_majority_samples = p_majority_samples / 100 * minority_samples.shape[0]
        np.random.shuffle(majority_samples)
        majority_samples = majority_samples[:n_majority_samples]
        logger.info("Selected %d random majority samples." % majority_samples.shape[0])
        majority_target = np.empty(shape=(majority_samples.shape[0]))
        majority_target[:] = samples.get_target_number(majority_label)

    # SMOTE
    if p_synthetic_samples is not None:
        logger.info("SMOTE minority class...")
        # Create synthetic data and targets
        synthetic_minor_samples = SMOTE(minority_samples, p_synthetic_samples, k=5)
        synthetic_targets = np.empty(shape=(synthetic_minor_samples.shape[0]))
        synthetic_targets[:] = samples.get_target_number(minority_label)
        logger.info("Created %d synthetic minority samples from %d samples with N = %d."
                    % (synthetic_minor_samples.shape[0], minority_samples.shape[0],
                       p_synthetic_samples))
        # Add the synthetic data and targets
        minority_samples = np.concatenate((minority_samples, synthetic_minor_samples))
        minority_target = np.concatenate((minority_target, synthetic_targets))

    # Put minorities and majorities together
    training_data = np.concatenate((minority_samples, majority_samples))
    training_target = np.concatenate((minority_target, majority_target))

    # Train
    logger.info("Train classifier...")
    clf.fit(training_data, training_target)

    # Use only the targets for the two labels
    mask = (test_true_target == samples.get_target_number(minority_label))
    neg_mask = (test_true_target == samples.get_target_number(majority_label))
    evaluation_sizes = {minority_label: np.sum(mask),
                        majority_label: np.sum(neg_mask)}
    test_data = np.concatenate((test_data[mask], test_data[neg_mask]))
    test_true_target = np.concatenate((test_true_target[mask],
                                       test_true_target[neg_mask]))

    # Normalize the test data
    test_data = normalize(test_data, theta, sigma)

    test_predicted_target = clf.predict(test_data)
    logger.debug("Predicted classes: %s" % unicode(np.unique(test_predicted_target)))
    logger.debug("%d, %d" % (np.sum(test_predicted_target == samples.get_target_number(minority_label)),
                             np.sum(test_predicted_target == samples.get_target_number(majority_label))))

    # Score the test data and targets
    logger.info("Calculate F1 Score...")
    precisions, recalls, f1_scores, _ = metrics.precision_recall_fscore_support(
        test_true_target, test_predicted_target, pos_label=None)
    for precision, recall, f1_score, label in izip(precisions, recalls, f1_scores,
                                                   [minority_label, majority_label]):
        logger.info("%s: Recall = %.5f, Precision = %.5f, F1 Score = %.5f"
                    % (label, recall, precision, f1_score))

    return precisions, recalls, f1_scores, evaluation_sizes, training_sizes
def getInitialState(self, sensorState):
    self.initialState = self.discretizer.discretize(normalize(sensorState))
    return self.nextBehavior(sensorState)
def __init__(self, name, provterr=None):
    self.Name = name
    self.NormalizedName = normalize(name)
    self.ProvTerr = provterr
    self.FirstSeenYear = 9999
    self.LastSeenYear = 0
import tensorflow as tf
import numpy as np
import time
import batch
import normalization as norm
import discrimination as dsc

train = np.load('trainset.npy')
test = np.load('testset.npy')

trainSet = train[()]
trainFeatures = trainSet['features'].astype('float32').reshape(-1, 28, 28, 1)
trainFeatures = norm.normalize(trainFeatures)
trainLabels = trainSet['labels']
#print(trainFeatures[0])
#print(norm.normalize(trainFeatures)[0])

testSet = test[()]
testFeatures = testSet['features'].astype('float32').reshape(-1, 28, 28, 1)
testFeatures = norm.normalize(testFeatures)
testLabels = testSet['labels']

numOfFeatures = len(trainFeatures[0][0])
numOfLabels = len(trainLabels[0])

learning_rate = 0.002225
training_epochs = 100
batch_size = 32
train_keep_prob = 0.5

X = tf.placeholder(
def norm_word(w):
    if w == NULL:
        return w
    n_w = strip_accents(normalize(w))
    return n_w
import time
import batch
import normalization as norm
import discrimination as dsc
import tensorflow as tf  # needed for tf.test and the placeholders below

device_name = tf.test.gpu_device_name()

#train = np.load('train_set.npy')
#test = np.load('imageset2.npy')
#valid = np.load('test_set.npy')

imgSize = 62

#testSet = test[()]
testSet = image_set
testFeatures = testSet['features'].astype('float32').reshape(-1, imgSize, imgSize, 1)
testFeatures = norm.normalize(testFeatures)
#testLabels = testSet['labels']

numOfFeatures = 62
numOfLabels = 20

learning_rate = 0.000225
training_epochs = 100
batch_size = 64
train_keep_prob = 0.5

tf.reset_default_graph()

X = tf.placeholder(tf.float32, [None, numOfFeatures, numOfFeatures, 1])  # X: placeholder for features
Y = tf.placeholder(tf.float32, [None, numOfLabels])  # Y: placeholder for labels
keep_prob = tf.placeholder(tf.float32, None)  # placeholder for the dropout rate
training_set = json.load(f)
X = np.array(training_set["datapoint"])

# number of training examples
m = np.size(X, axis=0)
# number of classes in the output
k = training_set["num_classes"]
# number of input features, excluding the bias node
n = np.size(X, axis=1)

# X is a matrix with m rows, each row containing the features
# including the bias node
normalize(X)
X = np.hstack((np.ones((m, 1)), X))

# Y is a matrix with m rows, each row with 0 or 1 for each output class
Y = np.zeros((m, k))
for i in range(m):
    Y[i][training_set["label"][i]] = 1

# parameters for multiplying with the first and second layers
h = n + 2  # hidden layer size
Th1 = np.random.rand(n + 1, h - 1)
Th2 = np.random.rand(h, k)

lamb = 0.01  # regularization parameter
niter = 100000  # number of iterations for learning