def define_features(vocab_S, vocab_T, char_vocab_S, char_vocab_T,
                    Embs_S, Embs_T, num_cells):
    maximum_length_S = max(len(w) for w in vocab_S)
    maximum_length_T = max(len(w) for w in vocab_T)
    maximum_length = max(maximum_length_S, maximum_length_T)

    char_features_encoder = wordencoding.BilingualRNNEncoding(
        char_vocab_S, char_vocab_T, num_cells)
    labeled_charlevel_features = char_features_encoder(maximum_length)
    unlabeled_charlevel_features = char_features_encoder(maximum_length, reuse=True)

    wordlevel_features_encoder = wordencoding.WordLevelEncoding(
        vocab_S, embeddings=Embs_S, scope='source')
    labeled_word_embs_S = wordlevel_features_encoder()
    unlabeled_word_embs_S = wordlevel_features_encoder()

    wordlevel_features_encoder = wordencoding.WordLevelEncoding(
        vocab_T, embeddings=Embs_T, scope='target')
    labeled_word_embs_T = wordlevel_features_encoder()
    unlabeled_word_embs_T = wordlevel_features_encoder()

    return [
        features.Features([
            labeled_charlevel_features, labeled_word_embs_S, labeled_word_embs_T
        ]),
        features.Features([
            unlabeled_charlevel_features, unlabeled_word_embs_S, unlabeled_word_embs_T
        ])
    ]
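# A minimal usage sketch (hypothetical vocabularies, embeddings and cell count;
# the wordencoding/features APIs are assumed from the function above). The two
# returned feature sets share the character-level encoder, with the unlabeled
# branch reusing its variables.
labeled_features, unlabeled_features = define_features(
    vocab_S, vocab_T, char_vocab_S, char_vocab_T, Embs_S, Embs_T, num_cells=64)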
def load_pages(self):
    if not os.path.exists('/home/reborn/.config/openbox'):
        self.pages["language"] = language.Language(self.params)

    self.pages["check"] = check.Check(self.params)
    self.pages["location"] = location.Location(self.params)
    self.pages["mirrors"] = mirrors.Mirrors(self.params)
    self.pages["timezone"] = timezone.Timezone(self.params)

    if self.settings.get('desktop_ask'):
        self.pages["keymap"] = keymap.Keymap(self.params)
        self.pages["desktop"] = desktop.DesktopAsk(self.params)
        self.pages["features"] = features.Features(self.params)
    else:
        self.pages["keymap"] = keymap.Keymap(self.params, next_page='features')
        self.pages["features"] = features.Features(self.params, prev_page='keymap')

    self.pages["installation_ask"] = installation_ask.InstallationAsk(self.params)
    self.pages["installation_automatic"] = installation_automatic.InstallationAutomatic(self.params)

    if self.settings.get("enable_alongside"):
        self.pages["installation_alongside"] = installation_alongside.InstallationAlongside(self.params)
    else:
        self.pages["installation_alongside"] = None

    self.pages["installation_advanced"] = installation_advanced.InstallationAdvanced(self.params)
    self.pages["installation_zfs"] = installation_zfs.InstallationZFS(self.params)
    self.pages["summary"] = summary.Summary(self.params)
    self.pages["user_info"] = user_info.UserInfo(self.params)
    self.pages["slides"] = slides.Slides(self.params)

    diff = 2
    if os.path.exists('/home/antergos/.config/openbox'):
        # In minimal (openbox) we don't have a welcome screen
        diff = 3

    num_pages = len(self.pages) - diff
    if num_pages > 0:
        self.progressbar_step = 1.0 / num_pages
def test_should_compute_rider_primitives(self):
    # TODO: Need to add tests around the computed rider and local features
    fx = features.Features()
    fx.load_graph("resources/sample_graph_2.txt")
    fx.compute_primitive_features(rider_fx=False, rider_dir="resources/riders/")
    for vertex in fx.graph.nodes():
        self.assertEquals(len(fx.graph.node[vertex]), 28)

    fx = features.Features()
    fx.load_graph("resources/sample_graph_2.txt")
    fx.compute_primitive_features(rider_fx=True, rider_dir="resources/riders/")
    for vertex in fx.graph.nodes():
        self.assertEquals(len(fx.graph.node[vertex]), 52)
def find_cars(img, windows, clf, X_scaler):
    possible_cars = []
    f = F.Features()
    positive_predictions = 0
    positive_predictions_above_threshold = 0
    # Iterate over all windows in the list
    for w in windows:
        # Extract the test window from the original image
        sub_image = img[w[0][1]:w[1][1], w[0][0]:w[1][0]]
        test_img = cv2.resize(sub_image, (64, 64))
        # Extract features for that window
        features = f.extract_features_image(test_img, color_space='YCrCb')
        # Scale extracted features to be fed to the classifier
        test_features = X_scaler.transform(np.array(features).reshape(1, -1))
        # Predict using your classifier
        prediction = clf.predict(test_features)
        # If positive (prediction == 1) then save the window
        if prediction == 1:
            # Confidence score
            confidence_score = clf.predict_proba(test_features)
            # Probability threshold for a positive detection
            if confidence_score[0][1] > PROBABILITY_THRESHOLD:
                possible_cars.append(w)
                positive_predictions_above_threshold += 1
            positive_predictions += 1
    # Return windows for positive detections
    print("positive_predictions:", positive_predictions)
    print("positive_predictions_above_threshold:", positive_predictions_above_threshold)
    return possible_cars
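# A hedged usage sketch: slide_window() is a hypothetical helper that generates
# the search windows; clf and X_scaler are the classifier and feature scaler
# trained elsewhere in the pipeline, not defined in this snippet.
windows = slide_window(img, xy_window=(96, 96), xy_overlap=(0.5, 0.5))
car_windows = find_cars(img, windows, clf, X_scaler)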
def initialize(self):
    self.stored['very easy'] = list()
    self.stored['easy'] = list()
    self.stored['medium'] = list()
    self.stored['hard'] = list()
    self.stored['fiendish'] = list()
    self.feature = features.Features()
def test_should_compare_cols_within_maxdist(self):
    fx = features.Features()

    # Identical columns compare as similar at any threshold.
    column_1 = np.array([1.0, 2.0, 3.0])
    column_2 = np.array([1.0, 2.0, 3.0])
    max_dist = 0
    actual = fx.fx_column_comparator(column_1, column_2, max_dist)
    self.assertTrue(actual)

    max_dist = 1
    actual = fx.fx_column_comparator(column_1, column_2, max_dist)
    self.assertTrue(actual)

    # Columns offset by 0.5 compare as similar only when max_dist >= 0.5
    # (the threshold is inclusive).
    column_1 = np.array([1.0, 2.0, 3.0])
    column_2 = np.array([1.5, 2.5, 3.5])
    max_dist = 0
    actual = fx.fx_column_comparator(column_1, column_2, max_dist)
    self.assertFalse(actual)

    max_dist = 1
    actual = fx.fx_column_comparator(column_1, column_2, max_dist)
    self.assertTrue(actual)

    max_dist = 0.5
    actual = fx.fx_column_comparator(column_1, column_2, max_dist)
    self.assertTrue(actual)

    max_dist = 0.49
    actual = fx.fx_column_comparator(column_1, column_2, max_dist)
    self.assertFalse(actual)
def initializeFeatureVector():
    f = features.Features()
    # Register the feature functions f0..f30, in order.
    for i in range(31):
        f.fv.append(getattr(f, 'f%d' % i))
    return f
def test_should_load_graph(self):
    fx = features.Features()
    fx.load_graph("resources/sample_graph.txt")
    self.assertEquals(fx.graph.number_of_nodes(), 4)
    self.assertEquals(fx.graph.number_of_edges(), 10)
    self.assertEquals(fx.graph[1][2]['weight'], 1)
    self.assertEquals(fx.graph[3][4]['weight'], 2)
def __get_feature_names(self):
    if self.args.deep:
        feature_set = features_deep.Features()
    else:
        feature_set = features.Features()
    feature_names = feature_set.get_feature_names()
    return feature_names
def insert_audio_files(fileList, dbName, chroma=True, mfcc=False, cqft=False, progress=None):
    """
    ::

        Insert features into an audioDB database named by dbName.
        Features are either chroma [default], mfcc, or cqft.
        Feature parameters are default.
    """
    db = adb.get(dbName, "w")
    if not db:
        print("Could not open database: %s" % dbName)
        return False
    del db  # commit the changes by closing the header
    db = adb.get(dbName)  # re-open for writing data
    # FIXME: need to test if KEY (%i) already exists in db
    # Support for removing keys via include/exclude keys
    for a, i in enumerate(fileList):
        if progress:
            progress((a + 0.5) / float(len(fileList)), i)  # frac, fname
        print("Processing file: %s" % i)
        F = features.Features(i)
        if chroma:
            F.feature_params['feature'] = 'chroma'
        elif mfcc:
            F.feature_params['feature'] = 'mfcc'
        elif cqft:
            F.feature_params['feature'] = 'cqft'
        else:
            raise error.BregmanError("One of chroma, mfcc, or cqft must be specified for features")
        F.extract()  # raw features and power in Bels
        if progress:
            progress((a + 1.0) / float(len(fileList)), i)  # frac, fname
        db.insert(featData=F.CHROMA.T,
                  powerData=adb.feature_scale(F.POWER, bels=True),
                  key=i)
    return db
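# A hedged usage sketch, assuming the audioDB bindings (adb) used above and a
# hypothetical list of audio files on disk.
file_list = ['audio/track_01.wav', 'audio/track_02.wav']
db = insert_audio_files(file_list, 'chroma.adb', chroma=True)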
def __init__(self, state, game_engine):
    # Set the state and game engine.
    self.state = state
    self.game = game_engine

    # Create a features object that will be used to compute the current
    # features of the state that Pacman cares about.
    self.features = features.Features(state, game_engine)
    self.feature_dict = None
    self.prev_features = {}

    # Load the training data from file.
    self.training_data = {}
    self.load_training_data()

    # Initialize other state that is used by the learning algorithm.
    self.cur_qvals = {}
    self.decision_count = 0
    self.prev_action = None
    self.prev_qval = None
    self.call_counter = 0

    # Initialize attributes for tracking results.
    self.results_mode = self.game.manager.config_options['results_mode']
    self.results_games = self.game.manager.config_options['results_games']
    self.games_count = 0
    self.average_score = 0.0
    self.average_level = 0.0
def output_file_location(filename, directory=None):
    """Return the path to save a file to, honouring 'etc/savedir' if given.

    If 'directory' is None and globalfeatures (see nsim.features.Features)
    has a section 'etc' with an entry 'savedir', then that value is
    prepended to the filename.

    If 'directory' is given (a string), it is prepended to 'filename'.

    Warning: this has not been used for some time and needs further testing.
    In particular, we need to double-check whether this function is called
    wherever files are created. 12/02/2007 08:25
    """
    if directory is None:
        globalfeatures = features.Features()
        if globalfeatures.has_sectionkey('etc', 'savedir'):
            directory = globalfeatures.get('etc', 'savedir')
        else:
            directory = ''
    path = os.path.join(directory, filename)
    log.debug('composing path to save data to be %s' % path)
    return path
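# A minimal usage sketch (hypothetical filename; whether a savedir prefix is
# applied depends on the 'etc/savedir' entry described in the docstring above).
path = output_file_location('simulation_data.ndt')
print(path)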
def prepare_training_data():
    """
    Create training data from the labeled sound folders.

    :return: dict of arrays ('label', 'raw', 'vec', 'mat'), also dumped to
        PICKLE_PATH; suitable as input to sklearn.model_selection.train_test_split().
    """
    sound_root = '../data/training_data/'
    sound_folders = [
        'crying', 'livestream_crying', 'silence', 'noise',
        'baby_laugh', 'aria_crying', 'aria_other'
    ]

    label_list = []
    raw_list = []
    vec_list = []
    mat_list = []

    for folder in sound_folders:
        logging.info(f'Processing files in {sound_root + folder}...')
        # Data labels are determined by the folder name.
        label = int('crying' in folder)
        path = os.path.join(sound_root, folder)
        for file in os.listdir(path):
            logging.debug(f'Processing {file} in folder {folder}...')
            if file.endswith(('.wav', '.ogg')):
                file_path = os.path.join(path, file)
                f = features.Features(file_path).featurize()
                if f.raw is None:
                    print(f'Skipping {file} because it is likely too short')
                    continue
                label_list.append(np.array([label]))
                raw_list.append(f.raw)
                vec_list.append(f.vec)
                mat_list.append(f.mat)

    # TODO: Re-write the interface for mat_list elements to fit with a 1D convnet.
    training_data_ = dict()
    training_data_['label'] = np.concatenate(label_list)
    training_data_['raw'] = np.concatenate(raw_list)
    training_data_['vec'] = np.concatenate(vec_list)
    training_data_['mat'] = np.concatenate(mat_list)
    lib.dump_to_pickle(training_data_, PICKLE_PATH)
    return training_data_
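# A hedged follow-on sketch: split the prepared arrays for training, along the
# lines of the train_test_split reference in the docstring above.
from sklearn.model_selection import train_test_split

data = prepare_training_data()
X_train, X_test, y_train, y_test = train_test_split(
    data['vec'], data['label'], test_size=0.2, random_state=42)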
def test_should_return_egonet(self):
    fx = features.Features()
    fx.load_graph("resources/sample_graph.txt")
    self.assertEquals(fx.get_egonet_members(1), [1, 2, 3, 4])
    self.assertEquals(fx.get_egonet_members(2), [1, 2, 3])
    self.assertEquals(fx.get_egonet_members(2, level=1), [1, 2, 3, 4])
    self.assertEquals(fx.get_egonet_members(4), [1, 3, 4])
    self.assertEquals(fx.get_egonet_members(4, level=1), [1, 2, 3, 4])
def test_should_init_refex_log_binned_buckets(self):
    fx = features.Features()
    fx.load_graph("resources/sample_graph.txt")
    rfx = refex.Refex(fx.get_number_of_vertices())
    expected_log_binned_buckets = {0: 3, 1: 2, 2: 1}
    rfx.init_log_binned_fx_buckets()
    self.assertEquals(rfx.refex_log_binned_buckets, expected_log_binned_buckets)
def test_should_compute_primitives(self):
    fx = features.Features()
    fx.load_graph("resources/sample_graph.txt")
    fx.compute_primitive_features()
    # TODO: Check the log binned Fx values
    self.assertEquals(fx.graph.node[1]['wn0'], 1)
    self.assertEquals(fx.graph.node[1]['wn1'], 0)
    self.assertEquals(fx.graph.node[2]['wn0'], 0)
    self.assertEquals(fx.graph.node[2]['wn1'], 0)
def __init__(self, utils, dirNum):
    process.ProcessFile()
    freq = {}
    self.dirNum = dirNum
    self.utils = utils
    self.fList = []
    self.feat = features.Features(utils)
    self.functionList = []
def infer(wav):
    f = features.Features(wav)
    f.featurize()
    if f.vec is None:
        print(f'\nFile {wav} is not featurized. File likely too short.')
        return None
    else:
        return clf.predict(scaler.transform(f.vec))
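# A minimal usage sketch (hypothetical path; clf and scaler are the fitted
# classifier and scaler this module is assumed to load elsewhere).
prediction = infer('recordings/sample_cry.wav')
if prediction is not None:
    print('crying' if prediction[0] == 1 else 'not crying')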
def main(): print "Loading data..." X, Y = load_data("../training_set.py") print "len(X):", len(X), "len(X[0]):", len(X[0]), "len(Y):", len(Y) ftrs = features.Features(X, Y) ftrs.build_aprx() thetas = ftrs.get_thetas() for theta in thetas: print thetas write_thetas(thetas)
def get_screen(screen_name, params):
    screen = None
    if screen_name == "DesktopAsk":
        import desktop
        screen = desktop.DesktopAsk(params)
    elif screen_name == "Check":
        from modules.pages import check
        screen = check.Check(params)
    elif screen_name == "Timezone":
        import timezone
        screen = timezone.Timezone(params)
        params['settings'].set('timezone_start', True)
    elif screen_name == "Wireless":
        import wireless
        screen = wireless.Wireless(params)
    elif screen_name == "Welcome":
        import welcome
        screen = welcome.Welcome(params)
    elif screen_name == "UserInfo":
        import user_info
        screen = user_info.UserInfo(params)
    elif screen_name == "Location":
        import location
        screen = location.Location(params)
    elif screen_name == "Language":
        import language
        screen = language.Language(params)
    elif screen_name == "Keymap":
        import keymap
        screen = keymap.Keymap(params)
    elif screen_name == "Features":
        import features
        screen = features.Features(params)
    elif screen_name == "Summary":
        import summary
        screen = summary.Summary(params)
    elif screen_name == "Slides":
        import slides
        screen = slides.Slides(params)
    elif screen_name == "InstallationAsk":
        import ask
        screen = ask.InstallationAsk(params)
    elif screen_name == "InstallationAdvanced":
        import advanced
        screen = advanced.InstallationAdvanced(params)
    elif screen_name == "InstallationAlongside":
        import alongside
        screen = alongside.InstallationAlongside(params)
    elif screen_name == "InstallationAutomatic":
        import automatic
        screen = automatic.InstallationAutomatic(params)
    elif screen_name == "zfs":
        import zfs
        screen = zfs.InstallationZFS(params)
    return screen
def test_similarity_user_all(self, monkeypatch):
    monkeypatch.setattr(features.Features, '_Features__sim_size', lambda self, i, j: 1)
    monkeypatch.setattr(features.Features, '_Features__sim_texture', lambda self, i, j: 1)
    monkeypatch.setattr(features.Features, '_Features__sim_color', lambda self, i, j: 1)
    monkeypatch.setattr(features.Features, '_Features__sim_fill', lambda self, i, j: 1)
    w = features.SimilarityMask(1, 1, 1, 1)
    f = features.Features(self.dummy_image, self.dummy_label, 1, w)
    assert f.similarity(0, 1) == 4
def main():
    below50k = features.Features("below50k")
    above50k = features.Features("above50k")

    with open("sample.txt", 'rb') as f:
        for line in f:
            feature = line.split(',')
            outcome = feature[14].rstrip()
            if outcome == ' <=50K':
                below50k.takeFeatures(feature)
            if outcome == ' >50K':
                above50k.takeFeatures(feature)

    below50k.calculateNumericAverages()
    below50k.calculateDiscreteAverages()
    above50k.calculateNumericAverages()
    above50k.calculateDiscreteAverages()

    comparer = compare.CompareFeatures(below50k, above50k)
    comparer.Compare()
    comparer.Print()
def define_features(vocab_S, vocab_T, char_vocab_S, char_vocab_T,
                    Embs_S, Embs_T, num_cells):
    maximum_length_S = max(len(w) for w in vocab_S)
    maximum_length_T = max(len(w) for w in vocab_T)
    maximum_length = max(maximum_length_S, maximum_length_T)

    char_features_encoder = wordencoding.BilingualRNNEncoding(
        char_vocab_S, char_vocab_T, num_cells)
    charlevel_features = char_features_encoder(maximum_length)

    wordlevel_features_encoder = wordencoding.WordLevelEncoding(
        vocab_S, embeddings=Embs_S, scope='source')
    word_embs_S = wordlevel_features_encoder()

    wordlevel_features_encoder = wordencoding.WordLevelEncoding(
        vocab_T, embeddings=Embs_T, scope='target')
    word_embs_T = wordlevel_features_encoder()

    return features.Features([charlevel_features, word_embs_S, word_embs_T])
def test_should_compute_recursive_features(self):
    fx = features.Features()
    fx.load_graph("resources/sample_graph_2.txt")
    fx.compute_primitive_features(rider_fx=False, rider_dir="resources/riders/")
    for vertex in fx.graph.nodes():
        self.assertEquals(len(fx.graph.node[vertex]), 28)

    prev_fx_matrix = fx.create_initial_feature_matrix()
    new_fx_matrix = fx.compute_recursive_features(prev_fx_matrix, 1, 0.0)
    print(len(list(new_fx_matrix.dtype.names)))
    print(len(list(prev_fx_matrix.dtype.names)))
def loop(msgs):
    'return (feature_vectors, unused messages)'
    vp = machine_learning.make_verbose_print(False)
    set_trace()
    feature_creators = (
        ('trace_print', features.trace_print),
    )
    result_feature_vectors = []
    result_unused = msgs
    pdb.set_trace()
    for i in range(0, n_feature_vectors, 1):
        msgs_to_be_used = msgs[i:]
        all_features = features.Features()
        for feature_creator in feature_creators:
            for cusip in cusips:
                try:
                    cusip_features, unused = feature_creator[1](msgs_to_be_used, cusip)
                except exception.NoFeatures as e:
                    raise exception.Features('cusip %s, %s' % (cusip, e.msg))
                if len(unused) < len(result_unused):
                    result_unused = copy.copy(unused)
                # update feature names to incorporate the cusip
                for k, v in cusip_features.items():
                    key = (
                        'id_%s_%s' % (cusip, k[3:]) if k.startswith('id_')
                        else '%s_%s_%s' % (feature_creator[0], cusip, k)
                    )
                    all_features.add(key, v)
        continue  # bypass old code, for now
        # try:
        #     fv, unused = feature_vector(msgs_to_be_used, cusips, required_reclassified_trade_type)
        #     vp('loop %d: fv trigger identifier: %s len(msgs): %d, len(unused): %d' % (
        #         i,
        #         fv['id_trigger_identifier'],
        #         len(msgs_to_be_used),
        #         len(unused),
        #     ))
        #     if False and i % 10 == 1:
        #         pdb.set_trace()
        #     result_feature_vectors.append(fv)
        #     if len(unused) < len(result_unused):
        #         result_unused = copy.copy(unused)
        # except exception.NoPriorEventWithCusipAndRtt as e:
        #     vp('stub: handle exception %s' % e)
        #     break
        # except exception.NoMessageWithCusip as e:
        #     vp('stub: handle exception %s' % e)
        #     break
    set_trace()
    return list(reversed(result_feature_vectors)), result_unused
def test_should_vertical_bin_correctly(self):
    fx = features.Features()
    fx.no_of_vertices = 6
    fx.init_log_binned_fx_buckets()
    actual = fx.vertical_bin([(0, 4), (1, 3), (2, 2), (3, 2), (4, 4), (5, 1)])
    # fx_value of 1 has 2 candidates
    expected = {5: 0, 2: 0, 3: 0, 1: 1, 0: 1, 4: 1}
    self.assertEquals(actual, expected)
def maxent_label(input_file, output_file, encode='utf-8'):
    input = codecs.open(input_file, "r", encode)
    output = codecs.open(output_file, "w", encode)
    count = 0
    for line in input.readlines():
        count += 1
        # Process only every third line.
        if count % 3 != 0:
            continue
        line = chtl.strq2b(line)
        text, tags = label(line)
        textfea = feats.Features(text)
        for i in range(len(text)):
            curfea = textfea.getFeats(i)
            output.write(' '.join(curfea))
            output.write(' ' + tags[i])
            output.write('\n')
    input.close()
    output.close()
def test_should_digitize_correctly(self):
    fx = features.Features()
    log_bins = np.logspace(np.log10(2), np.log10(20), num=15, endpoint=False)
    self.assertEquals(log_bins.size, 15)
    self.assertEquals(fx.digitize(2.0, log_bins=log_bins, file_name="sample"), "sample_0")
    self.assertEquals(fx.digitize(5.0, log_bins=log_bins, file_name="sample"), "sample_5")
    self.assertEquals(fx.digitize(15.0, log_bins=log_bins, file_name="sample"), "sample_13")
    self.assertEquals(fx.digitize(20.0, log_bins=log_bins, file_name="sample"), "sample_14")
def __init__(self, path_train=PATH_TN, learning_rate=0.01, n_estimators=1500,
             max_depth=13, min_child_weight=1, gamma=0.1, subsample=0.7,
             colsample_bytree=0.6, objective='binary:logistic', seed=2018,
             label='', tfbdc=False, debug=True, bdc=True, qz='DT', opt='bdc'):
    # qz in ('DT', 'sum', 'icf')
    self.xgboost = XGBClassifier(learning_rate=learning_rate,
                                 n_estimators=n_estimators,
                                 max_depth=max_depth,
                                 min_child_weight=min_child_weight,
                                 gamma=gamma,
                                 subsample=subsample,
                                 colsample_bytree=colsample_bytree,
                                 objective=objective,
                                 seed=seed,
                                 n_jobs=60)
    self.gbdt = GradientBoostingClassifier()
    self.rf = RandomForestClassifier()
    self.label = label
    self.model = [lgb, self.xgboost, self.gbdt, self.rf, lgb]
    self.SBBTree = SBBTree(model=self.model, bagging_num=5)
    self.BDC = bdc
    BDC = features.Features(tfbdc=tfbdc, debug=debug, BDC=self.BDC, qz=qz)
    Vocab_bdc = BDC.load_bdc(PATH_TN, label=self.label)
    self._X, self._Y, label_to_id, self.uuid = BDC.load_X_Y(
        Vocab_bdc, path=PATH_TN, label=self.label, froms='', opt=opt)
    self._X = np.array(self._X)
    self._Y = np.array(self._Y)
    self.uuid = np.array(self.uuid)
    self.kf = KFold(n_splits=CV, random_state=2018)
    self.id_to_lable = {i: _ for _, i in label_to_id.items()}
def test_should_init_vertical_bins(self):
    fx = features.Features()

    fx.no_of_vertices = 4
    fx.init_log_binned_fx_buckets()
    self.assertEquals(len(fx.refex_log_binned_buckets), fx.no_of_vertices)
    self.assertEquals(fx.refex_log_binned_buckets, [0, 0, 1, 2])

    fx.no_of_vertices = 5
    fx.refex_log_binned_buckets = []
    fx.init_log_binned_fx_buckets()
    self.assertEquals(len(fx.refex_log_binned_buckets), fx.no_of_vertices)
    self.assertEquals(fx.refex_log_binned_buckets, [0, 0, 1, 1, 2])

    fx.no_of_vertices = 8
    fx.refex_log_binned_buckets = []
    fx.init_log_binned_fx_buckets()
    self.assertEquals(len(fx.refex_log_binned_buckets), fx.no_of_vertices)
    self.assertEquals(fx.refex_log_binned_buckets, [0, 0, 0, 0, 1, 1, 2, 3])