def define_features(vocab_S, vocab_T, char_vocab_S, char_vocab_T,
                    Embs_S, Embs_T, num_cells):
    maximum_length_S = max(len(w) for w in vocab_S)
    maximum_length_T = max(len(w) for w in vocab_T)
    maximum_length = max(maximum_length_S, maximum_length_T)

    char_features_encoder = wordencoding.BilingualRNNEncoding(
        char_vocab_S, char_vocab_T, num_cells)
    labeled_charlevel_features = char_features_encoder(maximum_length)
    unlabeled_charlevel_features = char_features_encoder(maximum_length, reuse=True)

    wordlevel_features_encoder = wordencoding.WordLevelEncoding(
        vocab_S, embeddings=Embs_S, scope='source')
    labeled_word_embs_S = wordlevel_features_encoder()
    unlabeled_word_embs_S = wordlevel_features_encoder()

    wordlevel_features_encoder = wordencoding.WordLevelEncoding(
        vocab_T, embeddings=Embs_T, scope='target')
    labeled_word_embs_T = wordlevel_features_encoder()
    unlabeled_word_embs_T = wordlevel_features_encoder()

    return [
        features.Features([
            labeled_charlevel_features, labeled_word_embs_S, labeled_word_embs_T
        ]),
        features.Features([
            unlabeled_charlevel_features, unlabeled_word_embs_S, unlabeled_word_embs_T
        ])
    ]
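# A minimal usage sketch (hypothetical vocabularies, embeddings and cell count;
# the wordencoding/features APIs are assumed from the function above). The two
# returned feature sets share the character-level encoder, with the unlabeled
# branch reusing its variables.
labeled_features, unlabeled_features = define_features(
    vocab_S, vocab_T, char_vocab_S, char_vocab_T, Embs_S, Embs_T, num_cells=64)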
def load_pages(self):
    if not os.path.exists('/home/reborn/.config/openbox'):
        self.pages["language"] = language.Language(self.params)

    self.pages["check"] = check.Check(self.params)
    self.pages["location"] = location.Location(self.params)
    self.pages["mirrors"] = mirrors.Mirrors(self.params)
    self.pages["timezone"] = timezone.Timezone(self.params)

    if self.settings.get('desktop_ask'):
        self.pages["keymap"] = keymap.Keymap(self.params)
        self.pages["desktop"] = desktop.DesktopAsk(self.params)
        self.pages["features"] = features.Features(self.params)
    else:
        self.pages["keymap"] = keymap.Keymap(self.params, next_page='features')
        self.pages["features"] = features.Features(self.params, prev_page='keymap')

    self.pages["installation_ask"] = installation_ask.InstallationAsk(self.params)
    self.pages["installation_automatic"] = installation_automatic.InstallationAutomatic(self.params)

    if self.settings.get("enable_alongside"):
        self.pages["installation_alongside"] = installation_alongside.InstallationAlongside(self.params)
    else:
        self.pages["installation_alongside"] = None

    self.pages["installation_advanced"] = installation_advanced.InstallationAdvanced(self.params)
    self.pages["installation_zfs"] = installation_zfs.InstallationZFS(self.params)
    self.pages["summary"] = summary.Summary(self.params)
    self.pages["user_info"] = user_info.UserInfo(self.params)
    self.pages["slides"] = slides.Slides(self.params)

    diff = 2
    if os.path.exists('/home/antergos/.config/openbox'):
        # In minimal (openbox) we don't have a welcome screen
        diff = 3

    num_pages = len(self.pages) - diff
    if num_pages > 0:
        self.progressbar_step = 1.0 / num_pages
def test_should_compute_rider_primitives(self):
    # TODO: Need to add tests around the computed rider and local features
    fx = features.Features()
    fx.load_graph("resources/sample_graph_2.txt")
    fx.compute_primitive_features(rider_fx=False, rider_dir="resources/riders/")
    for vertex in fx.graph.nodes():
        self.assertEquals(len(fx.graph.node[vertex]), 28)

    fx = features.Features()
    fx.load_graph("resources/sample_graph_2.txt")
    fx.compute_primitive_features(rider_fx=True, rider_dir="resources/riders/")
    for vertex in fx.graph.nodes():
        self.assertEquals(len(fx.graph.node[vertex]), 52)
def find_cars(img, windows, clf, X_scaler):
    possible_cars = []
    f = F.Features()
    positive_predictions = 0
    positive_predictions_above_threshold = 0
    # Iterate over all windows in the list
    for w in windows:
        # Extract the test window from the original image
        sub_image = img[w[0][1]:w[1][1], w[0][0]:w[1][0]]
        test_img = cv2.resize(sub_image, (64, 64))
        # Extract features for that window
        features = f.extract_features_image(test_img, color_space='YCrCb')
        # Scale extracted features to be fed to the classifier
        test_features = X_scaler.transform(np.array(features).reshape(1, -1))
        # Predict using your classifier
        prediction = clf.predict(test_features)
        # If positive (prediction == 1) then save the window
        if prediction == 1:
            # Confidence score
            confidence_score = clf.predict_proba(test_features)
            # Probability threshold for a positive detection
            if confidence_score[0][1] > PROBABILITY_THRESHOLD:
                possible_cars.append(w)
                positive_predictions_above_threshold += 1
            positive_predictions += 1
    # Return windows for positive detections
    print("positive_predictions:", positive_predictions)
    print("positive_predictions_above_threshold:", positive_predictions_above_threshold)
    return possible_cars
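# A hedged usage sketch: slide_window() is a hypothetical helper that generates
# the search windows; clf and X_scaler are the classifier and feature scaler
# trained elsewhere in the pipeline, not defined in this snippet.
windows = slide_window(img, xy_window=(96, 96), xy_overlap=(0.5, 0.5))
car_windows = find_cars(img, windows, clf, X_scaler)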
def initialize(self):
    self.stored['very easy'] = list()
    self.stored['easy'] = list()
    self.stored['medium'] = list()
    self.stored['hard'] = list()
    self.stored['fiendish'] = list()
    self.feature = features.Features()
def test_should_compare_cols_within_maxdist(self):
    fx = features.Features()

    # Identical columns compare as similar at any threshold.
    column_1 = np.array([1.0, 2.0, 3.0])
    column_2 = np.array([1.0, 2.0, 3.0])
    max_dist = 0
    actual = fx.fx_column_comparator(column_1, column_2, max_dist)
    self.assertTrue(actual)

    max_dist = 1
    actual = fx.fx_column_comparator(column_1, column_2, max_dist)
    self.assertTrue(actual)

    # Columns offset by 0.5 compare as similar only when max_dist >= 0.5
    # (the threshold is inclusive).
    column_1 = np.array([1.0, 2.0, 3.0])
    column_2 = np.array([1.5, 2.5, 3.5])
    max_dist = 0
    actual = fx.fx_column_comparator(column_1, column_2, max_dist)
    self.assertFalse(actual)

    max_dist = 1
    actual = fx.fx_column_comparator(column_1, column_2, max_dist)
    self.assertTrue(actual)

    max_dist = 0.5
    actual = fx.fx_column_comparator(column_1, column_2, max_dist)
    self.assertTrue(actual)

    max_dist = 0.49
    actual = fx.fx_column_comparator(column_1, column_2, max_dist)
    self.assertFalse(actual)
def initializeFeatureVector():
    f = features.Features()
    # Register the feature functions f0..f30, in order.
    for i in range(31):
        f.fv.append(getattr(f, 'f%d' % i))
    return f
def test_should_load_graph(self):
    fx = features.Features()
    fx.load_graph("resources/sample_graph.txt")
    self.assertEquals(fx.graph.number_of_nodes(), 4)
    self.assertEquals(fx.graph.number_of_edges(), 10)
    self.assertEquals(fx.graph[1][2]['weight'], 1)
    self.assertEquals(fx.graph[3][4]['weight'], 2)
def __get_feature_names(self):
    if self.args.deep:
        feature_set = features_deep.Features()
    else:
        feature_set = features.Features()
    feature_names = feature_set.get_feature_names()
    return feature_names
def insert_audio_files(fileList, dbName, chroma=True, mfcc=False, cqft=False, progress=None):
    """
    ::

        Insert features into an audioDB database named by dbName.
        Features are either chroma [default], mfcc, or cqft.
        Feature parameters are default.
    """
    db = adb.get(dbName, "w")
    if not db:
        print("Could not open database: %s" % dbName)
        return False
    del db  # commit the changes by closing the header
    db = adb.get(dbName)  # re-open for writing data
    # FIXME: need to test if KEY (%i) already exists in db
    # Support for removing keys via include/exclude keys
    for a, i in enumerate(fileList):
        if progress:
            progress((a + 0.5) / float(len(fileList)), i)  # frac, fname
        print("Processing file: %s" % i)
        F = features.Features(i)
        if chroma:
            F.feature_params['feature'] = 'chroma'
        elif mfcc:
            F.feature_params['feature'] = 'mfcc'
        elif cqft:
            F.feature_params['feature'] = 'cqft'
        else:
            raise error.BregmanError("One of chroma, mfcc, or cqft must be specified for features")
        F.extract()  # raw features and power in Bels
        if progress:
            progress((a + 1.0) / float(len(fileList)), i)  # frac, fname
        db.insert(featData=F.CHROMA.T,
                  powerData=adb.feature_scale(F.POWER, bels=True),
                  key=i)
    return db
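# A hedged usage sketch, assuming the audioDB bindings (adb) used above and a
# hypothetical list of audio files on disk.
file_list = ['audio/track_01.wav', 'audio/track_02.wav']
db = insert_audio_files(file_list, 'chroma.adb', chroma=True)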
def __init__(self, state, game_engine):
    # Set the state and game engine.
    self.state = state
    self.game = game_engine

    # Create a features object that will be used to compute the current
    # features of the state that Pacman cares about.
    self.features = features.Features(state, game_engine)
    self.feature_dict = None
    self.prev_features = {}

    # Load the training data from file.
    self.training_data = {}
    self.load_training_data()

    # Initialize other state that is used by the learning algorithm.
    self.cur_qvals = {}
    self.decision_count = 0
    self.prev_action = None
    self.prev_qval = None
    self.call_counter = 0

    # Initialize attributes for tracking results.
    self.results_mode = self.game.manager.config_options['results_mode']
    self.results_games = self.game.manager.config_options['results_games']
    self.games_count = 0
    self.average_score = 0.0
    self.average_level = 0.0
def output_file_location(filename, directory=None):
    """Return the path to save a file to, honouring 'etc/savedir' if given.

    If 'directory' is None and globalfeatures (see nsim.features.Features)
    has a section 'etc' with an entry 'savedir', then that value is
    prepended to the filename.

    If 'directory' is given (a string), it is prepended to 'filename'.

    Warning: this has not been used for some time and needs further testing.
    In particular, we need to double-check whether this function is called
    wherever files are created. 12/02/2007 08:25
    """
    if directory is None:
        globalfeatures = features.Features()
        if globalfeatures.has_sectionkey('etc', 'savedir'):
            directory = globalfeatures.get('etc', 'savedir')
        else:
            directory = ''
    path = os.path.join(directory, filename)
    log.debug('composing path to save data to be %s' % path)
    return path
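# A minimal usage sketch (hypothetical filename; whether a savedir prefix is
# applied depends on the 'etc/savedir' entry described in the docstring above).
path = output_file_location('simulation_data.ndt')
print(path)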
def prepare_training_data():
    """
    Create training data from the labeled sound folders.

    :return: dict of arrays ('label', 'raw', 'vec', 'mat'), also dumped to
        PICKLE_PATH; suitable as input to sklearn.model_selection.train_test_split().
    """
    sound_root = '../data/training_data/'
    sound_folders = [
        'crying', 'livestream_crying', 'silence', 'noise',
        'baby_laugh', 'aria_crying', 'aria_other'
    ]

    label_list = []
    raw_list = []
    vec_list = []
    mat_list = []

    for folder in sound_folders:
        logging.info(f'Processing files in {sound_root + folder}...')
        # Data labels are determined by the folder name.
        label = int('crying' in folder)
        path = os.path.join(sound_root, folder)
        for file in os.listdir(path):
            logging.debug(f'Processing {file} in folder {folder}...')
            if file.endswith(('.wav', '.ogg')):
                file_path = os.path.join(path, file)
                f = features.Features(file_path).featurize()
                if f.raw is None:
                    print(f'Skipping {file} because it is likely too short')
                    continue
                label_list.append(np.array([label]))
                raw_list.append(f.raw)
                vec_list.append(f.vec)
                mat_list.append(f.mat)

    # TODO: Re-write the interface for mat_list elements to fit with a 1D convnet.
    training_data_ = dict()
    training_data_['label'] = np.concatenate(label_list)
    training_data_['raw'] = np.concatenate(raw_list)
    training_data_['vec'] = np.concatenate(vec_list)
    training_data_['mat'] = np.concatenate(mat_list)
    lib.dump_to_pickle(training_data_, PICKLE_PATH)
    return training_data_
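# A hedged follow-on sketch: split the prepared arrays for training, along the
# lines of the train_test_split reference in the docstring above.
from sklearn.model_selection import train_test_split

data = prepare_training_data()
X_train, X_test, y_train, y_test = train_test_split(
    data['vec'], data['label'], test_size=0.2, random_state=42)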
def test_should_return_egonet(self):
    fx = features.Features()
    fx.load_graph("resources/sample_graph.txt")
    self.assertEquals(fx.get_egonet_members(1), [1, 2, 3, 4])
    self.assertEquals(fx.get_egonet_members(2), [1, 2, 3])
    self.assertEquals(fx.get_egonet_members(2, level=1), [1, 2, 3, 4])
    self.assertEquals(fx.get_egonet_members(4), [1, 3, 4])
    self.assertEquals(fx.get_egonet_members(4, level=1), [1, 2, 3, 4])
def test_should_init_refex_log_binned_buckets(self):
    fx = features.Features()
    fx.load_graph("resources/sample_graph.txt")
    rfx = refex.Refex(fx.get_number_of_vertices())
    expected_log_binned_buckets = {0: 3, 1: 2, 2: 1}
    rfx.init_log_binned_fx_buckets()
    self.assertEquals(rfx.refex_log_binned_buckets, expected_log_binned_buckets)
def test_should_compute_primitives(self):
    fx = features.Features()
    fx.load_graph("resources/sample_graph.txt")
    fx.compute_primitive_features()
    # TODO: Check the log binned Fx values
    self.assertEquals(fx.graph.node[1]['wn0'], 1)
    self.assertEquals(fx.graph.node[1]['wn1'], 0)
    self.assertEquals(fx.graph.node[2]['wn0'], 0)
    self.assertEquals(fx.graph.node[2]['wn1'], 0)
def __init__(self, utils, dirNum):
    process.ProcessFile()
    freq = {}
    self.dirNum = dirNum
    self.utils = utils
    self.fList = []
    self.feat = features.Features(utils)
    self.functionList = []
def infer(wav):
    f = features.Features(wav)
    f.featurize()
    if f.vec is None:
        print(f'\nFile {wav} is not featurized. File likely too short.')
        return None
    else:
        return clf.predict(scaler.transform(f.vec))
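# A minimal usage sketch (hypothetical path; clf and scaler are the fitted
# classifier and scaler this module is assumed to load elsewhere).
prediction = infer('recordings/sample_cry.wav')
if prediction is not None:
    print('crying' if prediction[0] == 1 else 'not crying')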
def main(): print "Loading data..." X, Y = load_data("../training_set.py") print "len(X):", len(X), "len(X[0]):", len(X[0]), "len(Y):", len(Y) ftrs = features.Features(X, Y) ftrs.build_aprx() thetas = ftrs.get_thetas() for theta in thetas: print thetas write_thetas(thetas)
def get_screen(screen_name, params):
    screen = None
    if screen_name == "DesktopAsk":
        import desktop
        screen = desktop.DesktopAsk(params)
    elif screen_name == "Check":
        from modules.pages import check
        screen = check.Check(params)
    elif screen_name == "Timezone":
        import timezone
        screen = timezone.Timezone(params)
        params['settings'].set('timezone_start', True)
    elif screen_name == "Wireless":
        import wireless
        screen = wireless.Wireless(params)
    elif screen_name == "Welcome":
        import welcome
        screen = welcome.Welcome(params)
    elif screen_name == "UserInfo":
        import user_info
        screen = user_info.UserInfo(params)
    elif screen_name == "Location":
        import location
        screen = location.Location(params)
    elif screen_name == "Language":
        import language
        screen = language.Language(params)
    elif screen_name == "Keymap":
        import keymap
        screen = keymap.Keymap(params)
    elif screen_name == "Features":
        import features
        screen = features.Features(params)
    elif screen_name == "Summary":
        import summary
        screen = summary.Summary(params)
    elif screen_name == "Slides":
        import slides
        screen = slides.Slides(params)
    elif screen_name == "InstallationAsk":
        import ask
        screen = ask.InstallationAsk(params)
    elif screen_name == "InstallationAdvanced":
        import advanced
        screen = advanced.InstallationAdvanced(params)
    elif screen_name == "InstallationAlongside":
        import alongside
        screen = alongside.InstallationAlongside(params)
    elif screen_name == "InstallationAutomatic":
        import automatic
        screen = automatic.InstallationAutomatic(params)
    elif screen_name == "zfs":
        import zfs
        screen = zfs.InstallationZFS(params)
    return screen
def test_similarity_user_all(self, monkeypatch):
    monkeypatch.setattr(features.Features, '_Features__sim_size', lambda self, i, j: 1)
    monkeypatch.setattr(features.Features, '_Features__sim_texture', lambda self, i, j: 1)
    monkeypatch.setattr(features.Features, '_Features__sim_color', lambda self, i, j: 1)
    monkeypatch.setattr(features.Features, '_Features__sim_fill', lambda self, i, j: 1)
    w = features.SimilarityMask(1, 1, 1, 1)
    f = features.Features(self.dummy_image, self.dummy_label, 1, w)
    assert f.similarity(0, 1) == 4
def main():
    below50k = features.Features("below50k")
    above50k = features.Features("above50k")

    with open("sample.txt", 'rb') as f:
        for line in f:
            feature = line.split(',')
            outcome = feature[14].rstrip()
            if outcome == ' <=50K':
                below50k.takeFeatures(feature)
            if outcome == ' >50K':
                above50k.takeFeatures(feature)

    below50k.calculateNumericAverages()
    below50k.calculateDiscreteAverages()
    above50k.calculateNumericAverages()
    above50k.calculateDiscreteAverages()

    comparer = compare.CompareFeatures(below50k, above50k)
    comparer.Compare()
    comparer.Print()
def define_features(vocab_S, vocab_T, char_vocab_S, char_vocab_T,
                    Embs_S, Embs_T, num_cells):
    maximum_length_S = max(len(w) for w in vocab_S)
    maximum_length_T = max(len(w) for w in vocab_T)
    maximum_length = max(maximum_length_S, maximum_length_T)

    char_features_encoder = wordencoding.BilingualRNNEncoding(
        char_vocab_S, char_vocab_T, num_cells)
    charlevel_features = char_features_encoder(maximum_length)

    wordlevel_features_encoder = wordencoding.WordLevelEncoding(
        vocab_S, embeddings=Embs_S, scope='source')
    word_embs_S = wordlevel_features_encoder()

    wordlevel_features_encoder = wordencoding.WordLevelEncoding(
        vocab_T, embeddings=Embs_T, scope='target')
    word_embs_T = wordlevel_features_encoder()

    return features.Features([charlevel_features, word_embs_S, word_embs_T])
def test_should_compute_recursive_features(self):
    fx = features.Features()
    fx.load_graph("resources/sample_graph_2.txt")
    fx.compute_primitive_features(rider_fx=False, rider_dir="resources/riders/")
    for vertex in fx.graph.nodes():
        self.assertEquals(len(fx.graph.node[vertex]), 28)

    prev_fx_matrix = fx.create_initial_feature_matrix()
    new_fx_matrix = fx.compute_recursive_features(prev_fx_matrix, 1, 0.0)
    print(len(list(new_fx_matrix.dtype.names)))
    print(len(list(prev_fx_matrix.dtype.names)))
def loop(msgs):
    'return (feature_vectors, unused messages)'
    vp = machine_learning.make_verbose_print(False)
    set_trace()
    feature_creators = (
        ('trace_print', features.trace_print),
    )
    result_feature_vectors = []
    result_unused = msgs
    pdb.set_trace()
    for i in range(0, n_feature_vectors, 1):
        msgs_to_be_used = msgs[i:]
        all_features = features.Features()
        for feature_creator in feature_creators:
            for cusip in cusips:
                try:
                    cusip_features, unused = feature_creator[1](msgs_to_be_used, cusip)
                except exception.NoFeatures as e:
                    raise exception.Features('cusip %s, %s' % (cusip, e.msg))
                if len(unused) < len(result_unused):
                    result_unused = copy.copy(unused)
                # update feature names to incorporate the cusip
                for k, v in cusip_features.items():
                    key = (
                        'id_%s_%s' % (cusip, k[3:]) if k.startswith('id_')
                        else '%s_%s_%s' % (feature_creator[0], cusip, k)
                    )
                    all_features.add(key, v)
        continue  # bypass old code, for now
        # try:
        #     fv, unused = feature_vector(msgs_to_be_used, cusips, required_reclassified_trade_type)
        #     vp('loop %d: fv trigger identifier: %s len(msgs): %d, len(unused): %d' % (
        #         i,
        #         fv['id_trigger_identifier'],
        #         len(msgs_to_be_used),
        #         len(unused),
        #     ))
        #     if False and i % 10 == 1:
        #         pdb.set_trace()
        #     result_feature_vectors.append(fv)
        #     if len(unused) < len(result_unused):
        #         result_unused = copy.copy(unused)
        # except exception.NoPriorEventWithCusipAndRtt as e:
        #     vp('stub: handle exception %s' % e)
        #     break
        # except exception.NoMessageWithCusip as e:
        #     vp('stub: handle exception %s' % e)
        #     break
    set_trace()
    return list(reversed(result_feature_vectors)), result_unused
def test_should_vertical_bin_correctly(self):
    fx = features.Features()
    fx.no_of_vertices = 6
    fx.init_log_binned_fx_buckets()
    actual = fx.vertical_bin([(0, 4), (1, 3), (2, 2), (3, 2), (4, 4), (5, 1)])
    # fx_value of 1 has 2 candidates
    expected = {5: 0, 2: 0, 3: 0, 1: 1, 0: 1, 4: 1}
    self.assertEquals(actual, expected)
def maxent_label(input_file, output_file, encode='utf-8'):
    input = codecs.open(input_file, "r", encode)
    output = codecs.open(output_file, "w", encode)
    count = 0
    for line in input.readlines():
        count += 1
        # Process only every third line.
        if count % 3 != 0:
            continue
        line = chtl.strq2b(line)
        text, tags = label(line)
        textfea = feats.Features(text)
        for i in range(len(text)):
            curfea = textfea.getFeats(i)
            output.write(' '.join(curfea))
            output.write(' ' + tags[i])
            output.write('\n')
    input.close()
    output.close()
def test_should_digitize_correctly(self):
    fx = features.Features()
    log_bins = np.logspace(np.log10(2), np.log10(20), num=15, endpoint=False)
    self.assertEquals(log_bins.size, 15)
    self.assertEquals(fx.digitize(2.0, log_bins=log_bins, file_name="sample"), "sample_0")
    self.assertEquals(fx.digitize(5.0, log_bins=log_bins, file_name="sample"), "sample_5")
    self.assertEquals(fx.digitize(15.0, log_bins=log_bins, file_name="sample"), "sample_13")
    self.assertEquals(fx.digitize(20.0, log_bins=log_bins, file_name="sample"), "sample_14")
def __init__(self, path_train=PATH_TN, learning_rate=0.01, n_estimators=1500,
             max_depth=13, min_child_weight=1, gamma=0.1, subsample=0.7,
             colsample_bytree=0.6, objective='binary:logistic', seed=2018,
             label='', tfbdc=False, debug=True, bdc=True, qz='DT', opt='bdc'):
    # qz in ('DT', 'sum', 'icf')
    self.xgboost = XGBClassifier(learning_rate=learning_rate,
                                 n_estimators=n_estimators,
                                 max_depth=max_depth,
                                 min_child_weight=min_child_weight,
                                 gamma=gamma,
                                 subsample=subsample,
                                 colsample_bytree=colsample_bytree,
                                 objective=objective,
                                 seed=seed,
                                 n_jobs=60)
    self.gbdt = GradientBoostingClassifier()
    self.rf = RandomForestClassifier()
    self.label = label
    self.model = [lgb, self.xgboost, self.gbdt, self.rf, lgb]
    self.SBBTree = SBBTree(model=self.model, bagging_num=5)
    self.BDC = bdc
    BDC = features.Features(tfbdc=tfbdc, debug=debug, BDC=self.BDC, qz=qz)
    Vocab_bdc = BDC.load_bdc(PATH_TN, label=self.label)
    self._X, self._Y, label_to_id, self.uuid = BDC.load_X_Y(
        Vocab_bdc, path=PATH_TN, label=self.label, froms='', opt=opt)
    self._X = np.array(self._X)
    self._Y = np.array(self._Y)
    self.uuid = np.array(self.uuid)
    self.kf = KFold(n_splits=CV, random_state=2018)
    self.id_to_lable = {i: _ for _, i in label_to_id.items()}
def test_should_init_vertical_bins(self):
    fx = features.Features()

    fx.no_of_vertices = 4
    fx.init_log_binned_fx_buckets()
    self.assertEquals(len(fx.refex_log_binned_buckets), fx.no_of_vertices)
    self.assertEquals(fx.refex_log_binned_buckets, [0, 0, 1, 2])

    fx.no_of_vertices = 5
    fx.refex_log_binned_buckets = []
    fx.init_log_binned_fx_buckets()
    self.assertEquals(len(fx.refex_log_binned_buckets), fx.no_of_vertices)
    self.assertEquals(fx.refex_log_binned_buckets, [0, 0, 1, 1, 2])

    fx.no_of_vertices = 8
    fx.refex_log_binned_buckets = []
    fx.init_log_binned_fx_buckets()
    self.assertEquals(len(fx.refex_log_binned_buckets), fx.no_of_vertices)
    self.assertEquals(fx.refex_log_binned_buckets, [0, 0, 0, 0, 1, 1, 2, 3])