示例#1
0
    def scientist(self, game_ended):
        """Decide the player's next move: play a card or declare a rule.

        Args:
            game_ended: True when the caller reports that the game is over.

        Returns:
            The rule produced by ``self.hypothesis.get_rule()`` when the game
            has ended or the quitting criterion is met, otherwise the card
            chosen by ``self.pick_card``.
        """
        # Quitting criterion: enough cards seen and a long enough streak of
        # correct guesses.
        quitting = (self.total_cards > 20) and (self.guesses_correct > 20)

        if game_ended or quitting:
            # Remember that we were the ones who ended the game.
            if not game_ended:
                self.ended_game = True
            # We may never have built a tree; build one so a rule exists.
            if self.hypothesis is None:
                self.hypothesis = DecisionTree()
                self.hypothesis.build_tree(self.training_data, self.ATTRIBUTES[-1], self.ATTRIBUTES)
            return self.hypothesis.get_rule()

        # Rebuild the tree when it has been flagged as stale and there is data.
        # NOTE(review): nothing in this method clears self.rebuildTree after
        # rebuilding -- confirm that it is reset elsewhere.
        if len(self.training_data) > 0 and self.rebuildTree:
            if self.hypothesis is None:
                self.hypothesis = DecisionTree()
            self.hypothesis.build_tree(self.training_data, self.ATTRIBUTES[-1], self.ATTRIBUTES)

        # Pick a card based on the last two accepted cards on the board.
        card = self.pick_card(self.BOARD[-2][0], self.BOARD[-1][0])

        # Record at which card count we played.
        self.cards_played.append(self.total_cards)

        # Play the card.
        return card
    def fit(self, x_train, y_train):
        """Fit ``self.no_estimators`` decision trees and store them.

        Each tree is trained either on a bootstrap sample (rows drawn with
        replacement, same size as the training set) when ``self.bootstrap``
        is truthy, or on the full training data otherwise. Fitted trees are
        appended to ``self.fitted_trees``.

        Args:
            x_train: training features; indexed with an integer index array
                when bootstrapping.
            y_train: training targets, indexed the same way as ``x_train``.
        """
        n_samples = len(x_train)
        n_trees = self.no_estimators
        for tree_number in range(1, n_trees + 1):
            # Carriage return keeps the progress report on a single line.
            progress = "\rRunning estimator {0}/{1}...".format(tree_number, n_trees)
            print(progress, end='')

            x_sample, y_sample = x_train, y_train
            if self.bootstrap:
                # Draw n_samples row indices with replacement.
                drawn = np.random.randint(0, n_samples, n_samples)
                x_sample = x_train[drawn]
                y_sample = y_train[drawn]

            estimator = DecisionTree(
                random_state=None,
                split_measure=self.split_measure,
                min_impurity_split=self.min_impurity_split,
                max_depth=self.max_depth,
                min_samples_split=self.min_samples_split,
                no_splits=self.no_splits,
                max_features=self.max_features,
                print_flag=False,
            )
            estimator.fit(x_sample, y_sample)
            self.fitted_trees.append(estimator)
        print("\r")
示例#3
0
def rf_train(data, forest_size, tree_depth):
	"""Train ``forest_size`` decision trees on ``data`` and return them.

	Each tree is trained with ``tree.train(data, tree_depth, True)`` and the
	result is stored as the tree's ``root``.
	"""
	print("rf train")
	forest = []
	for _ in range(forest_size):
		member = DecisionTree()
		# train() returns the root node; the tree does not store it itself.
		member.root = member.train(data, tree_depth, True)
		forest.append(member)
	return forest
def train(self, data, forest_size, tree_depth):
    """Train ``forest_size`` decision trees on ``data`` and return them.

    Args:
        data: training data handed unchanged to every tree.
        forest_size: number of trees to train.
        tree_depth: depth argument forwarded to ``DecisionTree.train``.

    Returns:
        List of trained ``DecisionTree`` objects, each with ``root`` set to
        the value returned by its ``train`` call.
    """
    classifiers = []
    for _ in range(forest_size):
        tree = DecisionTree()
        # The original passed an undefined name `depth`; the parameter is
        # `tree_depth`.
        tree.root = tree.train(data, tree_depth, True)
        # The original called append() with no argument (TypeError).
        classifiers.append(tree)
    return classifiers
示例#5
0
 def __init__(self, num_tree, algorithm='ID3', mode='classification'):
     """Create a forest of ``num_tree`` decision trees.

     Args:
         num_tree: number of ``DecisionTree`` instances to create.
         algorithm: forwarded to every ``DecisionTree`` (default ``'ID3'``).
         mode: forwarded to every ``DecisionTree`` and stored on the instance.
     """
     super().__init__()
     # Names of the two modes, kept as attributes.
     self.classification = 'classification'
     self.regression = 'regression'
     self.mode = mode
     # Every member tree is created with RF=True.
     forest = []
     for _ in range(num_tree):
         forest.append(DecisionTree(algorithm, mode, RF=True))
     self.RF = forest
示例#6
0
if __name__ == "__main__":
    # Load the data and separate features from targets
    data = load_iris()
    x, y = data['data'], data['target']

    # Split the data: 20% held out for testing, shuffled with a fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=40, shuffle=True)

    # Training rows are the features with the target appended as last column
    train_data = np.c_[X_train, y_train]

    ################################# Decision tree run ###################################
    # Create the decision tree model
    tree = DecisionTree(mode='classification')
    # Train
    tree.train(train_data)
    # Prune
    # tree.pruning(train_data, 0.03)
    # Predict
    y_pre = tree(X_test)

    ################################# Random forest run ###################################
    # # Create the random forest model
    # RF = RandomForest(num_tree=10)
    # # Train the random forest
    # RF.train(train_data, RF_k=3)
    # # Predict
    # y_pre = RF.predict(X_test)
示例#7
0
def _count_errors(expected, predicted):
	"""Count the positions where ``predicted`` disagrees with ``expected``.

	Args:
		expected: iterable of true labels.
		predicted: sequence of predictions, indexable by position.

	Returns:
		Tuple ``(errors, total)``: number of mismatches and number of labels.
	"""
	errors = 0
	total = 0
	for position, value in enumerate(expected):
		if value != predicted[position]:
			errors += 1
		total += 1
	return errors, total


def main():
	"""Train on the census data, evaluate on a validation split, and report.

	Behaviour is driven by the hard-coded switches at the top:

	* ``random_forest_switch``: train a forest via ``rf_train``; otherwise
	  train single trees of depth 1..20 and save an accuracy-vs-depth plot.
	* ``error_rate_switch``: print the validation error rate of the last
	  predictions; otherwise write them to ``spam_test_predictions.csv``.
	* ``test_switch``: also preprocess the test input file.
	* ``spam_val``: skip imputation and only shuffle the input.
	"""
	# MAKE SURE ALL OF THESE SWITCHES ARE SET CORRECTLY!!!!
	random_forest_switch = False
	error_rate_switch = True
	test_switch = False
	spam_val = False

	input_file, test_input_file, index, label, categorical = census()

	if not spam_val:
		replace_missing_values(input_file, categorical)
		mean, mode = mean_and_mode(input_file)
		impute(input_file, mean, mode, categorical)
		random.shuffle(input_file)
	else:
		input_file = shuffle(input_file)

	# The first `index` rows are held out for validation, the rest train.
	train = input_file[index:]
	validation = input_file[:index]
	train_df = pd.DataFrame(train)
	train_d = pd.get_dummies(train_df)
	validation_df = pd.DataFrame(validation)
	validation_d = pd.get_dummies(validation_df)

	if test_switch:
		# NOTE(review): these calls omit the `categorical` argument that the
		# training-data calls above pass -- confirm the helpers default it.
		replace_missing_values(test_input_file)
		test_mean, test_mode = mean_and_mode(test_input_file)
		impute(test_input_file, test_mean, test_mode)
		test_df = pd.DataFrame(test_input_file)
		test_d = pd.get_dummies(test_df)

	if random_forest_switch:
		all_classifiers = rf_train(train_d, 30, 18)
		predict = rf_predict(all_classifiers, validation_d)
		print("RF PREDICT")
		print(predict[:50])
	else:
		x_values = []
		y_values = []
		# Sweep tree depths 1..20 and record validation accuracy for each.
		for depth in range(1, 21):
			x_values.append(depth)
			classifier = DecisionTree()
			classifier.root = classifier.train(train_d, depth)
			predict = classifier.predict(validation_d)
			error, total = _count_errors(validation_d[label], predict)
			accuracy = 1 - (error/total)
			# The value printed here is the accuracy (it is what gets plotted
			# on the "Accuracy" axis), so it is labelled as such.
			print("ACCURACY:")
			print(accuracy)
			y_values.append(accuracy)
		plot.plot(x_values, y_values, label = "census graphs")
		plot.legend()
		plot.grid()
		plot.xlabel("Depth")
		plot.ylabel("Accuracy")
		plot.savefig("census_tests")

	if error_rate_switch:
		# CALCULATE ERROR RATE
		# `predict` holds the forest predictions or those of the last depth.
		error, total = _count_errors(validation_d[label], predict)
		print("ERROR RATE:")
		print(error/total)
	else:
		# WRITE TO CSV
		predictions = [["id", "category"]]
		predictions.extend([row_id, value] for row_id, value in enumerate(predict))
		# newline='' lets the csv module control line endings itself.
		with open('spam_test_predictions.csv', 'w', newline='') as f:
			writer = csv.writer(f)
			writer.writerows(predictions)
示例#8
0
文件: main.py 项目: jlzxian/Python
def tree(X,y,i):
    """Fit a decision tree of maximum depth ``i`` on (X, y) and print its training error.

    The error is the fraction of training samples whose prediction differs
    from ``y``; it is printed as ``i : error``.
    """
    classifier = DecisionTree(max_depth=i, stump_class=ds.DecisionStumpErrorRate)
    classifier.fit(X, y)
    # Evaluated on the same data the tree was fitted on.
    training_error = np.mean(y != classifier.predict(X))
    print(i, ":", training_error)
示例#9
0
class Player(object):
    """Player that tries to learn the dealer's hidden rule from the cards it sees.

    Every card shown to the player is turned into a feature row (see
    ``create_datum``) labelled with whether the card was legal. A
    ``DecisionTree`` built from those rows is the player's current hypothesis
    about the rule.
    """

    def __init__(self, cards):
        """Initialise the board, attribute names, scoring state and the hand.

        Args:
            cards: cards already on the board. Each becomes a ``(card, [])``
                entry whose list later collects the cards rejected after it.
                ``create_datum`` and ``scientist`` read the last two entries,
                so at least two cards are expected.
        """
        # These variables replace the global variables in Phase I
        self.BOARD = [(c, []) for c in cards]
        self.VALUES = ["A", "2", "3", "4", "5", "6", "7", "8", "9", "10", "J", "Q", "K"]
        self.SUITS = ["C", "D", "H", "S"]
        self.DECK = [x+y for x in self.VALUES for y in self.SUITS]

        # Helper variables for ATTRIBUTES. The names are generated in the same
        # order in which create_datum() produces the feature values.
        cards_att = ["previous2", "previous", "current"]
        individuals_att = ["suit", "color", "even", "is_royal"]
        self.ATTRIBUTES = [x + "(" + str(y) + ")" for y in cards_att for x in individuals_att]
        self.ATTRIBUTES += [x + "(value(" + y + ")," + z + ")" for y in cards_att for z in self.VALUES for x in ["greater", "equal"]]
        self.ATTRIBUTES += [x + "(current" + "," + y + ")" for y in cards_att[:-1] for x in ["greater", "equal"]]
        self.ATTRIBUTES += [x + "(previous, previous2)" for x in ["greater", "equal"]]
        # The outer call needs its own closing parenthesis; without it these
        # names had unbalanced parentheses, unlike every other attribute.
        self.ATTRIBUTES += [x + "(value(current)" + ",value(" + y + "))" for y in cards_att[:-1] for x in ["greater", "equal"]]
        self.ATTRIBUTES += [x + "(value(previous), value(previous2))" for x in ["greater", "equal"]]
        # Last attribute is the classification target.
        self.ATTRIBUTES += ["Legal"]

        # To keep track of running score
        self.game_score = 0
        self.ended_game = False

        # This is for our rule
        self.hypothesis = None
        self.training_data = []

        # A boolean that will tell us if we need to update the tree
        self.rebuildTree = True

        # These are for our quitting criteria
        self.cards_played = []
        self.total_cards = 0
        self.guesses_correct = 0

        # Setup our hand
        self.hand = [self.generate_random_card() for _ in range(14)]

    def pick_card(self, prev2, prev):
        """Remove a card from the hand to play and refill the hand.

        Assume that our hypothesis is correct, and play a card that the
        hypothesis predicts is illegal, therefore increasing our chances of
        getting a false play. That way we gain information.
        If all cards in our hand are predicted to be legal or if we don't have
        a hypothesis, then play at random.

        Args:
            prev2: the card before the previous accepted card.
            prev: the previous accepted card.

        Returns:
            The card removed from the hand.
        """
        if not self.hypothesis:
            to_play = self.hand.pop(random.randrange(len(self.hand)))
        else:
            hyp = parse(self.hypothesis.get_rule())
            for card in self.hand:
                if not hyp.evaluate((prev2, prev, card)):
                    to_play = self.hand.pop(self.hand.index(card))
                    break
            else:
                # No card is predicted illegal: fall back to a random one.
                to_play = self.hand.pop(random.randrange(len(self.hand)))

        # Refill the hand so its size stays constant.
        self.hand.append(random.choice(self.DECK))

        return to_play

    def generate_random_card(self):
        """Return a random card (random value followed by random suit)."""
        return random.choice(self.VALUES) + random.choice(self.SUITS)

    def update_card_to_boardstate(self, card, result):
        """Record a played card and whether it was legal.

        Adds a labelled row to the training data, decides whether the tree
        must be rebuilt, updates the board and the running score, and finally
        increments the number of cards seen.

        Args:
            card: the card that was played; must not be on the board yet.
            result: truthy if the card was legal.
        """
        # Construct an element of the training data
        datum = self.create_datum(card)
        datum.append(result)
        datum = tuple(datum)

        self.training_data.append(datum)

        # If we have built a tree
        if len(self.training_data) > 1 and self.hypothesis:
            # Figure out what our rule says about this card
            guess = self.guess_legal(datum)

            if guess != result:
                # If we were wrong, we need to rebuild the tree
                self.rebuildTree = True
                self.guesses_correct = 0
            elif not self.rebuildTree:
                # We were correct, and our tree is not proven wrong
                self.guesses_correct += 1
        else:
            self.rebuildTree = True

        # Now we can update the board state: legal cards extend the board,
        # illegal ones are attached to the last legal card.
        if result:
            self.BOARD.append((card, []))
        else:
            self.BOARD[-1][1].append(card)

        # Increase our score (iff we played the card and it counts towards
        # score). The emptiness check avoids an IndexError when more than 20
        # cards have been seen but we have not played any ourselves.
        played_by_us = bool(self.cards_played) and self.cards_played[-1] == self.total_cards
        if self.total_cards > 20 and played_by_us:
            if result:
                self.game_score += 1
            else:
                self.game_score += 2

        # Increase the total number of cards that we've seen
        self.total_cards += 1

    def create_datum(self, card):
        """Return the feature list for ``card`` (without the classification).

        The classification is appended later by the caller. This assumes that
        ``card`` has *NOT* been added to the BOARD, i.e. we have not "played"
        it yet, because the last two board entries are used as the previous
        cards.

        Args:
            card: the card about to be judged.

        Returns:
            List of feature values, in the order of ``self.ATTRIBUTES``
            minus the final ``"Legal"`` entry.
        """
        prev2 = self.BOARD[-2][0]
        prev = self.BOARD[-1][0]

        cards = [prev2, prev, card]

        # we need suit, parity, color...
        individuals = [suit, color, even, is_royal]

        features = [x(y) for y in cards for x in individuals]

        # unfortunately we need features for comparing values (for each card)
        #   to the numbers 1 to 13, to encompass numerical differences
        # this makes the feature list gigantic
        features += [x(str(y[:-1]), str(z)) for y in cards for z in self.VALUES for x in [greater, equal]]

        # compare the deck values of the cards to each other
        features += [x(card, y) for y in [prev2, prev] for x in [greater, equal]] + [x(prev, prev2) for x in [greater, equal]]

        # compare the face values (card string without its suit character)
        # TODO: add anything else here that could possibly be a predicate that we split on
        features += [x(card[:-1], y[:-1]) for y in [prev2, prev] for x in [greater, equal]]
        features += [x(prev[:-1], prev2[:-1]) for x in [greater, equal]]

        return features

    def guess_legal(self, datum):
        """Return the hypothesis' prediction for ``datum``.

        Args:
            datum: a row as produced by ``create_datum``.

        Returns:
            The tree's prediction; a ``"Null"`` prediction is treated as
            ``False``.
        """
        guess = self.hypothesis.predict(self.ATTRIBUTES, [datum])[0]

        if guess == "Null":
            guess = False
        return guess

    def scientist(self, game_ended):
        """The core of the Player's decision making.

        Args:
            game_ended: True when the caller reports that the game is over.

        Returns:
            The current rule (``self.hypothesis.get_rule()``) when the game
            has ended or the quitting criterion is met, otherwise the card to
            play.
        """
        # quitting criteria
        quitting = (self.total_cards > 20) and (self.guesses_correct > 20)

        if game_ended or quitting:
            # if we are the ones ending the game, remember it for scoring
            if not game_ended:
                self.ended_game = True
            if self.hypothesis is None:
                self.hypothesis = DecisionTree()
                self.hypothesis.build_tree(self.training_data, self.ATTRIBUTES[-1], self.ATTRIBUTES)
            return self.hypothesis.get_rule()

        # if we need to rebuild the tree, rebuild it
        # NOTE(review): no method of this class ever sets self.rebuildTree
        # back to False, so the tree is rebuilt on every turn and
        # guesses_correct is never incremented -- confirm whether the flag
        # should be cleared here after a rebuild.
        if len(self.training_data) > 0 and self.rebuildTree:
            if self.hypothesis is None:
                self.hypothesis = DecisionTree()
            self.hypothesis.build_tree(self.training_data, self.ATTRIBUTES[-1], self.ATTRIBUTES)

        # pick a card and refill hand
        card = self.pick_card(self.BOARD[-2][0], self.BOARD[-1][0])

        # record what number card we played
        self.cards_played.append(self.total_cards)

        # play the card
        return card

    def score(self, rule):
        """Compute and return the score of the player.

        Subtracts 75 when our hypothesis is equivalent to ``rule`` and 25 when
        we ended the game ourselves. The adjustments are applied to
        ``self.game_score`` itself, so every call applies them again.

        Args:
            rule: the rule to compare against; must offer ``evaluate``.
        """
        equiv = self.check_equivalence(rule)
        if equiv:
            self.game_score -= 75
        if self.ended_game:
            self.game_score -= 25
        return self.game_score

    def check_equivalence(self, rule):
        """Check whether ``rule`` is equivalent to our hypothesis.

        Both rules are evaluated on every (prev2, prev, current) triple of
        deck cards; they are equivalent when they agree on all of them.

        Any error while parsing or evaluating is treated as "not equivalent",
        because sometimes rule.evaluate fails (like with greater()).
        TODO: Maybe remove this?

        TODO: Maybe change this for vacuous stuff?
                -Maybe use (None, None, x) to see if the dealer could play x
                    This would require a try catch because maybe None would cause it to fail
                -Maybe try and parse the rule to ignore prev2/prev for the first 2 cards etc?
                    Like evaluate the parts of the rule that don't use prev/prev2
        """
        try:
            hyp = parse(self.hypothesis.get_rule())
            for prev2 in self.DECK:
                for prev in self.DECK:
                    for curr in self.DECK:
                        # should check for vacuous equivalence
                        if rule.evaluate((prev2, prev, curr)) != hyp.evaluate((prev2, prev, curr)):
                            return False
            return True
        except Exception:
            # Still best-effort, but no longer swallows KeyboardInterrupt or
            # SystemExit the way a bare except did.
            return False

    def play(self, game_ended=False):
        """Make a move; this is mostly a wrapper for ``scientist``."""
        return self.scientist(game_ended)

    def boardState(self):
        """Just return the board."""
        return self.BOARD
示例#10
0
    x = data['data']
    y = data['target']
    # X = np.array([
    #         ['青绿', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'],
    #         ['乌黑', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '是'],
    #         ['乌黑', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'],
    #         ['青绿', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '是'],
    #         ['浅白', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'],
    #         ['青绿', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '是'],
    #         ['乌黑', '稍蜷', '浊响', '稍糊', '稍凹', '软粘', '是'],
    #         ['乌黑', '稍蜷', '浊响', '清晰', '稍凹', '硬滑', '是'],
    #         ['乌黑', '稍蜷', '沉闷', '稍糊', '稍凹', '硬滑', '否'],
    #         ['青绿', '硬挺', '清脆', '清晰', '平坦', '软粘', '否'],
    #         ['浅白', '硬挺', '清脆', '模糊', '平坦', '硬滑', '否'],
    #         ['浅白', '蜷缩', '浊响', '模糊', '平坦', '软粘', '否'],
    #         ['青绿', '稍蜷', '浊响', '稍糊', '凹陷', '硬滑', '否'],
    #         ['浅白', '稍蜷', '沉闷', '稍糊', '凹陷', '硬滑', '否'],
    #         ['乌黑', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '否'],
    #         ['浅白', '蜷缩', '浊响', '模糊', '平坦', '硬滑', '否'],
    #         ['青绿', '蜷缩', '沉闷', '稍糊', '稍凹', '硬滑', '否'],
    #         ])
    Y = np.c_[x, y]
    # tree = DecisionTree(mode='regression')
    tree = DecisionTree(mode='classification')
    print(tree.train(Y))
    print('\n')
    print(tree.pruning(Y, 0.03))
    print(tree(x[9].reshape([1, -1])))
    print(y[9])
    # print(len((8,9)))