Python DataFrame.from_array примеры, dataframe.DataFrame.from_array Python примеры использования

Пример #1

0

Показать файл

Файл: decision_tree.py Проект: DrM00G/machine-learning

 def split(self, max_depth, depth):
     """Split this node once, or forward the split to its children.

     The node is marked as no longer splittable (``unsplit = False``) when
     its impurity is 0 or when ``max_depth`` neither exceeds ``depth`` nor
     compares equal to ``False``.
     """
     may_continue = self.impurity != 0 and (max_depth > depth
                                            or max_depth == False)
     if not may_continue:
         self.unsplit = False
         return

     if self.unsplit:
         # best_split is read as [feature name, threshold, ...].
         axis = 0 if self.best_split[0] == 'x' else 1
         rows = self.df.to_array()
         below = [row for row in rows if row[axis] < self.best_split[1]]
         at_or_above = [row for row in rows
                        if row[axis] >= self.best_split[1]]
         self.low = Node(DataFrame.from_array(below, self.df.columns),
                         self.split_metric)
         self.high = Node(DataFrame.from_array(at_or_above, self.df.columns),
                          self.split_metric)
         self.unsplit = False
         return

     # Already split: recurse only while one more level is still allowed.
     if max_depth > depth + 1 or max_depth == False:
         for child in (self.low, self.high):
             if child.impurity != 0:
                 child.split(max_depth, depth + 1)

Пример #2

0

Показать файл

Файл: polynomial_regressor.py Проект: charliebrown364/machine-learning

    def fit(self, dataframe, dependent_variable):
        """Store a dataframe of polynomial terms built from ``dataframe``.

        The first column of ``dataframe`` is taken as the independent
        variable.  Each row of ``dataframe.to_array()`` is read as
        ``(independent value, dependent value)``; the new rows hold the
        independent value raised to powers 1..``self.degree`` followed by
        the dependent value.  The result is saved on ``self.df``.
        """
        self.first_variable = dataframe.columns[0]
        self.dependent_variable = dependent_variable

        if self.degree == 0:
            header = [self.dependent_variable]
        else:
            higher_powers = [
                self.first_variable + '^' + str(exponent)
                for exponent in range(2, self.degree + 1)
            ]
            header = ([self.first_variable] + higher_powers +
                      [self.dependent_variable])

        rows = [
            [pair[0]**exponent for exponent in range(1, self.degree + 1)] +
            [pair[1]]
            for pair in dataframe.to_array()
        ]

        self.df = DataFrame.from_array(rows, header)

Пример #3

0

Показать файл

Файл: logistic_regressor.py Проект: DrM00G/machine-learning

 def __init__(self,
              data_class,
              prediction_column,
              max_value,
              delta,
              constant=True):
     """Prepare the data and solve for the regression multipliers.

     Keeps a copy of the incoming data (with a leading 'constant' column
     of ones) on ``self.original_data``, passes the prediction column
     through ``set_bound_replacements(delta, value)`` to build
     ``self.data``, optionally adds the 'constant' column there as well,
     and stores ``solve_coefficients()`` on ``self.multipliers``.
     Three debug lines tagged "#0", "#1" and "#2" are printed on the way.
     """
     super().__init__(data_class, prediction_column)
     self.prediction = prediction_column
     self.current_input = None
     self.max_val = max_value

     self.original_data = DataFrame.from_array(data_class.to_array(),
                                               data_class.columns)
     print(f"#0{self.original_data.to_array()!s}")
     row_total = len(data_class.to_array())
     self.original_data = self.original_data.append_columns(
         {'constant': [1] * row_total},
         ['constant'] + data_class.columns)

     def bound(value):
         return self.set_bound_replacements(delta, value)

     self.data = data_class.apply(self.prediction_column, bound)
     if constant:
         ones = [1] * len(self.data.to_array())
         self.data = self.data.append_columns(
             {'constant': ones}, ['constant'] + self.data.columns)

     self.multipliers = self.solve_coefficients()
     print(f"#1{self.multipliers!s}")
     print(f"#2{self.original_data.to_array()!s}")

Пример #4

0

Показать файл

 def nearest_neighbors(self, observation):
     """Return the distance rows for ``observation`` in reordered form.

     Rows are removed one at a time from the ``compute_distances`` result
     at the index chosen by ``sort_closest_cookie``; the returned
     dataframe lists them in the reverse of that removal order.
     """
     remaining = self.compute_distances(observation).to_array()
     picked = [
         remaining.pop(self.sort_closest_cookie(remaining))
         for _ in range(len(remaining))
     ]
     picked.reverse()
     return DataFrame.from_array(picked,
                                 columns=['distance', 'Cookie Type'])

Пример #5

0

Показать файл

Файл: decision_tree.py Проект: DrM00G/machine-learning

 def calc_goodness(self, split, axis):
     """Score a candidate split of this node's rows.

     Rows are divided on ``row[axis] < split`` versus ``>= split``; each
     side becomes a temporary ``Node``.  The score is this node's impurity
     minus each side's impurity weighted by its share of the rows,
     rounded to 3 decimals.
     """
     remaining = self.impurity
     rows = self.df.to_array()
     sides = (
         [row for row in rows if row[axis] < split],
         [row for row in rows if row[axis] >= split],
     )
     children = [
         Node(DataFrame.from_array(side, self.df.columns),
              self.split_metric) for side in sides
     ]
     for child in children:
         remaining -= (len(child.row_indices) /
                       len(self.row_indices)) * child.impurity
     return round(remaining, 3)

Пример #6

0

Показать файл

Файл: k_nearest_neighbors.py Проект: geomeza/machine-learning

 def compute_distances(self, observation):
     """Pair every stored row's distance to ``observation`` with its label.

     For each row, the squared differences over the keys of
     ``observation`` are summed and square-rooted; the row's first entry
     is used as the label.
     """
     records = self.dataframe.to_array()
     by_column = self.dataframe.data_dict
     table = []
     for index, record in enumerate(records):
         squared = [(observation[key] - by_column[key][index])**2
                    for key in observation]
         table.append([sum(squared)**(0.5), record[0]])
     return DataFrame.from_array(table, ['Distance', 'Cookie Type'])

Пример #7

0

Показать файл

 def split(self, if_once=False, depth_needed=None):
     """Split this node, or pass the request on to existing children.

     Nothing happens once ``self.depth`` has reached ``depth_needed``
     (when given), when ``final_split`` is not ``False``, or when no best
     split is found.  With ``if_once`` set, freshly created children are
     not split further in the same call.
     """
     if depth_needed is not None and not self.depth < depth_needed:
         return

     if self.low is not None or self.high is not None:
         # Already split: delegate to whichever children exist.
         for child in (self.low, self.high):
             if child is not None:
                 child.split(if_once, depth_needed=depth_needed)
         return

     if self.final_split is not False:
         return

     self.possible_splits = self.get_possible_splits()
     self.get_best_split()
     if self.best_split is None:
         return

     # Record the chosen split under this depth in the owning tree.
     level = str(self.depth)
     if level not in self.tree.splits:
         self.tree.splits[level] = []
     self.tree.splits[level].append(self.best_split)

     rows = self.df.to_array()
     below = [
         row for row in rows
         if row[self.best_split_index] < self.best_split[1]
     ]
     at_or_above = [
         row for row in rows
         if row[self.best_split_index] >= self.best_split[1]
     ]
     self.low = Node(DataFrame.from_array(below, self.df.columns),
                     self.split_metric, (self.depth + 1),
                     tree=self.tree)
     self.high = Node(DataFrame.from_array(at_or_above, self.df.columns),
                      self.split_metric, (self.depth + 1),
                      tree=self.tree)
     if if_once:
         return
     self.low.split(depth_needed=depth_needed)
     self.high.split(depth_needed=depth_needed)

Пример #8

0

Показать файл

Файл: csv_training.py Проект: geomeza/machine-learning

def run_tests(training_set, testing_set, decision_tree, forest=False):
    """Fit ``decision_tree`` and count correct predictions on the test rows.

    Rows are read as ``[bmi, weight, class]``.  With ``forest`` set the
    model's ``predict`` method is used, otherwise ``classify``.

    Returns a ``(number correct, number of test rows)`` tuple.
    """
    frame = DataFrame.from_array(training_set, ['bmi', 'weight', 'class'])
    decision_tree.fit(frame)
    hits = 0
    for row in testing_set:
        features = {'bmi': row[0], 'weight': row[1]}
        guess = (decision_tree.predict(features)
                 if forest else decision_tree.classify(features))
        if guess == row[2]:
            hits += 1
    return hits, len(testing_set)

Пример #9

0

Показать файл

 def compute_distances(self, observation):
     """Return a dataframe of ``[distance, label]`` for every stored row.

     The label is the row's value in ``self.prediction_column``; all other
     values of the row are handed to ``compute_distance`` together with
     ``observation``.

     The row array and the label position are looked up once instead of
     once per row, so the work is a single pass over the data.
     """
     rows = self.df.to_array()
     label_at = self.df.columns.index(self.prediction_column)
     result = []
     for row in rows:
         features = [
             value for position, value in enumerate(row)
             if position != label_at
         ]
         result.append(
             [self.compute_distance(observation, features), row[label_at]])
     return DataFrame.from_array(result,
                                 columns=['distance', 'Cookie Type'])

Пример #10

0

Показать файл

 def calc_goodness(self, split, axis_index):
     """Score a candidate split at ``split`` on column ``axis_index``.

     Rows below the threshold and rows at or above it are wrapped in
     temporary child nodes (created with ``check_splits=False``).  The
     score is this node's impurity minus each child's impurity weighted
     by its share of this node's rows.
     """
     remaining = self.impurity
     rows = self.df.to_array()
     below = [row for row in rows if row[axis_index] < split]
     at_or_above = [row for row in rows if row[axis_index] >= split]
     low_child = Node(DataFrame.from_array(below, self.df.columns),
                      self.split_metric,
                      depth=int(self.depth) + 1,
                      check_splits=False,
                      tree=self.tree)
     high_child = Node(DataFrame.from_array(at_or_above, self.df.columns),
                       self.split_metric,
                       depth=(self.depth + 1),
                       check_splits=False,
                       tree=self.tree)
     for child in (low_child, high_child):
         remaining -= (len(child.row_indices) /
                       len(self.row_indices)) * child.impurity
     return remaining

Пример #11

0

Показать файл

Файл: decision_tree.py Проект: DrM00G/machine-learning

 def calc_possible_splits(self):
     """List every candidate split on 'x' and 'y' with its goodness.

     For each feature the distinct values are taken in first-seen order,
     and the midpoint of each neighbouring pair becomes a candidate.
     Axis 0 is used for 'x' and axis 1 for 'y' when scoring.
     """
     distinct = {
         feature: list(dict.fromkeys(self.df.ordered_dict[feature]))
         for feature in ('x', 'y')
     }
     candidates = []
     for axis, feature in enumerate(('x', 'y')):
         values = distinct[feature]
         for left, right in zip(values, values[1:]):
             midpoint = (left + right) / 2
             candidates.append(
                 [feature, midpoint,
                  self.calc_goodness(midpoint, axis)])
     return DataFrame.from_array(candidates,
                                 ['feature', 'value', 'goodness of split'])

Пример #12

0

Показать файл

 def get_possible_splits(self):
     """Collect candidate splits for every feature column.

     Feature columns are all columns except 'class' and 'indices'.
     Candidates are midpoints between neighbouring entries of
     ``self.distinct_values`` for each feature.  With the 'random' split
     metric only the candidates of one randomly chosen feature are kept;
     an empty list (not a dataframe) is returned when there are none.
     """
     features = [
         name for name in self.df.columns
         if name not in ('class', 'indices')
     ]
     all_splits = []
     for position in range(len(self.distinct_values)):
         values = self.distinct_values[position]
         for left, right in zip(values, values[1:]):
             midpoint = (left + right) / 2
             all_splits.append(
                 [features[position], midpoint,
                  self.calc_goodness(midpoint, position)])
     if self.split_metric == 'random':
         seen_features = list(set([entry[0] for entry in all_splits]))
         if len(seen_features) == 0:
             return []
         chosen = random.choice(seen_features)
         all_splits = [entry for entry in all_splits if entry[0] == chosen]
     return DataFrame.from_array(
         all_splits, ['axis', 'split_value', 'goodness of split'])

Пример #13

0

Показать файл

Файл: test_dataframe.py Проект: anton-perez/machine-learning

# Fixture for the "apply" check: three integer columns.
data_dict = {'Pete': [1, 0, 1, 0], 'John': [2, 1, 0, 2], 'Sarah': [3, 1, 4, 0]}

df1 = DataFrame(data_dict, column_order=['Pete', 'John', 'Sarah'])
# Multiply every value of the 'John' column by 7; other columns must not change.
df2 = df1.apply('John', lambda x: 7 * x)
print('Testing method "apply"...')
assert df2.data_dict == {
    'Pete': [1, 0, 1, 0],
    'John': [14, 7, 0, 14],
    'Sarah': [3, 1, 4, 0]
}
print('PASSED')

# Fixture for the row-filtering and ordering checks.
columns = ['firstname', 'lastname', 'age']
arr = [['Kevin', 'Fray', 5], ['Charles', 'Trapp', 17], ['Anna', 'Smith', 13],
       ['Sylvia', 'Mendez', 9]]
df = DataFrame.from_array(arr, columns)

# NOTE(review): the message names "select_rows_where" but the call below is
# df.where(...) -- confirm which method name is intended.
print('Testing method "select_rows_where"...')
assert df.where(lambda row: len(row['firstname']) >= len(row['lastname']) and
                row['age'] > 10).to_array() == [['Charles', 'Trapp', 17]]
print('PASSED')

# Ascending order by the numeric 'age' column.
print('Testing method "order_by"...')
assert df.order_by('age', order="ASC").to_array() == [['Kevin', 'Fray', 5],
                                                      ['Sylvia', 'Mendez', 9],
                                                      ['Anna', 'Smith', 13],
                                                      ['Charles', 'Trapp', 17]]

assert df.order_by('firstname',
                   order="DESC").to_array() == [['Sylvia', 'Mendez', 9],
                                                ['Kevin', 'Fray', 5],

Пример #14

0

Показать файл

Файл: polynomial_regression.py Проект: charliebrown364/machine-learning

import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

# (x, y) samples to be fitted with a cubic.
dataset = [(0.0, 4.0), (0.2, 8.9), (0.4, 17.2), (0.6, 28.3), (0.8, 41.6),
           (1.0, 56.5), (1.2, 72.4), (1.4, 88.7), (1.6, 104.8), (1.8, 120.1),
           (2.0, 134.0), (2.2, 145.9), (2.4, 155.2), (2.6, 161.3),
           (2.8, 163.6), (3.0, 161.5), (3.2, 154.4), (3.4, 141.7),
           (3.6, 122.8), (3.8, 97.1), (4.0, 64.0), (4.2, 22.9), (4.4, -26.8),
           (4.6, -85.7), (4.8, -154.4)]

# Expand each sample into (x, x^2, x^3, y) so a linear fit yields a cubic.
new_columns = ['x', 'x^2', 'x^3', 'y']
new_dataset = [
    (x_value, x_value**2, x_value**3, y_value)
    for x_value, y_value in dataset
]

df = DataFrame.from_array(new_dataset, new_columns)
polynomial_regressor = LinearRegressor(df, 'y')
polynomial_regressor_coefficients = polynomial_regressor.coefficients
print("polynomial_regressor_coefficients:", polynomial_regressor_coefficients)

Пример #15

0

Показать файл

import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
import math
import matplotlib.pyplot as plt

df = DataFrame.from_array([(0.0, 7.0), (0.2, 5.6), (0.4, 3.56), (0.6, 1.23),
                           (0.8, -1.03), (1.0, -2.89), (1.2, -4.06),
                           (1.4, -4.39), (1.6, -3.88), (1.8, -2.64),
                           (2.0, -0.92), (2.2, 0.95), (2.4, 2.63), (2.6, 3.79),
                           (2.8, 4.22), (3.0, 3.8), (3.2, 2.56), (3.4, 0.68),
                           (3.6, -1.58), (3.8, -3.84), (4.0, -5.76),
                           (4.2, -7.01), (4.4, -7.38), (4.6, -6.76),
                           (4.8, -5.22)],
                          columns=['x', 'y'])

# Derive the trigonometric feature columns from x.
df = df.apply_add('x', math.sin, 'sin(x)')
df = df.apply_add('x', math.cos, 'cos(x)')
df = df.apply_add('x', lambda x: math.sin(2 * x), 'sin(2*x)')
df = df.apply_add('x', lambda x: math.cos(2 * x), 'cos(2*x)')

# Keep copies of the raw x and y values before the x column is removed.
x_values = list(df.data_dict['x'])
y_values = list(df.data_dict['y'])

# Only the derived columns (and y) take part in the regression.
df = df.del_column('x')

# Fit y against the remaining columns.
linear_regressor = LinearRegressor(df, dependent_variable='y')

Пример #16

0

Показать файл

#   #   [ 5,  0,  1,  0,  0,  5,  0,  0,  0,  0,  1,  8],
#   #   [ 5,  0,  0,  1,  0,  0,  5,  0,  0,  0,  1,  1],
#   #   [ 5,  0,  1,  1,  0,  5,  5,  0,  0,  1,  1,  0],
#   #   [ 0,  5,  0,  0,  0,  0,  0,  0,  0,  0,  1,  5],
#   #   [ 0,  5,  1,  0,  0,  0,  0,  5,  0,  0,  1,  0],
#   #   [ 0,  5,  0,  1,  0,  0,  0,  0,  5,  0,  1,  9],
#   #   [ 0,  5,  1,  1,  0,  0,  0,  5,  5,  1,  1,  0],
#   #   [ 5,  5,  0,  0, 25,  0,  0,  0,  0,  0,  1,  0],
#   #   [ 5,  5,  1,  0, 25,  5,  0,  5,  0,  0,  1,  0],
#   #   [ 5,  5,  0,  1, 25,  0,  5,  0,  5,  0,  1,  0],
#   #   [ 5,  5,  1,  1, 25,  5,  5,  5,  5,  1,  1,  0]]

# Small people table used to demonstrate the DataFrame selection helpers.
columns = ['firstname', 'lastname', 'age']
arr = [['Kevin', 'Fray', 5], ['Charles', 'Trapp', 17], ['Anna', 'Smith', 13],
       ['Sylvia', 'Mendez', 9]]
df = DataFrame.from_array(arr, columns)

# Round trip: print the rows back out of the dataframe.
print(df.to_array())

# Column selection; expected output:
print(df.select_columns(['firstname', 'age']).to_array())
# [['Kevin', 5],
# ['Charles', 17],
# ['Anna', 13],
# ['Sylvia', 9]]

# Row selection by index; expected output:
print(df.select_rows([1, 3]).to_array())
# [['Charles', 'Trapp', 17],
# ['Sylvia', 'Mendez', 9]]

print(
    df.select_rows_where(lambda row: len(row['firstname']) >= len(row[

Пример #17

0

Показать файл

Файл: poly_regression.py Проект: snowthesprite/machine-learning

 (1.0, 56.5),
 (1.2, 72.4),
 (1.4, 88.7),
 (1.6, 104.8),
 (1.8, 120.1),
 (2.0, 134.0),
 (2.2, 145.9),
 (2.4, 155.2),
 (2.6, 161.3),
 (2.8, 163.6),
 (3.0, 161.5),
 (3.2, 154.4),
 (3.4, 141.7),
 (3.6, 122.8),
 (3.8, 97.1),
 (4.0, 64.0),
 (4.2, 22.9),
 (4.4, -26.8),
 (4.6, -85.7),
 (4.8, -154.4)]

# Load the (x, y) samples defined above into a two-column dataframe.
df = DataFrame.from_array(arr, ['x', 'y'])

# Add an x-squared term as the interaction of x with itself.
df = df.create_interaction_terms('x', 'x')

# Presumably the previous call names its new column 'x * x' (it is referenced
# here); interacting it with x again would give the cubic term -- verify.
df = df.create_interaction_terms('x * x', 'x')

# Fit y against all remaining columns.
regressor = LinearRegressor(df, 'y')

print(regressor.coefficients)

Пример #18

0

Показать файл

import matplotlib.pyplot as plt

# [x, y] training pairs with a binary y.
list_data = [[1, 0], [2, 0], [3, 0], [2, 1], [3, 1], [4, 1]]

# One regressor is fitted, and one curve collected, per delta value.
delta_table = [0.1, 0.01, 0.001, 0.0001]
all_coords = []

for delta_low in delta_table:
    # new_list=[]
    # for pair in list_data:
    #     if pair[1] == 0:
    #         new_list.append([pair[0],delta])
    #     else:
    #         new_list.append([pair[0],1-delta])

    # The dataframe is rebuilt from the same raw pairs on every iteration.
    df = DataFrame.from_array(list_data, columns=['x', 'y'])

    regressor = LogisticRegressor(df,
                                  prediction_column='y',
                                  max_value=1,
                                  delta=delta_low)

    # coords[0] holds the plotted x positions, coords[1] the predictions.
    coords = [[], []]
    for x in range(20):
        # NOTE(review): the plotted position is x / 100 but the prediction is
        # evaluated at x itself -- confirm whether both should use one scale.
        coords[0].append(x / 100)
        coords[1].append(regressor.predict({'constant': 1, 'x': x}))
    all_coords.append(coords)
print(all_coords)
plt.style.use('bmh')
# Draw one curve per delta value.
for coords in all_coords:
    plt.plot(coords[0], coords[1], linewidth=2.5)

Пример #19

0

Показать файл

Файл: test_dataframe.py Проект: charliebrown364/machine-learning

import sys
sys.path.append('src')
from dataframe import DataFrame
sys.path.append('kaggle/titanic')
from parse_line import parse_line

# Four-row people table for the first two query checks.
df = DataFrame.from_array([['Kevin', 'Fray', 5], ['Charles', 'Trapp', 17],
                           ['Anna', 'Smith', 13], ['Sylvia', 'Mendez', 9]],
                          columns=['firstname', 'lastname', 'age'])

# Test 1: columns come back in the SELECT order, rows sorted by age descending.
assert df.query(
    "SELECT lastname, firstname, age ORDER BY age DESC").to_array() == [[
        'Trapp', 'Charles', 17
    ], ['Smith', 'Anna', 13], ['Mendez', 'Sylvia', 9], ['Fray', 'Kevin', 5]]

print("\npassed test 1")

# Test 2: ordering by a column that is not part of the SELECT list.
assert df.query("SELECT firstname ORDER BY lastname ASC").to_array() == [[
    'Kevin'
], ['Sylvia'], ['Anna'], ['Charles']]

print("\npassed test 2")

# Eight-row table in which every lastname/age pair occurs twice, so a second
# ORDER BY key is needed to fix the row order.
df = DataFrame.from_array(
    [['Kevin', 'Fray', 5], ['Melvin', 'Fray', 5], ['Charles', 'Trapp', 17],
     ['Carl', 'Trapp', 17], ['Anna', 'Smith', 13], ['Hannah', 'Smith', 13],
     ['Sylvia', 'Mendez', 9], ['Cynthia', 'Mendez', 9]],
    columns=['firstname', 'lastname', 'age'])

assert df.query(
    "SELECT lastname, firstname, age ORDER BY age ASC, firstname DESC"

Пример #20

0

Показать файл

Файл: signal_separation.py Проект: snowthesprite/machine-learning

import math

# (x, y) samples of the signal to be separated.
data = [(0.0, 7.0), (0.2, 5.6), (0.4, 3.56), (0.6, 1.23), (0.8, -1.03),
        (1.0, -2.89), (1.2, -4.06), (1.4, -4.39), (1.6, -3.88), (1.8, -2.64),
        (2.0, -0.92), (2.2, 0.95), (2.4, 2.63), (2.6, 3.79), (2.8, 4.22),
        (3.0, 3.8), (3.2, 2.56), (3.4, 0.68), (3.6, -1.58), (3.8, -3.84),
        (4.0, -5.76), (4.2, -7.01), (4.4, -7.38), (4.6, -6.76), (4.8, -5.22)]

columns = ['y', 'sin(x)', 'cos(x)', 'sin(2x)', 'cos(2x)']

# One row per sample: y first, then the four trigonometric features of x.
new_data = [
    [y_value,
     math.sin(x_value),
     math.cos(x_value),
     math.sin(2 * x_value),
     math.cos(2 * x_value)]
    for x_value, y_value in data
]

df = DataFrame.from_array(new_data, columns)

regressor = LinearRegressor(df, 'y')

print(regressor.coefficients)
'''
import matplotlib.pyplot as plt
plt.style.use('bmh')

x_points = []
predicted_points = []

x = 0
while x <= 5 :
    data_dict = {'sin(x)' : math.sin(x), 'cos(x)' : math.cos(x), 'sin(2x)' : math.sin(2 * x), 'cos(2x)' : math.cos(2 * x)}
    x_points.append(x)

Пример #21

0

Показать файл

Файл: rocket_takeoff_regression.py Проект: anton-perez/machine-learning

import sys
sys.path.append('src')
from polynomial_regressor import PolynomialRegressor
from dataframe import DataFrame

# (time, distance) measurements.
data = [(1, 3.1), (2, 10.17), (3, 20.93), (4, 38.71), (5, 60.91), (6, 98.87),
        (7, 113.92), (8, 146.95), (9, 190.09), (10, 232.65)]

# Degree-2 fit and its predictions.
df = DataFrame.from_array(data, ['time', 'distance'])
quadratic_regressor = PolynomialRegressor(degree=2)
quadratic_regressor.fit(df, 'distance')
print('Quadratic Regressor:')
print(quadratic_regressor.coefficients)
for t in [5, 10, 200]:
    print(f'Distance after {t} seconds:',
          quadratic_regressor.predict({'time': t}))

# Degree-3 fit on a freshly built dataframe, with the same predictions.
df = DataFrame.from_array(data, ['time', 'distance'])
cubic_regressor = PolynomialRegressor(degree=3)
cubic_regressor.fit(df, 'distance')
print('Cubic Regressor:')
print(cubic_regressor.coefficients)
for t in [5, 10, 200]:
    print(f'Distance after {t} seconds:',
          cubic_regressor.predict({'time': t}))

Пример #22

0

Показать файл

Файл: sandwich_rating.py Проект: NateOwl1108/machine-learning

import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor

# Rows are [slices of roast beef, tbsp of peanut butter, rating].
df = DataFrame.from_array(
    [
        [0, 0, 1], [1, 0, 2], [2, 0, 4], [4, 0, 8], [6, 0, 9],
        [0, 2, 2], [0, 4, 5], [0, 6, 7], [0, 8, 6],
    ],
    columns=['slices of roast beef', 'tbsp of peanut butter', 'rating'],
)

regressor = LinearRegressor(df, dependent_variable='rating')
print(regressor.coefficients)

# Predict for 5 slices of beef, first without and then with 5 tbsp of
# peanut butter.
for _tbsp in (0, 5):
    print(
        regressor.predict({
            'slices of roast beef': 5,
            'tbsp of peanut butter': _tbsp,
        }))

Пример #23

0

Показать файл

import sys
sys.path.append('src')
from dataframe import DataFrame
from decision_tree import DecisionTree
from random_forest import RandomForest

# Rows are [x, y, class]; repeated rows are intentional duplicates.
data = [
    [2, 13, 'B'], [2, 13, 'B'], [2, 13, 'B'],
    [2, 13, 'B'], [2, 13, 'B'], [2, 13, 'B'],
    [3, 13, 'B'], [3, 13, 'B'], [3, 13, 'B'],
    [3, 13, 'B'], [3, 13, 'B'], [3, 13, 'B'],
    [2, 12, 'B'], [2, 12, 'B'],
    [3, 12, 'A'], [3, 12, 'A'],
    [3, 11, 'A'], [3, 11, 'A'],
    [3, 11.5, 'A'], [3, 11.5, 'A'],
    [4, 11, 'A'], [4, 11, 'A'],
    [4, 11.5, 'A'], [4, 11.5, 'A'],
    [2, 10.5, 'A'], [2, 10.5, 'A'],
    [3, 10.5, 'B'],
    [4, 10.5, 'A'],
    [3, 9.5, 'A'],
    [2, 10, 'A'],
]

df = DataFrame.from_array(data, columns=['x', 'y', 'class'])

# Fit a forest built with the argument 10 and classify one new point.
r = RandomForest(10)
r.fit(df)
print(r.predict({'x': 3, 'y': 10}))

Пример #24

0

Показать файл

import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

# Rows are [beef, pb, rating, interactive]; in every row the 'interactive'
# value equals beef * pb.
# The statement below previously started with a stray leading space, which
# is an IndentationError at module level.
df = DataFrame.from_array(
    [[0, 0, 1, 0],
     [1, 0, 2, 0],
     [2, 0, 4, 0],
     [4, 0, 8, 0],
     [6, 0, 9, 0],
     [0, 2, 2, 0],
     [0, 4, 5, 0],
     [0, 6, 7, 0],
     [0, 8, 6, 0],
     [2, 2, 0.1, 4],
     [3, 4, 0.1, 12]],
    columns=['beef', 'pb', 'rating', 'interactive'])

# Fit on 'rating'; 10 is passed as the regressor's second positional argument.
log_reg = LogisticRegressor(df, 10, dependent_variable='rating')
print(log_reg.predict({'beef': 5, 'pb': 0, 'interactive': 0}))
print(log_reg.predict({'beef': 12, 'pb': 0, 'interactive': 0}))
print(log_reg.predict({'beef': 5, 'pb': 5, 'interactive': 25}))

Пример #25

0

Показать файл

Файл: test_linear_regressor.py Проект: snowthesprite/machine-learning

df = DataFrame.from_array([[1,0.2], [2,0.25], [3,0.5]], columns = ['hours worked', 'progress'])

regressor = LinearRegressor(df, dependent_variable='progress')

print('Does all the linear_regressor stuff work')

assert regressor.coefficients == [0.01667, 0.15], 'No, coefficients does not work'

assert regressor.predict({'hours worked': 4}) == 0.61667, 'No, predict does not work'

print('Yes they do', "\n")
'''

# Rows are [scoops of chocolate, scoops of vanilla, taste rating].
df = DataFrame.from_array(
    [
        [0, 0, 0.1],
        [1, 0, 0.2],
        [0, 2, 0.5],
        [4, 5, 0.6],
    ],
    columns=['scoops of chocolate', 'scoops of vanilla', 'taste rating'],
)

regressor = LinearRegressor(df, dependent_variable='taste rating')

print('Does all the linear_regressor stuff work')

# Compare rounded copies so the regressor's own coefficients stay untouched.
reg_coeff = {
    name: round(coefficient, 8)
    for name, coefficient in regressor.coefficients.copy().items()
}

expected_coeff = {
    'constant': 0.19252336,
    'scoops of chocolate': -0.05981308,
    'scoops of vanilla': 0.13271028
}
assert reg_coeff == expected_coeff, 'No, coefficients does not work'

Пример #26

0

Показать файл

Файл: test_k_nearest_neighbors_classifier.py Проект: anton-perez/machine-learning

import sys
sys.path.append('src')
from k_nearest_neighbors_classifier import KNearestNeighborsClassifier
from dataframe import DataFrame

# Rows are [cookie type, portion eggs, portion butter, portion sugar,
# portion flour].
df = DataFrame.from_array(
    [
        ['Shortbread', 0.14, 0.14, 0.28, 0.44],
        ['Shortbread', 0.10, 0.18, 0.28, 0.44],
        ['Shortbread', 0.12, 0.10, 0.33, 0.45],
        ['Shortbread', 0.10, 0.25, 0.25, 0.40],
        ['Sugar', 0.00, 0.10, 0.40, 0.50],
        ['Sugar', 0.00, 0.20, 0.40, 0.40],
        ['Sugar', 0.10, 0.08, 0.35, 0.47],
        ['Sugar', 0.00, 0.05, 0.30, 0.65],
        ['Fortune', 0.20, 0.00, 0.40, 0.40],
        ['Fortune', 0.25, 0.10, 0.30, 0.35],
        ['Fortune', 0.22, 0.15, 0.50, 0.13],
        ['Fortune', 0.15, 0.20, 0.35, 0.30],
        ['Fortune', 0.22, 0.00, 0.40, 0.38],
    ],
    columns=[
        'Cookie Type',
        'Portion Eggs',
        'Portion Butter',
        'Portion Sugar',
        'Portion Flour',
    ],
)

knn = KNearestNeighborsClassifier(k=5)
knn.fit(df, dependent_variable='Cookie Type')

# The recipe to classify, keyed by the feature column names.
observation = {
    'Portion Eggs': 0.10,
    'Portion Butter': 0.15,
    'Portion Sugar': 0.30,
    'Portion Flour': 0.45,
}

print(knn.compute_distances(observation).to_array())
# Returns a dataframe representation of the following array:

Пример #27

0

Показать файл

Файл: test_random_forest.py Проект: geomeza/machine-learning

# for i in range(len(pos_neg)):
#     correct_class = pos_neg[i][3]
#     observation = into_new_observation(pos_neg[i])
#     prediction = r.predict(observation)
#     if prediction == correct_class:
#         correct += 1

# assert correct/len(pos_neg) * 100 == 100, 'WRONG ACCURACY BRUH'

# Class 'A': every lattice point in [-5, 5]^3 with no zero coordinate.
points = [[x, y, z, 'A'] for z in range(-5, 6) for y in range(-5, 6)
          for x in range(-5, 6) if x * y * z != 0]
# Class 'B': the all-positive lattice points, added twice as separate rows.
for _ in range(2):
    points.extend([[x, y, z, 'B'] for z in range(1, 6) for y in range(1, 6)
                   for x in range(1, 6) if x * y * z != 0])

df = DataFrame.from_array(points, columns=['x', 'y', 'z', 'class'])
r = RandomForest(100, depth=None)
r.fit(df)
correct = 0

# Score the forest on the same points it was trained on.
for point in points:
    correct_class = point[3]
    observation = into_new_observation(point)
    prediction = r.predict(observation)
    if prediction == correct_class:
        correct += 1

assert correct / len(points) * 100 == 90, 'WRONG ACCURACY BRUH'

print('passed')

Пример #28

0

Показать файл

import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

# Rows are [beef, pb, condiments, rating]; condiments is a list of names.
df = DataFrame.from_array(
    [
        [0, 0, [], 1],
        [0, 0, ['mayo'], 1],
        [0, 0, ['jelly'], 4],
        [0, 0, ['mayo', 'jelly'], 0],
        [5, 0, [], 4],
        [5, 0, ['mayo'], 8],
        [5, 0, ['jelly'], 1],
        [5, 0, ['mayo', 'jelly'], 0],
        [0, 5, [], 5],
        [0, 5, ['mayo'], 0],
        [0, 5, ['jelly'], 9],
        [0, 5, ['mayo', 'jelly'], 0],
        [5, 5, [], 0],
        [5, 5, ['mayo'], 0],
        [5, 5, ['jelly'], 0],
        [5, 5, ['mayo', 'jelly'], 0],
    ],
    columns=['beef', 'pb', 'condiments', 'rating'],
)
df = df.create_dummy_variables('condiments')

# Add the pairwise interaction columns, in this fixed order.
for _first, _second in [('beef', 'pb'), ('beef', 'mayo'), ('beef', 'jelly'),
                        ('pb', 'mayo'), ('pb', 'jelly'), ('mayo', 'jelly')]:
    df = df.create_interaction_terms(_first, _second)
log_df = DataFrame(df.data_dict, df.columns)

logistic_regressor = LogisticRegressor(log_df, 10, dependent_variable='rating')

# test 8 slices of beef + mayo
observation = {'beef': 8, 'mayo': 1}

predicted = round(logistic_regressor.predict(observation), 2)
assert predicted == 9.72

Пример #29

0

Показать файл

import sys
sys.path.append('src')
from dataframe import DataFrame
from polynomial_regressor import PolynomialRegressor

# (x, y) samples fitted below with polynomials of degree 0, 1 and 2.
df = DataFrame.from_array(
    [(0, 1), (1, 2), (2, 5), (3, 10), (4, 20), (5, 30)],
    columns=['x', 'y'],
)

# Degree 0: a constant model.
constant_regressor = PolynomialRegressor(degree=0)
constant_regressor.fit(df, dependent_variable='y')
print(constant_regressor.coefficients)
# expected: {'constant': 11.3333}
print(constant_regressor.predict({'x': 2}))
# expected: 11.3333

# Degree 1: a straight line.
linear_regressor = PolynomialRegressor(degree=1)
linear_regressor.fit(df, dependent_variable='y')
print(linear_regressor.coefficients)
# expected: {'constant': -3.2381, 'x': 5.8286}
print(linear_regressor.predict({'x': 2}))
# expected: 8.4190

# Degree 2: a parabola.
quadratic_regressor = PolynomialRegressor(degree=2)
quadratic_regressor.fit(df, dependent_variable='y')
print(quadratic_regressor.coefficients)
# expected: {'constant': 1.1071, 'x': -0.6893, 'x^2': 1.3036}

Пример #30

0

Показать файл

Файл: test_decision_tree.py Проект: geomeza/machine-learning

# print('\nTesting root low high indices')
# assert dt.root.low.high.row_indices == [6]
# print('passed')

# print('\nTesting root low low impurity')
# assert dt.root.low.low.impurity == 0
# print('passed')

# print('\nTesting root low high impurity')
# assert dt.root.low.high.impurity == 0
# print('passed')

print('Splitting Tests')
# Rows are [x, y, class]; row positions 0..6 are the indices asserted below.
df = DataFrame.from_array(
    [[1, 11, 'A'], [1, 12, 'A'], [2, 11, 'A'], [1, 13, 'B'], [2, 13, 'B'],
     [3, 13, 'B'], [3, 11, 'B']],
    columns=['x', 'y', 'class'])

# Manual path: initialise the tree, then call split() twice.
dt = DecisionTree(split_metric='gini')
dt.initialize(df)
dt.split()
dt.split()

# After two split() calls the root has a 'high' child and its 'low' child has
# itself been split.
assert dt.root.high.row_indices == [3, 4, 5]
assert dt.root.low.low.row_indices == [0, 1, 2]
assert dt.root.low.high.row_indices == [6]
print('passed')
# fit() on a fresh tree is expected to produce the same partition.
dt = DecisionTree(split_metric='gini')
dt.fit(df)
assert dt.root.high.row_indices == [3, 4, 5]
assert dt.root.low.low.row_indices == [0, 1, 2]

Python DataFrame.from_array примеры использования