Example #1
def results(selected_states="none", words="none"):
    if selected_states == "none" or words == "none":
        return "Make sure you selected your state(s) AND submitted a topic of interest. Press the back button to retry."
    else:
        print("Calculating...")
        states = selected_states.replace(",", " ")

        # TODO: match each selected state with its senators
        # models = np.where(senators in model)
        # models is an array of the filenames of models
        # word2ids is an array of the filenames of word2ids
        senators = []
        sen_state = []
        models = ["app/models/model_architecture_set_0.json", "app/models/model_architecture_set_1.json"]
        word2ids = ["app/models/word2id_set_0.json", "app/models/word2id_set_1.json"]
        weights = ['app/models/model_weights_set_0.h5', 'app/models/model_weights_set_1.h5']
        k = len(models)
        sentiments = []
        for i in range(k):
            (model, word2id) = model_setup(models[i], word2ids[i], weights[i])
            sentiments.append(predict_sentiment(words, model, word2id))
            print(i)

        table_results = Table().with_columns("Senators", senators, "State", sen_state, "Sentiment", sentiments)
        text_results = table_results.as_text()

        return render_template("results.html", selected_states=selected_states, words=words, result=text_results)
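model_setup and predict_sentiment are not shown in this example. Given the paired architecture-JSON and .h5 weights files, a minimal sketch assuming a Keras text classifier (the tokenization scheme and input length here are guesses):

# Hypothetical sketch of the helpers above, assuming a Keras model saved
# as architecture JSON plus an HDF5 weights file.
import json
from tensorflow.keras.models import model_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences

def model_setup(model_path, word2id_path, weights_path):
    # Rebuild the model from its JSON architecture, then load the weights.
    with open(model_path) as f:
        model = model_from_json(f.read())
    model.load_weights(weights_path)
    with open(word2id_path) as f:
        word2id = json.load(f)
    return model, word2id

def predict_sentiment(words, model, word2id, maxlen=100):
    # Map tokens to ids (0 for unknown words) and pad to a fixed length.
    ids = [word2id.get(w, 0) for w in words.lower().split()]
    x = pad_sequences([ids], maxlen=maxlen)
    return float(model.predict(x)[0][0])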
Example #2
def game_filter(csv_file):
    # Takes a csv filepath for one of the EightThirtyFour data sets and
    # filters to games that were within 10 points late in the fourth
    # quarter (game clock at or below 6.5 minutes).
    pbp = Table().read_table(csv_file)
    unique_games = pbp.group('GAME_ID').column(0)
    print(unique_games)
    last_quarter = pbp.where('PERIOD', predicates.are.equal_to(4))
    transformed_minutes = last_quarter.apply(time_string_to_number,
                                             'PCTIMESTRING')
    last_quarter_and_minutes = last_quarter.with_column(
        'TIME', transformed_minutes)
    between_six_and_seven = last_quarter_and_minutes.where(
        'TIME', predicates.are.below_or_equal_to(6.5))
    close_games = make_array()
    for game in unique_games:
        game_scores_only = between_six_and_seven.where(
            'GAME_ID',
            predicates.are.equal_to(game)).select('TIME', 'SCORE').where(
                'SCORE', predicates.are.not_equal_to('nan'))
        score = game_scores_only.row(0).item(1)
        t1, t2 = score.split('-')
        if abs(int(t1) - int(t2)) <= 10:
            close_games = np.append(close_games, game)
    return close_games
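time_string_to_number is referenced above but not shown. A plausible sketch, assuming PCTIMESTRING holds 'MM:SS' game-clock strings:

def time_string_to_number(time_string):
    # Hypothetical helper: '5:43' -> 5.7166... minutes remaining.
    minutes, seconds = time_string.split(':')
    return int(minutes) + int(seconds) / 60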
Example #3
def sanitize_dataframe(df: Table):
    """Sanitize a DataFrame to prepare it for serialization.
    
    copied from the ipyvega project
    * Make a copy
    * Convert categoricals to strings.
    * Convert np.bool_ dtypes to Python bool objects
    * Convert np.int dtypes to Python int objects
    * Convert floats to objects and replace NaNs/infs with None.
    * Convert DateTime dtypes into appropriate string representations
    """
    import numpy as np
    from pandas import notnull

    if df is None:
        return None
        # raise InternalLogicalError("Cannot sanitize empty df")

    df = df.copy()

    def to_list_if_array(val):
        if isinstance(val, np.ndarray):
            return val.tolist()
        else:
            return val

    for col_name in df.labels:
        dtype = df.column(col_name).dtype
        if str(dtype) == 'category':
            # XXXX: work around bug in to_json for categorical types
            # https://github.com/pydata/pandas/issues/10778
            df[col_name] = df[col_name].astype(str)
        elif str(dtype) == 'bool':
            # convert numpy bools to objects; np.bool is not JSON serializable
            df[col_name] = df[col_name].astype(object)
        elif np.issubdtype(dtype, np.integer):
            # convert integers to objects; np.int is not JSON serializable
            df[col_name] = df[col_name].astype(object)
        elif np.issubdtype(dtype, np.floating):
            # For floats, convert to Python float: np.float is not JSON serializable
            # Also convert NaN/inf values to null, as they are not JSON serializable
            col = df[col_name]
            bad_values = np.isnan(col) | np.isinf(col)
            df[col_name] = np.where(bad_values, None, col).astype(object)
            # col.astype(object)[~bad_values]= None
        elif str(dtype).startswith('datetime'):
            # Convert datetimes to strings
            # astype(str) will choose the appropriate resolution
            new_column = df[col_name].astype(str)
            new_column[new_column == 'NaT'] = ''
            df[col_name] = new_column
        elif dtype == object:
            # Convert numpy arrays saved as objects to lists
            # Arrays are not JSON serializable
            col = np.vectorize(to_list_if_array)(df[col_name])
            df[col_name] = np.where(notnull(col), col, None).astype(object)
    return df
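A minimal usage sketch: NaN and inf in a float column come back as None in an object column, which JSON serializers can handle.

import numpy as np
from datascience import Table

raw = Table().with_columns('x', np.array([1.0, np.nan, np.inf]))
clean = sanitize_dataframe(raw)
# clean['x'] is now an object column: [1.0, None, None]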
Example #4
def loadChipTables(*chipData):
    '''
    Loads the color chip information into two datascience Tables.
    
    Args:
        chipData (tuple or dict): a variable length argument consisting of 
            either a tuple with a dictionary mapping each Munsell coordinate 
            to its WCS chip number, and a dictionary mapping each WCS chip 
            number to its Munsell coordinates, or those dictionaries in 
            that order
            
    Returns:
        the same information in two datascience Table objects
        
    Examples:
        >>> munsellInfo = readChipData('./WCS_data_core/chip.txt')
        >>> coordToIndexTable, indexToCoordTable = loadChipTables(munsellInfo)
        ...
        >>> coord_to_index, index_to_coord = readChipData('./WCS_data_core/chip.txt')
        >>> coordToIndexTable, indexToCoordTable = loadChipTables(coord_to_index, 
                                                                  index_to_coord)
    '''
    if len(chipData) == 0:
        print('Warning: No data provided')
        return Table(), Table()
    if len(chipData) == 1:
        chipData = chipData[0]
    elif len(chipData) > 2:
        raise ValueError('chipData can accept at most two arguments')
    # coordinate to index
    # get the dictionaries
    coord_to_index = chipData[0]
    # sort the keys and get the sorted values
    sorted_keys, sorted_values = _get_sorted_keys_and_values(coord_to_index)

    coordToIndexTable = Table().with_columns('Coordinate', sorted_keys,
                                             'Index', sorted_values)

    # index to lightness, hue coordinate
    # get the dictionaries
    index_to_coord = chipData[1]
    # sort the keys and get the sorted values
    sorted_keys, sorted_values = _get_sorted_keys_and_values(index_to_coord)
    # unzip the lightness and hue values
    unzipped_values = [v for v in zip(*sorted_values)]
    # get the lightness and hue values
    lightness, hue = unzipped_values

    indexToCoordTable = Table().with_columns('Coordinate', sorted_keys,
                                             'Lightness', lightness, 'Hue',
                                             hue)

    return coordToIndexTable, indexToCoordTable
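_get_sorted_keys_and_values is not shown; a hypothetical sketch consistent with how it is used here:

def _get_sorted_keys_and_values(d):
    # Hypothetical helper: sort the dictionary keys and return the keys
    # and their values as two parallel lists.
    sorted_keys = sorted(d)
    return sorted_keys, [d[k] for k in sorted_keys]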
Example #5
def transition_probability(self, values):
    """
    For a multivariate probability distribution, assigns transition
    probabilities, i.e., P(Y | X).

    Parameters
    ----------
    values : List or Array
        Values that must correspond to the domain in the same order

    Returns
    -------
    Table
        A probability distribution with those probabilities
    """
    if any(np.array(values) < 0):
        warnings.warn('Probability cannot be negative')

    states = self.column(0)

    self = multi_domain(Table(), 'Source', states, 'Target', states)

    return_table = self.with_column('Probability', values)
    _transition_warn(return_table)
    return return_table
Example #6
 def summary(self):
     b0, bs = self.params
     sum_tbl = Table().with_columns([
         ("Param", ['Intercept'] + self.input_labels),
         ("Coeffient", [b0] + list(bs)),
     ])
     return sum_tbl
Example #7
def fast_run_expectancy(retro, re):
    TABLE_FLAG = False
    if isinstance(retro, _Table):
        TABLE_FLAG = True
        retro = retro.to_df()
        re = re.to_df()

    re = re.set_index(['Outs', 'Start_Bases'])

    # Build current out-runner states
    idx = list(zip(retro['Outs'], retro['Start_Bases']))
    # Extract run potentials
    retro['Run_Expectancy'] = re.loc[idx].values

    next_outs = retro['Outs'] + retro['Event_Outs']
    # Build next out-runner states
    idx = list(zip(next_outs, retro['End_Bases']))
    # Extract run potentials
    retro['Run_Expectancy_Next'] = re.loc[idx].values

    # When the inning ends, there are 3 outs.  That is not in the run
    # expectancy matrix so inning ending plate appearances will have an NA
    # value here.  We fill those with 0.
    retro['Run_Expectancy_Next'] = retro['Run_Expectancy_Next'].fillna(0)

    return _Table.from_df(retro) if TABLE_FLAG else retro
Example #8
def loadSpeakerTable(speakerData):
    '''
    Loads the speaker info into a datascience Table.
    
    Args:
        speakerData (dict): a dictionary mapping each language 
            to a list of its speakers' age and gender information
            
    Returns:
        the same information in a datascience Table
    '''
    # create lists for the information
    language = []
    speaker = []
    age = []
    gender = []

    # loop through the languages
    for lang in speakerData:
        # loop through the speakers
        for spkr in speakerData[lang]:
            # add to the lists
            language.append(lang)
            speaker.append(spkr)
            # get the age and gender
            a, g = speakerData[lang][spkr][0]
            age.append(a)
            gender.append(g)

    # turn into a table
    speakerTable = Table().with_columns('Language', language, 'Speaker',
                                        speaker, 'Age', age, 'Gender', gender)
    return speakerTable
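For example, with a small hand-built dictionary in the shape the loop expects (each speaker maps to a list whose first entry is an (age, gender) pair):

speakers = {'Abidji': {1: [(30, 'M')], 2: [(22, 'F')]}}
loadSpeakerTable(speakers)
# Language | Speaker | Age | Gender
# Abidji   | 1       | 30  | M
# Abidji   | 2       | 22  | F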
Example #9
def most_common_lineup_position(retro):
    TABLE_FLAG = False
    if isinstance(retro, _Table):
        TABLE_FLAG = True
        retro = retro.to_df()

    # Order of operations:
    # 1. Get PA counts
    # 2. Turn Lineup_Order into a column
    # 3. Rename column to PA
    # 4. Sort on PA in descending order
    lineup_pos = retro.groupby(['Batter_ID', 'Lineup_Order'])['Inning'].\
        count().\
        reset_index(level='Lineup_Order').\
        rename(columns={'Inning': 'PA'}).\
        sort_values('PA', ascending=False)

    # Duplicates indicate other positions.  By keeping first, we keep the most
    # common due to the sorting
    most_common = ~lineup_pos.index.duplicated(keep='first')
    lineup_pos = lineup_pos.loc[most_common, ['Lineup_Order']].sort_index()

    if TABLE_FLAG:
        return _Table.from_df(lineup_pos.reset_index())
    else:
        return lineup_pos
Example #10
    def steady_state(self):
        """
        Finds the stationary distribution of the Markov Chain.

        Returns
        -------
        Table
            Distribution.

        Examples
        --------
        >>> states = ['A', 'B']
        >>> transition_matrix = np.array([[0.1, 0.9],
        ...                               [0.8, 0.2]])
        >>> mc = MarkovChain.from_matrix(states, transition_matrix)
        >>> mc.steady_state()
        Value | Probability
        A     | 0.666667
        B     | 0.333333
        """
        # Steady state is the left eigenvector that corresponds to eigenvalue=1.
        w, vl = scipy.linalg.eig(self.matrix, left=True, right=False)

        # Find index of eigenvalue = 1.
        index = np.isclose(w, 1)

        eigenvector = np.real(vl[:, index])[:, 0]
        probabilities = eigenvector / sum(eigenvector)

        # Zero out floating point errors that are negative.
        indices = np.logical_and(np.isclose(probabilities, 0),
                                 probabilities < 0)
        probabilities[indices] = 0
        return Table().values(self.states).probabilities(probabilities)
Example #11
def transition_function(self, pfunc):
    """
    Assigns transition probabilities to a Distribution via a probability
    function. The probability function is applied to each value of the
    domain. The domain values must be in the first column.

    Parameters
    ----------
    pfunc : variate function
        Conditional probability function of the distribution, P(Y | X)

    Returns
    -------
    Table
        Table with those probabilities in its final column

    """
    states = self.column(0)

    self = multi_domain(Table(), 'Source', states, 'Target', states)

    domain_names = self.labels
    values = np.array(self.apply(pfunc, *domain_names)).astype(float)
    if any(values < 0):
        warnings.warn('Probability cannot be negative')
    conditioned_var = self.labels[0]
    all_other_vars = ','.join(self.labels[1:])
    return_table = self.with_column('P({} | {})'.format(
        all_other_vars, conditioned_var), values)
    _transition_warn(return_table)
    return return_table
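A usage sketch in the prob140 style this appears to follow (the .states constructor is an assumption):

# Hypothetical two-state chain: each state switches with probability 0.9
# and stays put with probability 0.1.
def pfunc(source, target):
    return 0.1 if source == target else 0.9

Table().states(['A', 'B']).transition_function(pfunc)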
Example #12
def test(data_structures: List[Union[List[str], Deque[str]]],
         sample_sizes: List[int]) -> Table:
    """
  Return the test results in a Table.
  """
    append_times, pop_times = [], []
    data_structure_labels = []
    elements = []

    for N in sample_sizes:
        print('%d elements' % (N))
        print('-' * 20)
        for ds in data_structures:
            append_time, pop_time = performance(ds, N)
            output(append_time, pop_time, ds)

            append_times.append(append_time)
            pop_times.append(pop_time)
            data_structure_labels.append(type_name(ds))
            elements.append(N)
            print()
        print()
        # print()

    table = Table().with_columns('Data structure', data_structure_labels,
                                 'Elements', elements, 'Append time(nano sec)',
                                 append_times, 'Pop time(nano sec)', pop_times)
    return table
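performance, output, and type_name are helpers not shown here; plausible sketches (the exact timing scheme is an assumption):

import time
from collections import deque

def type_name(ds):
    return type(ds).__name__  # e.g. 'list' or 'deque'

def performance(ds, n):
    # Hypothetical helper: time n appends, then n pops from the front,
    # returning the average nanoseconds per operation for each phase.
    start = time.perf_counter_ns()
    for i in range(n):
        ds.append(i)
    append_time = (time.perf_counter_ns() - start) / n
    pop = ds.popleft if isinstance(ds, deque) else lambda: ds.pop(0)
    start = time.perf_counter_ns()
    for _ in range(n):
        pop()
    pop_time = (time.perf_counter_ns() - start) / n
    return append_time, pop_time

def output(append_time, pop_time, ds):
    print('%s: append %.0f ns/op, pop %.0f ns/op'
          % (type_name(ds), append_time, pop_time))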
Example #13
def fill_null(table, fill_column=None, fill_value=None, fill_method=None):
    TABLE_FLAG = False
    if isinstance(table, _Table):
        TABLE_FLAG = True
        table = table.to_df()
    data = table[fill_column] if fill_column is not None else table
    data = data.fillna(value=fill_value, method=fill_method)
    return _Table.from_df(data) if TABLE_FLAG else data
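A usage sketch, with _Table bound to datascience's Table:

import numpy as np
from datascience import Table as _Table

t = _Table().with_columns('x', np.array([1.0, np.nan, 3.0]))
fill_null(t, fill_value=0)  # NaNs in every column become 0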
Example #14
 def __init__(self, num_notes=20, starting_note="0"):
     """ __init__ """
     self.res_path = str(Path("components/res/"))
     self.data_table = Table.read_table(self.res_path +
                                        "/probability_table.csv")
     self.notes = self.data_table.column("octave")
     self.num_notes = num_notes
     self.starting_note = starting_note
Example #15
def emp_dist(values):
    """
    Takes an array of values and returns an empirical distribution

    Parameters
    ----------
    values : array
        Array of values that will be grouped by the distribution

    Returns
    -------
    Table
        A distribution

    Examples
    --------
    >>> x = make_array(1, 1, 1, 1, 1, 2, 3, 3, 3, 4)
    >>> emp_dist(x)
    Value | Proportion
    1     | 0.5
    2     | 0.1
    3     | 0.3
    4     | 0.1
    """

    total = len(values)

    position_counts = Table().with_column('position', values).group(0)
    new_dist = Table().values(position_counts.column(0))
    return new_dist.with_column(
        'Proportion',
        position_counts.column(1) / total
    )
Example #16
    def transpose(cls, tbl, category_col, time_col='Day', time_less=None):
        """Transpose a table with one column containing categories and remaining labels time stamps"""

        time_col_vals = [lbl for lbl in tbl.labels if not lbl == category_col]
        xtbl = Table().with_column(time_col, time_col_vals)
        vals = tbl.drop(category_col)
        for lbl, row_vals in zip(tbl[category_col], vals.rows):
            xtbl[lbl] = row_vals
        return TimeTable.from_table(xtbl, time_col, time_less)
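A usage sketch, assuming TimeTable comes from the same module (the time-stamp column labels become the Day column):

sales = Table().with_columns('Product', ['A', 'B'],
                             '1', [10, 30],
                             '2', [20, 40])
TimeTable.transpose(sales, 'Product')
# Day | A  | B
# 1   | 10 | 30
# 2   | 20 | 40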
Example #17
 def lm_fit(self, output_label, model_fun, x_column_or_label=None):
     if x_column_or_label is None:
         input_labels = [
             lbl for lbl in self.labels if not lbl == output_label
         ]
         f_values = [
             model_fun(*row) for row in self.select(input_labels).rows
         ]
         p = len(input_labels)
     else:
         f_values = model_fun(self._get_column(x_column_or_label))
         p = 1
     fit_tbl = Table(["Quantity", "Value"])
     return fit_tbl.with_rows([
         ("Residual standard error", self.RSE(output_label, f_values)),
         ("R^2", self.R2(output_label, f_values)),
         ("F-statistic", self.F_stat(output_label, f_values, p))
     ])
Example #18
File: b2.py  Project: yifanwu/b2
 def from_file(self, filepath_or_buffer, *args, **vargs):
     try:
         table = Table.read_table(filepath_or_buffer, *args, **vargs)
         df_name = find_name()
         return self.create_with_table_wrap(table, df_name)
     except FileNotFoundError:
         red_print(f"File {filepath_or_buffer} does not exist!")
     except UserError as err:
         red_print(err)
Example #19
 def forecast_table(self, past, ahead, inc=1):
     """Project a TimeTable forward.  inc must match the interval"""
     last_time = self[self.time_column][-1]
     past_times = self[self.time_column][-past-1:-1]
     fore_time = np.arange(last_time + inc, last_time + inc + ahead, inc)
     def project(lbl):
         m, b = np.polyfit(past_times, self[lbl][-past-1:-1], 1)
         return [m*time + b for time in fore_time]
     xtbl = Table().with_columns(
         [(self.time_column, fore_time)] +
         [(label, project(label)) for label in self.categories])
     return self.copy().append(xtbl)
Example #20
def datasci():
    cong_dict = [houseDictPerYear(y) for y in range(1990, 2018)]
    sen_dict = [senateDictPerYear(y) for y in range(1990, 2018)]
    house = Table().with_columns(
        "Year", np.arange(1990, 2018), "House Bi-Partisan",
        np.array([(x['100'] + x['95']) * 100 / x['total'] for x in cong_dict]),
        "House Non Partisan",
        np.array([x['nonpart'] * 100 / x['total'] for x in cong_dict]),
        "House Collaborative",
        np.array([x['together'] * 100 / x['total'] for x in cong_dict]))
    senate = Table().with_columns(
        "Year", np.arange(1990, 2018), "Senate Bi-Partisan",
        np.array([(x['100'] + x['95']) * 100 / x['total'] for x in sen_dict]),
        "Senate Non Partisan",
        np.array([x['nonpart'] * 100 / x['total'] for x in sen_dict]),
        "Senate Collaborative",
        np.array([x['together'] * 100 / x['total'] for x in sen_dict]))
    total = senate.join("Year", house)
    #print(total)
    #total.scatter("Year")
    plt.axis([1989, 2020, 0, 100])
    plt.plot(total.column("Year"),
             total.column("House Bi-Partisan"),
             c='g',
             label="House Bi-Partisan")
    plt.plot(total.column("Year"),
             total.column("Senate Bi-Partisan"),
             c='y',
             label="Senate Bi-Partisan")
    #     plt.plot(total.column("Year"), total.column("House Non Partisan"), '*', c='g', label="House Non Partisan")
    #     plt.plot(total.column("Year"), total.column("Senate Non Partisan"), '*', c='y', label="Senate Non Partisan")
    #     plt.plot(total.column("Year"), total.column("House Collaborative"), '.', c='g', label="House Collaboration")
    #     plt.plot(total.column("Year"), total.column("Senate Collaborative"), '.', c='y', label="Senate Collaboration")

    drawParties(plt, "both")
    #plt.legend(bbox_to_anchor=(0.5, -0.15))
    plt.legend(loc=4)
    plt.ylabel("Percentage of Total Votes")
    plt.xlabel("Years (1990-2017)")
    plt.savefig("data.png", dpi=400)
    plt.show()
Example #21
 def Cor(self):
     """Create a correlation matrix of numeric columns as a table."""
     assert (self.num_rows > 0)
     num_labels = [
         lbl for lbl in self.labels if isinstance(self[lbl][0], Number)
     ]
     tbl = self.select(num_labels)
     Cor_tbl = Table().with_column("Param", num_labels)
     for lbl in num_labels:
         Cor_tbl[lbl] = [self.Cor_coef(lbl, xlbl) for xlbl in num_labels]
     return Cor_tbl
Example #22
 def extend_table(self, ahead, inc=1):
     """Project a TimeTable forward from last interval.  inc must match the interval"""
     last_time = self[self.time_column][-1]
     fore_time = np.arange(last_time + inc, last_time + inc + ahead, inc)
     def project(lbl):
         b = self[lbl][-1]
         m = self[lbl][-1] - self[lbl][-2]
         return [m*(time+1)*inc + b for time in range(ahead)]

     xtbl = Table().with_columns(
         [(self.time_column, fore_time)] +
         [(label, project(label)) for label in self.categories])
     return self.copy().append(xtbl)
Example #23
def get_first_from_group(table, groupby):
    TABLE_FLAG = False
    if isinstance(table, _Table):
        TABLE_FLAG = True
        table = table.to_df()
    out = table.sort_values(groupby).\
        drop_duplicates(subset=groupby, keep='first')
    if TABLE_FLAG:
        return _Table.from_df(out)
    else:
        return out
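A usage sketch: after sorting on the group key, drop_duplicates keeps the first row for each key.

import numpy as np
from datascience import Table as _Table

t = _Table().with_columns('k', np.array([2, 1, 1]),
                          'v', np.array([30, 10, 20]))
get_first_from_group(t, 'k')  # one row per distinct 'k'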
Example #24
 def to_table(self, data=None):
     """
     Loads the table into memory, and converts it into a UCB DataScience Table
     Check out http://data8.org for more information
     """
     if not _berktb:
         print("You don't have the Berkeley DataScience library installed")
         return
     if not data:
         data = self.db.c.execute(self._formulate()).fetchall()
     cols = list(self.columns.keys())
     return Table.from_rows(data, cols)
Example #25
 def lm_summary_1d(self, y_column_or_label, x_label):
     b0, b1 = self.regression_1d_params(y_column_or_label, x_label)
     r_model = lambda x: b0 + x * b1
     SE_b0, SE_b1 = self.SE_1d_params(y_column_or_label, x_label, r_model)
     sum_tbl = Table().with_column('Param', ['intercept', x_label])
     sum_tbl['Coefficient'] = [b0, b1]
     sum_tbl['Std Error'] = (SE_b0, SE_b1)
     sum_tbl['t-statistic'] = np.array([b0, b1]) / sum_tbl['Std Error']
     sum_tbl['95% CI'] = [(b0 - 2 * SE_b0, b0 + 2 * SE_b0),
                          (b1 - 2 * SE_b1, b1 + 2 * SE_b1)]
     sum_tbl['99% CI'] = [(b0 - 3 * SE_b0, b0 + 3 * SE_b0),
                          (b1 - 3 * SE_b1, b1 + 3 * SE_b1)]
     return sum_tbl
Example #26
def merge(t1, t2, on, how='outer', fillna=True):
    DS_FLAG = False
    if isinstance(t1, _Table):
        t1 = t1.to_df()
        DS_FLAG = True
    if isinstance(t2, _Table):
        t2 = t2.to_df()
    full_t = _pd.merge(t1, t2, how=how, left_on=on, right_on=on)
    if fillna:
        full_t.fillna(0, inplace=True)
    if DS_FLAG:
        return _Table.from_df(full_t)
    else:
        return full_t
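A usage sketch (assuming _pd and _Table are bound to pandas and datascience's Table, as the body implies):

import numpy as np
from datascience import Table as _Table

left = _Table().with_columns('k', np.array([1, 2]), 'x', np.array([10, 20]))
right = _Table().with_columns('k', np.array([2, 3]), 'y', np.array([5, 6]))
merge(left, right, on='k')
# outer join on 'k'; unmatched cells are filled with 0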
Example #27
def merge(t1, t2, on, how='outer', fillna=True):
    import pandas as pd
    from datascience import Table
    DS_FLAG = False
    if isinstance(t1, Table):
        t1 = t1.to_df()
        DS_FLAG = True
    if isinstance(t2, Table):
        t2 = t2.to_df()
    full_t = pd.merge(t1, t2, how=how, left_on=on, right_on=on)
    if fillna:
        full_t.fillna(0, inplace=True)
    if DS_FLAG:
        return Table.from_df(full_t)
    else:
        return full_t
Example #28
 def summary(self):
     b0, bs = self.params
     sum_tbl = Table().with_columns([
         ("Param", ['Intercept'] + self.input_labels),
         ("Coefficient", [b0] + list(bs)),
     ])
     sum_tbl['Std Error'] = self.source_table.SE_params(
         self.output_label, (b0, bs))
     sum_tbl['t-statistic'] = sum_tbl['Coefficient'] / sum_tbl['Std Error']
     sum_tbl['95% CI'] = [
         (b - 2 * se, b + 2 * se)
         for b, se in zip(sum_tbl['Coefficient'], sum_tbl['Std Error'])
     ]
     sum_tbl['99% CI'] = [
         (b - 3 * se, b + 3 * se)
         for b, se in zip(sum_tbl['Coefficient'], sum_tbl['Std Error'])
     ]
     return sum_tbl
Example #29
 def lm_summary(self, output_label):
     intercept, slopes = self.regression_params(output_label)
     mdl = ML_Table._make_model(intercept, slopes)
     input_labels = [lbl for lbl in self.labels if not lbl == output_label]
     sum_tbl = Table().with_column('Param', ['Intercept'] + input_labels)
     sum_tbl['Coefficient'] = [intercept] + list(slopes)
     sum_tbl['Std Error'] = self.SE_params(output_label,
                                           (intercept, slopes))
     sum_tbl['t-statistic'] = sum_tbl['Coefficient'] / sum_tbl['Std Error']
     sum_tbl['95% CI'] = [
         (b - 2 * se, b + 2 * se)
         for b, se in zip(sum_tbl['Coefficient'], sum_tbl['Std Error'])
     ]
     sum_tbl['99% CI'] = [
         (b - 3 * se, b + 3 * se)
         for b, se in zip(sum_tbl['Coefficient'], sum_tbl['Std Error'])
     ]
     return sum_tbl
Example #30
def event(self, x):
    """
    Shows the probability that distribution takes on value x or list of
    values x.

    Parameters
    ----------
    x : float or Iterable or function
        An event represented either as an indicator function, a specific
        value in the domain, or a subset of the domain

    Returns
    -------
    Table
        Shows the probabilities of each value in the event

    Examples
    --------
    >>> dist = Table().values([1, 2, 3, 4]).probabilities([1/4, 1/4, 1/4, 1/4])
    >>> dist.event(2)
    Domain | Probability
    2      | 0.25
    >>> dist.event([2,3])
    Domain | Probability
    2      | 0.25
    3      | 0.25
    """
    check_valid_probability_table(self)
    if callable(x):
        t = self.where(self.apply(x, 0))
        print('P(Event) = {0}'.format(sum(t.column(1))))
        return t

    if not isinstance(x, collections.abc.Iterable):
        x = [x]
    probabilities = [self.prob_event(k) for k in x]
    print('P(Event) = {0}'.format(sum(probabilities)))
    return Table().with_columns('Outcome', x, 'Probability', probabilities)
Example #31
def loadNamingTable(namingData):
    '''
    Loads the naming data into a datascience Table.
    
    Args:
        namingData (dict): a hierarchical dictionary mapping each language 
            to each speaker's naming data, which maps each color index to 
            their given color term
            
    Returns:
        the same information in a datascience Table
    '''
    # create lists for the information
    language = []
    speaker = []
    index = []
    color_term = []

    # loop through the languages
    for lang in namingData:
        # loop through the speakers
        for spkr in namingData[lang]:
            # loop through the color index
            for i in namingData[lang][spkr]:
                # get the color term
                term = namingData[lang][spkr][i]

                # add to the lists
                language.append(lang)
                speaker.append(spkr)
                index.append(i)
                color_term.append(term)

    # turn into a table
    namingTable = Table().with_columns('Language', language, 'Speaker',
                                       speaker, 'Index', index, 'Term',
                                       color_term)
    return namingTable
Example #32
from flask import Flask, jsonify, request

from datascience import Table
from intervaltree import Interval, IntervalTree

prefixcode = ''

t = Table.read_table('CourseWhere.csv')
trees = {day: IntervalTree() for day in ['M', 'T', 'W', 'R', 'F', 'S']}
for row in t.rows:
    for day in row[5]:
        trees[day][row[6]:row[7]] = row

room_table = t.group('Building', collect=set).select(['Building', 'Facility set'])
room_list = {building.lower(): rooms
             for building, rooms in zip(room_table['Building'],
                                        room_table['Facility set'])}


app = Flask(__name__)

def class_to_dict(clas):
    convert = lambda x: x if isinstance(x, str) else int(x)
    return {label: convert(v) for label, v in zip(t.column_labels, clas)}

@app.route(prefixcode+'/rooms/<building>/<room>')
def get_room(building, room):
    weekday = request.args.get('day', 'M')
    if weekday not in "MTWRFS":
        weekday = 'M'
    values = [class_to_dict(v) for v in
              t.where('Building', building).where('Room', room).rows]
    values = [v for v in values if weekday in v['Days']]
    values = sorted(values, key=lambda x: x['Start'])