Exemplo n.º 1
0
def open_csv(path,
             delimiter=',',
             header=True,
             col_names=None,
             parse_datetimes=None):
    """Creates a structured array from a local .csv file

    Opens ``path`` and forwards the open file, together with the parsing
    options, to ``open_csv_as_sa``.

    Parameters
    ----------
    path : str
        path of the csv file
    delimiter : str
        Character used to delimit csv fields
    header : bool
        If True, assumes the first line of the csv has column names
    col_names : list of str or None
        If header is False, this list will be used for column names
    parse_datetimes : list of col names or None
        Columns that should be interpreted as datetimes. None (the
        default) is treated as an empty list.

    Returns
    -------
    numpy.ndarray
        structured array corresponding to the csv

    If header is False and col_names is None, diogenes will assign
    arbitrary column names
    """
    # Build a fresh list per call. A list literal in the signature would be
    # a single object shared by every call that omits the argument.
    if parse_datetimes is None:
        parse_datetimes = []

    # 'rU' requests universal-newline handling when reading the file.
    # NOTE(review): the 'U' flag is deprecated in Python 3 and rejected by
    # Python 3.11+; revisit the mode if this module is ported.
    with open(path, 'rU') as fin:
        return open_csv_as_sa(fin,
                              delimiter,
                              header,
                              col_names,
                              parse_datetimes=parse_datetimes)
Exemplo n.º 2
0
def open_csv_url(url, delimiter=',', header=True, col_names=None, parse_datetimes=None):
    """Creates a structured array from a url

    Opens ``url`` with ``urllib2.urlopen`` and forwards the response,
    together with the parsing options, to ``open_csv_as_sa``.

    Parameters
    ----------
    url : str
        url of the csv file
    delimiter : str
        Character used to delimit csv fields
    header : bool
        If True, assumes the first line of the csv has column names
    col_names : list of str or None
        If header is False, this list will be used for column names
    parse_datetimes : list of col names or None
        Columns that should be interpreted as datetimes. None (the
        default) is treated as an empty list.


    Returns
    -------
    numpy.ndarray
        structured array corresponding to the csv

    If header is False and col_names is None, diogenes will assign
    arbitrary column names
    """
    # Build a fresh list per call. A list literal in the signature would be
    # a single object shared by every call that omits the argument.
    if parse_datetimes is None:
        parse_datetimes = []

    fin = urllib2.urlopen(url)
    try:
        return open_csv_as_sa(fin, delimiter, header, col_names,
                              parse_datetimes=parse_datetimes)
    finally:
        # Close the response even when parsing raises, so the handle is
        # not leaked on the error path.
        fin.close()
Exemplo n.º 3
0
def open_csv(path, delimiter=',', header=True, col_names=None, parse_datetimes=None):
    """Creates a structured array from a local .csv file

    Opens ``path`` and forwards the open file, together with the parsing
    options, to ``open_csv_as_sa``.

    Parameters
    ----------
    path : str
        path of the csv file
    delimiter : str
        Character used to delimit csv fields
    header : bool
        If True, assumes the first line of the csv has column names
    col_names : list of str or None
        If header is False, this list will be used for column names
    parse_datetimes : list of col names or None
        Columns that should be interpreted as datetimes. None (the
        default) is treated as an empty list.

    Returns
    -------
    numpy.ndarray
        structured array corresponding to the csv

    If header is False and col_names is None, diogenes will assign
    arbitrary column names
    """
    # Build a fresh list per call. A list literal in the signature would be
    # a single object shared by every call that omits the argument.
    if parse_datetimes is None:
        parse_datetimes = []

    # 'rU' requests universal-newline handling when reading the file.
    # NOTE(review): the 'U' flag is deprecated in Python 3 and rejected by
    # Python 3.11+; revisit the mode if this module is ported.
    with open(path, 'rU') as fin:
        return open_csv_as_sa(fin, delimiter, header, col_names,
                              parse_datetimes=parse_datetimes)
Exemplo n.º 4
0
def run_csv(fin, uid_feature, label_feature, clfs=DBG_std_clfs):
    """Turn a CSV into an Experiment then turn the Experiment into models

    Parameters
    ----------
    fin
        CSV input handed straight to ``open_csv_as_sa``
        (presumably a file-like object -- confirm against callers)
    uid_feature
        Forwarded unchanged to ``register_exp``
    label_feature
        Name of the column holding the labels; it is read from the parsed
        table and removed from the feature matrix
    clfs
        Classifier specification passed to ``Experiment``; defaults to
        ``DBG_std_clfs``

    Returns
    -------
    None
    """
    table = open_csv_as_sa(fin)

    # Pull the label column out first, then drop it from the features.
    targets = table[label_feature]
    features = remove_cols(table, label_feature)

    experiment = Experiment(features, targets, clfs=clfs)
    register_exp(experiment, uid_feature)
Exemplo n.º 5
0
def run_csv(fin, uid_feature, label_feature, clfs=DBG_std_clfs):
    """Turn a CSV into an Experiment then turn the Experiment into models

    The CSV is parsed with ``open_csv_as_sa``, the ``label_feature`` column
    is split off as the labels, and the resulting ``Experiment`` is handed
    to ``register_exp`` together with ``uid_feature``.

    Parameters
    ----------
    fin
        CSV input passed to ``open_csv_as_sa``
    uid_feature
        Passed through to ``register_exp``
    label_feature
        Column used as the labels and excluded from the feature matrix
    clfs
        Value for the ``clfs`` argument of ``Experiment``
        (default: ``DBG_std_clfs``)
    """
    parsed = open_csv_as_sa(fin)
    label_column = parsed[label_feature]
    feature_matrix = remove_cols(parsed, label_feature)
    register_exp(
        Experiment(feature_matrix, label_column, clfs=clfs),
        uid_feature)
Exemplo n.º 6
0
def run_csv(fin, uid_feature, label_feature):
    """Run an Experiment on a CSV and register every resulting model

    The CSV is parsed with ``open_csv_as_sa``, the ``label_feature`` column
    is split off as the labels, and an ``Experiment`` using ``DBG_std_clfs``
    is run. The experiment is stored in ``last_experiments`` under the
    current user's id, that user's models are cleared, and one model is
    registered per run of the experiment.

    Parameters
    ----------
    fin
        CSV input passed to ``open_csv_as_sa``
    uid_feature
        Passed through to ``register_model`` for every run
    label_feature
        Column used as the labels and excluded from the feature matrix
    """
    table = open_csv_as_sa(fin)
    targets = table[label_feature]
    features = remove_cols(table, label_feature)

    experiment = Experiment(features, targets, clfs=DBG_std_clfs)
    experiment.run()

    # Remember the experiment for this user, then drop their old models
    # before registering the new ones.
    last_experiments[current_user.id] = experiment
    clear_models(current_user.id)

    # Walk trials -> subsets -> runs in the same order as nested loops would.
    every_run = (fitted
                 for trial in experiment.trials
                 for subset in trial.runs
                 for fitted in subset)
    for fitted in every_run:
        register_model(
            current_user.id,
            fitted.clf,
            dt.now(),
            fitted.M[fitted.train_indices],
            fitted.M[fitted.test_indices],
            fitted.labels[fitted.train_indices],
            fitted.labels[fitted.test_indices],
            fitted.col_names,
            uid_feature)
Exemplo n.º 7
0
def run_csv(fin, uid_feature, label_feature):
    """Run an Experiment on a CSV and register every resulting model

    Parameters
    ----------
    fin
        CSV input passed to ``open_csv_as_sa``
    uid_feature
        Passed through to ``register_model`` for every run
    label_feature
        Column used as the labels and excluded from the feature matrix

    The experiment is built with ``DBG_std_clfs``, run, and stored in
    ``last_experiments`` under the current user's id. That user's models
    are cleared before the new ones are registered.
    """
    parsed = open_csv_as_sa(fin)
    label_column = parsed[label_feature]
    feature_matrix = remove_cols(parsed, label_feature)

    experiment = Experiment(feature_matrix, label_column, clfs=DBG_std_clfs)
    experiment.run()
    last_experiments[current_user.id] = experiment
    clear_models(current_user.id)

    for trial in experiment.trials:
        for subset in trial.runs:
            for fitted in subset:
                # Values are gathered in the same order they are passed on.
                owner = current_user.id
                model = fitted.clf
                created = dt.now()
                M_train = fitted.M[fitted.train_indices]
                M_test = fitted.M[fitted.test_indices]
                labels_train = fitted.labels[fitted.train_indices]
                labels_test = fitted.labels[fitted.test_indices]
                register_model(owner, model, created,
                               M_train, M_test,
                               labels_train, labels_test,
                               fitted.col_names, uid_feature)
Exemplo n.º 8
0
def open_csv_url(url,
                 delimiter=',',
                 header=True,
                 col_names=None,
                 parse_datetimes=None):
    """Creates a structured array from a url

    Opens ``url`` with ``urllib2.urlopen`` and forwards the response,
    together with the parsing options, to ``open_csv_as_sa``.

    Parameters
    ----------
    url : str
        url of the csv file
    delimiter : str
        Character used to delimit csv fields
    header : bool
        If True, assumes the first line of the csv has column names
    col_names : list of str or None
        If header is False, this list will be used for column names
    parse_datetimes : list of col names or None
        Columns that should be interpreted as datetimes. None (the
        default) is treated as an empty list.


    Returns
    -------
    numpy.ndarray
        structured array corresponding to the csv

    If header is False and col_names is None, diogenes will assign
    arbitrary column names
    """
    # Build a fresh list per call. A list literal in the signature would be
    # a single object shared by every call that omits the argument.
    if parse_datetimes is None:
        parse_datetimes = []

    fin = urllib2.urlopen(url)
    try:
        return open_csv_as_sa(fin,
                              delimiter,
                              header,
                              col_names,
                              parse_datetimes=parse_datetimes)
    finally:
        # Close the response even when parsing raises, so the handle is
        # not leaked on the error path.
        fin.close()