Exemplo n.º 1
    def load_dataset(self, base_path):
        r"""Read data matrix and labels vector from files.

        Both files are parsed with ``pandas.read_excel``. The entries of
        ``self._dataset_options`` are forwarded to it as keyword arguments,
        except ``positive_label`` and ``samples_on`` (consumed here) and
        ``usecols`` (applied to the data file only). The stored options are
        not modified, so the method can be called more than once.

        Parameters
        ----------
        base_path : string
            The base path relative to which files are stored.

        Returns
        -------
        data : ndarray
            The :math:`n \times p` data matrix.

        labels : ndarray
            The :math:`n`-dimensional vector containing labels, encoded as
            +1 for the positive label and -1 for any other value.

        feature_names : pandas.Index or ndarray
            The names of the features. If the names read from the file are
            not unique, an array of names made unique by appending
            ``_<position>`` is returned instead.

        Raises
        ------
        ValueError
            If no positive label was configured and the labels do not
            contain exactly two unique values, or if data and labels
            disagree on the number of samples.
        """
        data_path = os.path.join(base_path, self.get_file('data'))
        labels_path = os.path.join(base_path, self.get_file('labels'))

        # Work on a copy: popping from the shared dict would silently drop
        # these options for every call after the first one.
        options = dict(self._dataset_options)

        # DATA
        poslab = options.pop('positive_label', None)
        samples_on = options.pop('samples_on', 'col')
        pd_data = pd.read_excel(data_path, **options)

        # Samples must end up on rows, features on columns.
        if samples_on == 'col':
            pd_data = pd_data.transpose()

        # Retrieve feature names from the column names of the DataFrame
        feature_names = pd_data.columns
        if feature_names.shape[0] != np.unique(feature_names).shape[0]:
            import sys
            sys.stderr.write("Feature names specified are not unique. "
                             "Assigning a unique label.\n")
            # Build the new names as Python strings first: appending in place
            # to a fixed-width numpy string array truncates the suffix.
            feature_names_u = np.array(
                ['{}_{}'.format(name, it)
                 for it, name in enumerate(feature_names)])
            # NOTE(review): the statement persisting the old/new name
            # correspondence was truncated in the original text; the
            # np.savetxt call and its target file name are reconstructed
            # here -- confirm both.
            np.savetxt("id_correspondence.csv",
                       np.stack((np.array(feature_names),
                                 feature_names_u), axis=-1),
                       delimiter=",", fmt='%s')
            # NOTE(review): presumably the unique names replace the original
            # ones (the warning says a unique label is assigned) -- confirm.
            feature_names = feature_names_u

        # TODO: the optional 'data_preprocessing' step is disabled until it
        # has been checked.

        # LABELS
        # Before loading labels, remove parameters that were likely specified
        # for data only.
        options.pop('usecols', None)
        pd_labels = pd.read_excel(labels_path, **options)
        label_values = pd_labels.values

        if poslab is None:
            # np.unique returns sorted values, so the smallest one becomes
            # the positive label.
            uv = np.unique(label_values)
            if len(uv) != 2:
                raise ValueError("Expected exactly two unique values in the "
                                 "labels, found {}.".format(len(uv)))
            poslab = uv[0]

        # Convert labels to -1/+1
        labels = np.array([+1. if x == poslab else -1.
                           for x in label_values.ravel()], dtype=float)

        data = pd_data.values
        if data.shape[0] != labels.shape[0]:
            raise ValueError("The number of samples in data do not correspond "
                             "to the number of samples in labels.")
        return data, labels, feature_names
Exemplo n.º 2
#idx1 = np.repeat(time, N, axis=1)



from scipy.stats import nbinom
import matplotlib.pyplot as plt

dfPoissTrial = pd.DataFrame(data=poissTrial)

# Load the results table and derive the (r, p) columns from it.
# pandas has no `read_xls`; Excel workbooks are read with `read_excel`.
result_NB_20k = pd.read_excel('results/NB_20kpool.xlsx')

# NOTE(review): this call passes (alpha, mu) while the call further down
# passes (mu, alpha); one of the two is probably swapped -- check against
# the signature of convert_params.
result_NB_20k['r'], result_NB_20k['p'] = convert_params(
    result_NB_20k['alpha'], result_NB_20k['mu'])

# Integer support 0..16 on which the probability mass function is evaluated.
x = np.arange(0, 17, 1)

out_r, out_p = convert_params(out_mu, out_alpha)

# Plot the negative-binomial pmf for parameter sets 0, 6, 12 and 18.
for i in range(0, 24, 6):
    r = out_r[i]
    p = out_p[i]
    plt.plot(x, nbinom.pmf(x, r, p), label=i)
Exemplo n.º 3
    def load_dataset(self, base_path):
        r"""Read data matrix and labels vector from files.

        Both files are parsed with ``pandas.read_excel``. The entries of
        ``self._dataset_options`` are forwarded to it as keyword arguments,
        except ``positive_label`` and ``samples_on`` (consumed here) and
        ``usecols`` (applied to the data file only). The stored options are
        not modified, so the method can be called more than once.

        Parameters
        ----------
        base_path : string
            The base path relative to which files are stored.

        Returns
        -------
        data : ndarray
            The :math:`n \times p` data matrix.

        labels : ndarray
            The :math:`n`-dimensional vector containing labels, encoded as
            +1 for the positive label and -1 for any other value.

        feature_names : pandas.Index or ndarray
            The names of the features. If the names read from the file are
            not unique, an array of names made unique by appending
            ``_<position>`` is returned instead.

        Raises
        ------
        ValueError
            If no positive label was configured and the labels do not
            contain exactly two unique values, or if data and labels
            disagree on the number of samples.
        """
        data_path = os.path.join(base_path, self.get_file('data'))
        labels_path = os.path.join(base_path, self.get_file('labels'))

        # Work on a copy: popping from the shared dict would silently drop
        # these options for every call after the first one.
        options = dict(self._dataset_options)

        # DATA
        poslab = options.pop('positive_label', None)
        samples_on = options.pop('samples_on', 'col')
        pd_data = pd.read_excel(data_path, **options)

        # Samples must end up on rows, features on columns.
        if samples_on == 'col':
            pd_data = pd_data.transpose()

        # Retrieve feature names from the column names of the DataFrame
        feature_names = pd_data.columns
        if feature_names.shape[0] != np.unique(feature_names).shape[0]:
            import sys
            sys.stderr.write("Feature names specified are not unique. "
                             "Assigning a unique label.\n")
            # Build the new names as Python strings first: appending in place
            # to a fixed-width numpy string array truncates the suffix.
            feature_names_u = np.array(
                ['{}_{}'.format(name, it)
                 for it, name in enumerate(feature_names)])
            # NOTE(review): the statement persisting the old/new name
            # correspondence was truncated in the original text; the
            # np.savetxt call, the stacked array and the target file name
            # are reconstructed here -- confirm them.
            np.savetxt("id_correspondence.csv",
                       np.stack((np.array(feature_names),
                                 feature_names_u), axis=-1),
                       delimiter=",", fmt='%s')
            # NOTE(review): presumably the unique names replace the original
            # ones (the warning says a unique label is assigned) -- confirm.
            feature_names = feature_names_u

        # TODO: the optional 'data_preprocessing' step is disabled until it
        # has been checked.

        # LABELS
        # Before loading labels, remove parameters that were likely specified
        # for data only.
        options.pop('usecols', None)
        pd_labels = pd.read_excel(labels_path, **options)
        label_values = pd_labels.values

        if poslab is None:
            # np.unique returns sorted values, so the smallest one becomes
            # the positive label.
            uv = np.unique(label_values)
            if len(uv) != 2:
                raise ValueError("Expected exactly two unique values in the "
                                 "labels, found {}.".format(len(uv)))
            poslab = uv[0]

        # Convert labels to -1/+1
        labels = np.array([+1. if x == poslab else -1.
                           for x in label_values.ravel()], dtype=float)

        data = pd_data.values
        if data.shape[0] != labels.shape[0]:
            raise ValueError("The number of samples in data do not correspond "
                             "to the number of samples in labels.")
        return data, labels, feature_names
Exemplo n.º 4
import pandas as pd
# pandas has no `read_xls`; Excel workbooks are read with `read_excel`.
# The first column of the sheet is used as the row index.
reviews = pd.read_excel("sanmigue.xls", index_col=0)
import seaborn as sns
# Merge every .xls workbook found in `filedirectory` into a single CSV file.
filelist = os.listdir(filedirectory)

print(filelist)

# Build the list of full paths to the .xls files in the directory listing.
pathlist = [os.path.join(filedirectory, files)
            for files in filelist
            if files.endswith(".xls")]

print(type(pathlist))
print(pathlist)

# Read each workbook into its own DataFrame. The frames have to be collected:
# without this the list handed to pd.concat below is always empty and the
# call raises. pandas has no `read_xls`; `read_excel` is the Excel reader.
appendeddf = [pd.read_excel(paths) for paths in pathlist]

# Concatenate the DataFrames into one single DataFrame.
outputdf = pd.concat(appendeddf)

# Join the output path portably instead of hard-coding a backslash.
outputdf.to_csv(os.path.join(filedirectory, 'output.csv'))
#for paths in pathlist:

#def csv_reader(file_obj)
#    with open(file_obj, 'r') as userfile:
#        userFileReader = csv.reader(userfile)
#        for row in userFileReader:
#            print row
Exemplo n.º 6
#import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
#import numpy as np

# pandas has no `read_xls`; Excel workbooks are read with `read_excel`.
test = pd.read_excel("~/Documents/Gujarati data sets/Excel/Table6_1.xls")