Пример #1
0
def read_deep(deep_path):
    """Read deep features from a single path or from a list of paths.

    If ``deep_path`` is exactly a ``list``, each entry is passed to
    ``feats.read`` and the returned items are gathered into one flat list.
    Any other value is forwarded to ``feats.read`` as is.
    """
    if type(deep_path) is not list:
        return feats.read(deep_path)
    merged = []
    for path in deep_path:
        merged.extend(feats.read(path))
    return merged
Пример #2
0
def binary_selection(in_path):
    """Combine the common dataset with every transformed deep dataset.

    Args:
        in_path: Pair ``(common_path, deep_path)``.

    Returns:
        A list with one entry per deep dataset: the first dataset read from
        ``common_path`` added (``+``) to ``binary_helper(i, deep_dataset)``.
    """
    common_path, deep_path = in_path
    deep_data = [
        binary_helper(i, data_i)
        for i, data_i in enumerate(feats.read(deep_path))
    ]
    common = feats.read(common_path)[0]
    # The original returned the undefined name ``dataset`` (NameError);
    # the combined list is what was built and is what must be returned.
    return [common + deep_i for deep_i in deep_data]
Пример #3
0
def read_dataset(common_path, deep_path):
    """Read common and/or deep features and combine them when both are given.

    * falsy ``common_path``  -> result of ``read_deep(deep_path)``
    * falsy ``deep_path``    -> result of ``feats.read(common_path)``
    * both given             -> a list where the first common dataset is
      added (``+``) to each dataset returned by ``read_deep``.
    """
    if not common_path:
        return read_deep(deep_path)
    if deep_path:
        shared = feats.read(common_path)[0]
        return [shared + deep_i for deep_i in read_deep(deep_path)]
    return feats.read(common_path)
Пример #4
0
def combined_dataset(common_path, deep_path, sub_datasets=False):
    """Read common and deep features and add the common part to each deep one.

    With only one of the two paths set, the other path is read with
    ``feats.read`` and returned directly. With both set, the first common
    dataset is added (``+``) to every deep dataset. When ``sub_datasets`` is
    truthy the result is the triple ``(combined, common_data, deep_data)``
    instead of just the combined list.
    """
    if not common_path:
        return feats.read(deep_path)
    if deep_path:
        common_data = feats.read(common_path)[0]
        deep_data = feats.read(deep_path)
        combined = [common_data + deep_i for deep_i in deep_data]
        return (combined, common_data, deep_data) if sub_datasets else combined
    return feats.read(common_path)
Пример #5
0
def exp(common_path, deep_path):
    """Print, per deep dataset, whether its cluster quality beats the common one.

    Clusters are built with ``make_clusters`` from the first element of each
    dataset's ``split()``. The common quality is printed first, then for every
    deep dataset its index and the boolean ``quality - base_quality > 0``.
    """
    common_part = feats.read(common_path)[0].split()[0]
    common = make_clusters(common_part)
    binary = [
        make_clusters(deep_i.split()[0]) for deep_i in feats.read(deep_path)
    ]
    print("Common:")
    base_quality = common.quality()
    print(base_quality)
    print("Binary:")
    for idx, clusters in enumerate(binary):
        print(idx)
        print((clusters.quality() - base_quality) > 0)
Пример #6
0
def concat_dataset(in_path):
    """Read datasets, combining common and deep features for a tuple input.

    A non-tuple ``in_path`` is handed straight to ``feats.read``. A tuple is
    unpacked as ``(common_path, deep_path)``; the common part is read with
    ``feats.read_unified`` when ``common_path`` is a list and with
    ``feats.read`` otherwise, then added (``+``) to each deep dataset.
    """
    if type(in_path) is not tuple:
        return feats.read(in_path)
    common_path, deep_path = in_path
    if type(common_path) is list:
        common_data = feats.read_unified(common_path)
    else:
        # NOTE(review): the whole return value of feats.read is used here
        # (no indexing) -- confirm that this is intended.
        common_data = feats.read(common_path)
    deep_data = feats.read(deep_path)
    return [common_data + deep_i for deep_i in deep_data]
Пример #7
0
def total_selection(in_path):
    """Keep the deep datasets whose standardized score is above -1.

    For each deep dataset the median of ``split()[0].mutual()`` is taken;
    the medians are standardized (zero mean, unit standard deviation) and
    printed. Every deep dataset whose standardized score is greater than -1
    is added (``+``) to the first common dataset.

    Args:
        in_path: Pair ``(common_path, deep_path)``.

    Returns:
        List of combined datasets for the deep datasets that passed the
        threshold.
    """
    common_path, deep_path = in_path
    deep_data = feats.read(deep_path)
    deep_train = [deep_i.split()[0] for deep_i in deep_data]
    # Explicit array so the arithmetic below does not rely on NumPy's
    # reflected operators coercing a plain list.
    info = np.asarray([np.median(train_i.mutual()) for train_i in deep_train])
    info = (info - np.mean(info)) / np.std(info)
    print(info)
    common = feats.read(common_path)[0]
    # The original returned the undefined name ``datasetst`` (NameError).
    return [
        common + data_i
        for data_i, score_i in zip(deep_data, info)
        if score_i > -1
    ]
Пример #8
0
def read_votes(in_path):
    """Build one ``[y_true, X, info]`` triple per dataset read from ``in_path``.

    ``y_true`` is taken from the labels of the first dataset and shared by
    every triple; ``X`` and ``info`` come from each dataset in turn.
    """
    data = feats.read(in_path)
    y_true = data[0].get_labels()
    return [[y_true, data_i.X, data_i.info] for data_i in data]
Пример #9
0
def read_datasets(in_path):
    """Dispatch to the right reader depending on the shape of ``in_path``.

    * not a tuple                       -> ``feats.read(in_path)``
    * tuple whose first item is a list  -> ``multi_dataset(common, deep)``
    * any other tuple                   -> ``combined_dataset(common, deep)``
    """
    if type(in_path) is not tuple:
        return feats.read(in_path)
    common_path, deep_path = in_path
    reader = multi_dataset if type(common_path) is list else combined_dataset
    return reader(common_path, deep_path)
Пример #10
0
def split_plot(in_path):
    """Show a t-SNE plot coloured by the parity of a number in each name.

    The colour of sample ``i`` is the second ``_``-separated field of
    ``dataset.info[i]``, converted to ``int``, modulo 2.
    """
    dataset = feats.read(in_path)[0]

    def parity_helper(i, y_i):
        # y_i is part of the callback signature but is not used here.
        fields = dataset.info[i].split("_")
        return int(fields[1]) % 2

    tsne_plot(dataset, show=True, color_helper=parity_helper)
Пример #11
0
def reduce_cross(in_path, step=50):
    """Pick the reduced dimensionality with the best cross-validation score.

    The first dataset read from ``in_path`` is reduced with
    ``reduction.reduce`` to ``step``, ``2 * step``, ... features (as many
    whole steps as fit into ``feat_dict.dim()[0]``). Each reduced dataset is
    scored with ``rename.cross_validate`` and the running list of scores is
    printed after every step.

    Args:
        in_path: Path handed to ``feats.read``.
        step: Increment between the tried dimensionalities.

    Returns:
        The dimensionality ``(k + 1) * step`` whose score is the highest.

    Raises:
        ValueError: If not even one whole ``step`` fits into the feature
            dimension, so there is nothing to compare.
    """
    feat_dict = feats.read(in_path)[0]
    n = int(feat_dict.dim()[0] / step)
    if n <= 0:
        # np.argmax would raise ValueError on the empty score list anyway;
        # fail early with a message that names the cause.
        raise ValueError("step=%s leaves no dimensionality to evaluate" % (step,))
    acc = []
    for i in range(n):
        # Reduced datasets are scored and dropped; the original kept every
        # one of them in a list that was never read.
        feat_i = reduction.reduce(feat_dict, (i + 1) * step)
        acc.append(rename.cross_validate(feat_i))
        print(acc)
    k = np.argmax(acc)
    return (k + 1) * step
Пример #12
0
def tsne_plot(in_path, show=True, color_helper="cat", names=False):
    """Embed the second split of a dataset with t-SNE and plot it.

    Args:
        in_path: Path handed to ``feats.read``; the first dataset is used and
            its ``split()[1]`` part is plotted.
        show: Forwarded to ``plot_embedding``.
        color_helper: Callable ``(i, y_i) -> colour value``. Any non-callable
            value (including the default ``"cat"``) selects colouring by the
            label ``y_i``.
        names: Accepted for interface compatibility only; the names returned
            by ``as_dataset()`` are always forwarded to ``plot_embedding``.

    Returns:
        Whatever ``plot_embedding`` returns.
    """
    feat_dataset = feats.read(in_path)[0].split()[1]
    tsne = manifold.TSNE(n_components=2, perplexity=30)
    # Unpack into a separate local so the ``names`` argument is not shadowed.
    X, y, point_names = feat_dataset.as_dataset()
    X = tsne.fit_transform(X)
    if not callable(color_helper):
        # The original always replaced color_helper with this label-based
        # default, silently discarding a caller-supplied callable.
        def color_helper(i, y_i):
            return y_i

    return plot_embedding(X,
                          y,
                          title="tsne",
                          color_helper=color_helper,
                          show=show,
                          names=point_names)
Пример #13
0
def random_cat(feat_dict):
    """Build a mapping from each key to a new, randomly ordered name.

    ``feat_dict`` may be a path string (then the first dataset returned by
    ``feats.read`` is used) or any object with ``keys()``. Keys are grouped
    by ``get_cat()``, each group is shuffled in place with ``random.shuffle``
    and the key at position ``j`` in its group is mapped to
    ``"<cat + 1>_<j % 2>_<j>"``.
    """
    if type(feat_dict) is str:
        feat_dict = feats.read(feat_dict)[0]
    grouped = defaultdict(list)
    for key in feat_dict.keys():
        grouped[key.get_cat()].append(key)
    mapping = {}
    for members in grouped.values():
        random.shuffle(members)
        for pos, member in enumerate(members):
            mapping[member] = "%d_%d_%d" % (member.get_cat() + 1, pos % 2, pos)
    return mapping
Пример #14
0
def visual(in_path):
    """Show a t-SNE plot coloured by the detector result of selected samples.

    The number in the second ``_``-separated field of each sample name
    decides the colour: samples with an odd number get ``False``; samples
    with an even number get their entry of the
    ``inliners.knn.get_detector`` result plus one.
    """
    dataset = feats.read(in_path)[0]
    result = inliners.knn.get_detector(dataset, k=5, as_dict=True)

    def helper(i, y_i):
        # y_i is part of the callback signature but is not used here.
        name_i = dataset.info[i]
        person_i = int(name_i.split("_")[1])
        if (person_i % 2) != 0:
            return False
        return result[name_i] + 1

    reduction.tsne_plot(dataset, show=True, color_helper=helper)
Пример #15
0
def train_model(data, binary=False, clf_type="LR", selector=None):
    """Train a model on one split of ``data`` and predict on the other.

    ``data`` may be a path string (the first dataset from ``feats.read`` is
    used) or a dataset object. The dataset is normalized via ``norm()``, its
    ``dim()`` and length are printed, and it is split with
    ``data.split(selector)``. The model comes from
    ``make_model(train, clf_type)``. Predictions are hard labels
    (``predict``) when ``binary`` is truthy and ``predict_proba`` output
    otherwise.

    Returns:
        ``Result(y_true, y_pred, test.names())``.
    """
    if type(data) is str:
        data = feats.read(data)[0]
    data.norm()
    print(data.dim())
    print(len(data))
    train, test = data.split(selector)
    model = make_model(train, clf_type)
    X_test = test.get_X()
    y_true = test.get_labels()
    predict = model.predict if binary else model.predict_proba
    y_pred = predict(X_test)
    return Result(y_true, y_pred, test.names())
Пример #16
0
def train_model(data, binary=False, clf_type="LR", acc_only=False):
    """Fit a classifier on the first split and evaluate it on the second.

    Args:
        data: Dataset object, or a path string from which the first dataset
            returned by ``feats.read`` is taken.
        binary: When truthy, predictions are hard labels (``predict``);
            otherwise they are ``predict_proba`` output.
        clf_type: Classifier key passed to ``learn.clf.get_cls``.
        acc_only: When truthy, return only the accuracy score.

    Returns:
        ``accuracy_score(y_true, y_pred)`` when ``acc_only`` is truthy,
        otherwise the triple ``(y_true, y_pred, test.info)``.
    """
    if isinstance(data, str):
        data = feats.read(data)[0]
    data.norm()
    train, test = data.split()
    model = learn.clf.get_cls(clf_type)
    model.fit(train.X, train.get_labels())
    y_true = test.get_labels()
    # Accuracy compares labels with labels, so hard predictions are needed
    # whenever acc_only is set; the original passed predict_proba output to
    # accuracy_score when binary was False.
    # NOTE(review): assumes accuracy_score is sklearn.metrics.accuracy_score.
    if binary or acc_only:
        y_pred = model.predict(test.X)
    else:
        y_pred = model.predict_proba(test.X)
    if acc_only:
        return accuracy_score(y_true, y_pred)
    return y_true, y_pred, test.info
Пример #17
0
def tsne_plot(in_path, show=True, color_helper="cat", names=False):
    """Embed the second split of a dataset with t-SNE and plot it.

    ``in_path`` is either a path string (the first dataset from
    ``feats.read`` is used) or a dataset object. Only ``split()[1]`` is
    plotted. A ``str`` or ``tuple`` ``color_helper`` is turned into a callable
    with ``get_colors_helper``; anything else is passed through. Sample names
    (``info``) are forwarded to ``plot_embedding`` only when ``names`` is
    truthy.
    """
    if type(in_path) is str:
        source = feats.read(in_path)[0]
    else:
        source = in_path
    feat_dataset = source.split()[1]
    tsne = manifold.TSNE(n_components=2, perplexity=30)
    embedded = tsne.fit_transform(feat_dataset.X)
    labels = feat_dataset.get_labels()
    point_names = feat_dataset.info if names else None
    if type(color_helper) in (str, tuple):
        color_helper = get_colors_helper(feat_dataset.info, color_helper)
    return plot_embedding(embedded,
                          labels,
                          title="tsne",
                          color_helper=color_helper,
                          show=show,
                          names=point_names)
Пример #18
0
def separ(in_path):
    """Print the cluster quality of the second split of the first dataset."""
    _train, test = feats.read(in_path)[0].split()
    print(make_clusters(test).quality())