Example No. 1
    def findrgbrange(self, image):
        RGB = []
        # Computing the knees for rgb
        if len(image.shape) >= 3:
            try:
                #print("***Colour RGB range detection***")
                for i in range(0, 3):
                    cdf3 = exposure.cumulative_distribution(image[:, :, i])
                    kneedle = KneeLocator(cdf3[1], cdf3[0], curve='convex', direction='increasing')
                    RGB.append(int(kneedle.knee))
                return RGB

            except ValueError as err:
                print(err)
                print("Can't get the range")
                input("press any key to return to the processing menu...")
                return False

        else:
            try:
                #print("***Grayscale  range detection***")
                cdf3 = exposure.cumulative_distribution(image)
                kneedle = KneeLocator(cdf3[1], cdf3[0], curve='convex', direction='increasing')
                RGB.append(int(kneedle.knee))
                return RGB

            except ValueError as err:
                print(err)
                print("Can't get the range")
                input("press any key to return to the processing menu...")
                return False
Example No. 2
def find_epsilon(feature_file, min_samp, kupa, number_of_features, dest):
    X = np.loadtxt(feature_file)
    X = StandardScaler().fit_transform(X)
    nearest_neighbors = NearestNeighbors(n_neighbors=min_samp + 1)
    neighbors = nearest_neighbors.fit(X)
    distances, indices = neighbors.kneighbors(X)
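    # k-distance curve: each point's distance to its min_samp-th nearest neighbor, sorted ascending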
    distances = np.sort(distances[:, min_samp], axis=0)
    i = np.arange(len(distances))
    knee = KneeLocator(i,
                       distances,
                       S=1,
                       curve='convex',
                       direction='increasing',
                       interp_method='polynomial')
    epsilon = distances[knee.knee]
    fig = plt.figure(figsize=(5, 5))
    knee.plot_knee()
    plt.title(
        f"Elbow for {kupa}\nMfcc-{number_of_features}-features\nEpsilon={epsilon}"
    )
    plt.xlabel("Points")
    plt.ylabel("Distance")
    plt.savefig(
        f'{dest}results/Elbow_{kupa}_Mfcc_{number_of_features}_features.png')
    plt.close()
    print(epsilon)
    return epsilon
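The min_samp/epsilon pairing above is the usual k-distance heuristic for choosing DBSCAN's eps, so a hypothetical follow-up (not part of the original example; the file path and parameter values below are placeholders) might look like this:

from sklearn.cluster import DBSCAN

eps = find_epsilon("features.txt", min_samp=4, kupa="demo",
                   number_of_features=13, dest="./")            # placeholder arguments
X = StandardScaler().fit_transform(np.loadtxt("features.txt"))  # same preprocessing as inside find_epsilon
labels = DBSCAN(eps=eps, min_samples=4).fit_predict(X)          # cluster with the detected epsilon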
Example No. 3
def locate_knee(time, eps_fit, eps_stat):
    from kneed import KneeLocator

    # cast as numpy arrays due to some bug with xarrays/pandas indexing
    time = np.array(time)
    eps_fit = np.array(eps_fit)
    eps_stat = np.array(eps_stat)

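    # drop samples where time decreases so KneeLocator sees a monotonically increasing x axis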
    while not np.array_equal(time, np.sort(time)):
        idx_del = np.where(np.diff(time) < 0)[0] + 1
        time = np.delete(time, idx_del)
        eps_fit = np.delete(eps_fit, idx_del)

    if eps_fit.max() > 2 * eps_stat:
        # log-norm + tanh
        knee = KneeLocator(time, eps_fit, direction="decreasing")
        idx = knee.knee_x
    else:
        knee = KneeLocator(time, eps_fit)
        idx = knee.knee_x

    if idx is None:
        # non-stationary case
        idx = -1

    return idx
Example No. 4
def findrgbrange(image):
    from kneed import KneeLocator
    RGB = []

    # Computing the knees for rgb
    if len(image.shape) >= 3:
        try:
            print("***Colour RGB range detection***")
            for i in range(0, 3):
                cdf3 = exposure.cumulative_distribution(image[:, :, i])
                # print(i)
                # print(image.shape)
                # print(image[:, :, i].shape)
                # print(cdf3)

                # div = np.gradient(np.array([cdf3[0], cdf3[1]], dtype=float))
                # print(div)
                # plt.plot(div[0], div[1])
                # plt.plot(cdf3[1], cdf3[0])
                kneedle = KneeLocator(cdf3[1], cdf3[0], curve='convex', direction='increasing')
                # kneedle.plot_knee()

                # finding the knee
                # kneedle = KneeLocator(-div[0][0], div[1][1], S=5, curve='convex', direction='increasing')
                # kneedle.plot_knee()
                print(round(kneedle.knee, 3))
                RGB.append(int(kneedle.knee))
                print(round(kneedle.knee_y, 3))
            # plt.show()
            return RGB

        except ValueError as err:
            print(err)
            print("Can't get the range")
            input("press any key to return to the processing menu...")
            return False

    else:
        try:
            print("***Grayscale  range detection***")
            cdf3 = exposure.cumulative_distribution(image)
            # print(image.shape)
            # print(cdf3)
            kneedle = KneeLocator(cdf3[1], cdf3[0], curve='convex', direction='increasing')
            # kneedle.plot_knee()

            print(round(kneedle.knee, 3))
            RGB.append(int(kneedle.knee))
            print(round(kneedle.knee_y, 3))

            return RGB

        except ValueError as err:
            print(err)
            print("Can't get the range")
            input("press any key to return to the processing menu...")
            return False
Example No. 5
def find_eps(data, k, metric):
    nbrs = NearestNeighbors(n_neighbors=k, metric=metric).fit(data)
    distances, indices = nbrs.kneighbors(data)
    distanceDec = sorted(distances[:, k - 1], reverse=True)
    knee = KneeLocator(indices[20:500, 0],
                       distanceDec[20:500],
                       direction="decreasing",
                       curve="convex")
    knee.plot_knee_normalized()
    return distanceDec[knee.elbow]
Example No. 6
def find_eps(data, k, metric):
    END = round(data.shape[0] * 0.9)
    nbrs = NearestNeighbors(n_neighbors=k, metric=metric).fit(data)
    distances, indices = nbrs.kneighbors(data)
    distanceDec = np.array(sorted(distances[:, k - 1], reverse=True))
    knee = KneeLocator(indices[:END, 0],
                       distanceDec[:END],
                       curve="convex",
                       direction="decreasing",
                       S=1.0)
    knee.plot_knee_normalized()
    return distanceDec[knee.elbow], knee
Example No. 7
def kneedle_cutoff(df_analysis, verbose=True):
    """Get kneedle estimate for what is a cell."""
    results = []
    # Get kneedle estimate using raw data OR fit smoothing spline. Also
    # maximize sensitivity.
    # NOTE: Much better estimates with kneedle (that is similar to cellranger3)
    # if this is done the original value space - not log space.
    kneedle_dict = {}
    for fit in [None, 'interp1d', 'polynomial']:
        key = 'kneedle::spline={}'.format(fit)
        sensitivity = 5000
        while True:
            if fit:
                kneedle_dict[key] = KneeLocator(
                    df_analysis['barcode'].values,
                    df_analysis['umi_counts'].values,
                    curve='convex',
                    direction='decreasing',
                    S=sensitivity,
                    interp_method=fit)
            else:
                kneedle_dict[key] = KneeLocator(
                    df_analysis['barcode'].values,
                    df_analysis['umi_counts'].values,
                    curve='convex',
                    direction='decreasing',
                    S=sensitivity)
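            # no knee found at this sensitivity: relax S and retry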
            if kneedle_dict[key].knee is None:
                sensitivity -= 100
            else:
                if verbose:
                    print('S:\t{}\nknee:\t{}\nelbow:\t{}'.format(
                        sensitivity, round(kneedle_dict[key].knee, 3),
                        round(kneedle_dict[key].elbow, 3)))
                results.append({
                    'method':
                    key,
                    'umi_counts_cutoff':
                    df_analysis.loc[df_analysis['barcode'].values ==
                                    int(kneedle_dict[key].knee),
                                    'umi_counts'].values[0],
                    'n_cells':
                    int(kneedle_dict[key].knee),
                    'sensitivity':
                    sensitivity
                    # 'elbow': 10 ** kneedle_dict[key].elbow  # same as knee
                })
                break
    return results
Example No. 8
def feature_selection(data, target, method=c.XGB, verbose=False):
    if method == c.COR:
        correlation = data.corr()
        if verbose:
            sns.heatmap(correlation, cmap='Blues', annot=True)
            plt.show()

        return correlation.loc[(correlation[target] > 0.2)
                               & (correlation[target] < 0.8)].index.tolist()
    else:
        xgb_params = {
            'eta': 0.05,
            'max_depth': 10,
            'subsample': 1.0,
            'colsample_bytree': 0.7,
            'objective': 'reg:squarederror',
            'eval_metric': 'rmse'
        }

        df = data.copy(deep=True)
        y = df[target]
        del df[target]
        x = df

        dtrain = xgb.DMatrix(x, y, feature_names=df.columns.values)
        model = xgb.train(xgb_params, dtrain, num_boost_round=1000)
        importance = model.get_score(importance_type='total_cover')
        imp = pd.DataFrame(importance, index=range(1)).T
        imp.columns = ["Importance"]
        imp = imp.sort_values(by=["Importance"], ascending=False)
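        # normalize importances so they sum to 1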
        imp /= imp.sum()

        if verbose:
            sns.heatmap(imp, cmap='Blues', annot=True)
            plt.show()

        imp["x"] = range(len(imp))

        # online=True usually sharpens the knee here, but drop it if it hurts accuracy
        kneedle = KneeLocator(imp.x,
                              imp.Importance,
                              curve="convex",
                              direction="decreasing",
                              online=True)
        if verbose:
            kneedle.plot_knee()

        return imp.iloc[0:kneedle.knee].index.tolist() + [target]
Example No. 9
def start_GMM():


    global dfCluster

    # We only want the vitals to be fed into the clustering algorithm
    dfIDs = pd.DataFrame(columns=['SId','AId'])
    dfIDs['SId'] = dfCluster['SId'].astype('int64',copy=True)
    dfIDs['AId'] = dfCluster['AId'].astype('int64',copy=True)
    dfCluster.drop(['SId', 'AId'], axis=1, inplace=True)

    # The clustering algorithm takes in a list of lists which in our case will be the vitals
    allVitals = dfCluster.values

    # Now in a similar fashion to k-means we need to find out how many clusters we want
    nList = [ i for i in range(1,15) ]
    allVitalsNorm = scale(allVitals)

    models = [ GaussianMixture(n, covariance_type='full', random_state=0).fit(allVitalsNorm) for n in nList ]

    # score on the same scaled data the models were fit on
    bicList = [ m.bic(allVitalsNorm) for m in models ]
    aicList = [ m.aic(allVitalsNorm) for m in models ]

    kn = KneeLocator(nList,bicList,S=1.0, curve='convex',direction='decreasing')
    kn1 = KneeLocator(nList,aicList,S=1.0, curve='convex', direction='decreasing')
    print('Recommended number of clusters by BIC: {}'.format(kn.knee))
    print('Recommended number of clusters by AIC: {}'.format(kn1.knee))

    nComps = kn.knee
    model = models[nComps - 1]  # nList starts at 1, so the model with nComps components sits at index nComps - 1

    # may be interested in knowing the probabilities in the future
    # probs = model.predict_proba(allVitals)
    # for p in probs:
    #     if any( (i < 1.00) and (i > 0.00) for i in p ):
    #         print(p.round(3))

    dfCluster['Cluster Label'] = model.predict(allVitalsNorm)

    for column in dfIDs:
        dfIDs[column] = pd.to_numeric(dfIDs[column])

    # Add both the subject id and admission id to the cluster dataframe
    dfCluster['SId'] = dfIDs['SId'].astype('int64',copy=True)
    dfCluster['AId'] = dfIDs['AId'].astype('int64',copy=True)


    return
Example No. 10
def knee_loc(pipe, pipe_clusterer, X_train_trans):
    """Locate knee for clusterer and plot WCSS
    
    Args:
        pipe: Pipe for the entire model
        pipe_clusterer: Pipe for clusterer
        X_train_trans (): pre-transformed X_train (scaled and encoded)
    
    Returns:
        knee/elbow
        Plot of WCSS
    """

    wcss = []
    for i in range(1, 11):
        pipe_clusterer.n_clusters = i
        pipe.fit(X_train_trans)
        wcss.append(pipe_clusterer.inertia_)

    kl = KneeLocator(range(1, 11),
                     wcss,
                     curve="convex",
                     direction="decreasing")
    print(kl.elbow)

    plt.plot(range(1, 11), wcss)
    plt.title('Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('Within cluster sum of squares (WCSS)')
    plt.show()
Example No. 11
    def elbow_plot(self):
        logger.info("Inside the elbow_plot function in KMeansClustering class")
        try:
            data = self.data
            k_range = range(1, 11)
            sse = []
            for k in k_range:
                km = KMeans(n_clusters=k,
                            init="k-means++",
                            random_state=100,
                            max_iter=1000)
                km.fit(data)
                sse.append(km.inertia_)
            plt.title("K vs sse(choose k value)")
            plt.xlabel("Number of clusters")
            plt.ylabel("sse")
            plt.plot(k_range, sse)
            plt.savefig("Elbow_plot.png")
            kn = KneeLocator(k_range,
                             sse,
                             curve="convex",
                             direction="decreasing")
            logger.info("The optimum number of clusters are: {}".format(
                kn.knee))
            return kn.knee

        except Exception as e:
            logger.warning(
                "Exception has occurred in elbow_plot. Exception message: " +
                str(e))
            logger.warning("Finding the number of clusters failed.")
            raise Exception()
Example No. 12
def find_the_number_of_clusters(principal_components, limit):
    # Find the number of clusters
    wcss = []
    for i in range(1, limit + 1):
        print("Fitting components {0}/{1}".format(i, limit))
        kmeans_pca = KMeans(n_clusters=i, init='k-means++', random_state=42)
        kmeans_pca.fit(principal_components)
        wcss.append(
            sum(
                np.min(cdist(principal_components, kmeans_pca.cluster_centers_,
                             'euclidean'),
                       axis=1)) / principal_components.shape[0])

    # Plot the figure
    plt.figure(figsize=(16, 10))
    plt.plot(range(1, limit + 1), wcss, marker='o', linestyle='--')
    plt.xlabel("Number of clusters")
    plt.ylabel("WCSS")
    plt.title("K-Means PCA")

    order = np.linspace(1, limit, limit)
    # find the elbow
    # https://github.com/arvkevi/kneed/blob/master/notebooks/decreasing_function_walkthrough.ipynb
    kn = KneeLocator(order, wcss, curve='convex', direction='decreasing')
    plt.vlines(kn.elbow, plt.ylim()[0], plt.ylim()[1], linestyles='dashed')
    plt.show()
    print("Number of clusters: {0}".format(kn.elbow))
    return int(kn.elbow)
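A minimal usage sketch (not from the original example; the feature matrix X, PCA size, and cluster limit are placeholders):

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

pcs = PCA(n_components=10).fit_transform(X)  # X: any numeric feature matrix (placeholder)
k = find_the_number_of_clusters(pcs, limit=10)
final_model = KMeans(n_clusters=k, init='k-means++', random_state=42).fit(pcs)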
Example No. 13
    def k_means_elbow(self):
        kmeans_kwargs = {
            "init": "random",
            "n_init": 10,
            "max_iter": 300,
            "random_state": 42
        }
        sse = []
        X = self.scaled_data[self.use_metrics]
        for k in range(1, 11):
            kmeans = KMeans(n_clusters=k, **kmeans_kwargs)
            kmeans.fit(X)
            sse.append(kmeans.inertia_)
        kl = KneeLocator(range(1, 11),
                         sse,
                         curve="convex",
                         direction="decreasing")
        logging.info('Recommended cluster number for K-means: ' +
                     str(kl.elbow))

        if self.output_plots_location is not None:
            plt.close("all")
            plt.plot(range(1, 11), sse, 'bx-')
            plt.xticks(range(1, 11))
            plt.xlabel("Number of Clusters")
            plt.ylabel("SSE")
            plt.savefig(self.output_plots_location / 'k_means-sse.pdf',
                        bbox_inches='tight',
                        pad_inches=0)
            plt.close('all')
Example No. 14
def give_num_clusters(matrix, min_cluster, max_cluster):
    distortions = []
    N_clusters = range(min_cluster, max_cluster)
    for n in N_clusters:
        kmeans = KMeans(init='k-means++', n_clusters=n, n_init=100)
        kmeans.fit(matrix)
        distortions.append(
            sum(
                np.min(cdist(matrix, kmeans.cluster_centers_, 'euclidean'),
                       axis=1)) / matrix.shape[0])
    kn = KneeLocator(list(N_clusters),
                     distortions,
                     S=0.1,
                     curve='convex',
                     direction='decreasing')
    fig, ax = plt.subplots()
    ax.plot(N_clusters, distortions, 'bx-')
    ax.set_xlabel('N')
    ax.set_ylabel('Distortion')
    ax.set_title('The Elbow Method showing the optimal customer clusters')
    ax.vlines(kn.knee,
              plt.ylim()[0],
              plt.ylim()[1],
              linestyles="--",
              label="knee/elbow")
    fig.tight_layout()
    return {'Best_N': kn.knee, 'Plot': fig}
Example No. 15
def calculate_best_k_clustering(wcss, min_k, max_k):
    """
    Finds the best k based on MSS
    :param max_k:
    :param min_k:
    :param wcss:
    :return:
    """
    x = range(min_k, min_k + len(wcss))
    y = wcss
    sensitivity = [1, 3, 5, 10, 100, 200, 400]
    knees = []
    norm_knees = []
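    # sweep sensitivity values; smaller S detects a knee sooner, larger S is more conservative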
    for s in sensitivity:
        kl = KneeLocator(x, y, curve='convex', direction='decreasing', S=s)
        knees.append(kl.knee)
        norm_knees.append(kl.norm_knee)

    print("knees")
    print(knees)
    plt.plot(range(min_k, min_k + len(wcss)), wcss)

    plt.title('Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('WCSS')
    plt.show()
    print("Errors: ")
    print(wcss)
    return knees[0]
Example No. 16
def define_threshold(pydam_df, min_knee=0.5, alpha=0.05):
    """Find kneedle point in PyDamage results

    Finding the kneedle point to get the optimal
    tradeoff between FP and FN, for the predicted
    accurary threshold

    Args:
        pydam_df (pandas df): pydamage results
        min_knee (float, optional): Min predicted_accuracy threshold.
        alpha(float, optional): Alpha q-value threshold
    """
    thresholds = [i.round(2) for i in arange(min_knee, 1, 0.01)]
    nb_contigs = []
    for i in thresholds:
        nb_contigs.append(
            pydam_df.query(
                f"predicted_accuracy >= {i} & qvalue <= {alpha}").shape[0])
    kneedle = KneeLocator(
        thresholds,
        nb_contigs,
        S=1.0,
        curve="convex",
        direction="decreasing",
        online=True,
    )
    print(thresholds)
    print(nb_contigs)
    return kneedle.knee
Example No. 17
def get_num_pca_comp(x, name):
    pca = PCA().fit(x)

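    # cumulative explained variance: a concave, increasing curve whose knee marks diminishing returns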
    vr = np.cumsum(pca.explained_variance_ratio_)
    print("Distribution of Eigen Values")
    print(pca.explained_variance_)

    x = range(1, len(vr) + 1)
    kneedle = KneeLocator(x,
                          vr,
                          S=1.0,
                          curve='concave',
                          direction='increasing')

    knee = math.ceil(kneedle.knee)

    plt.figure()
    plt.plot(x, vr, 'bx-')
    plt.xlabel('Number of Components')
    plt.ylabel('Cumulative Explained Variance')
    plt.axvline(x=knee, label="Selected Number of Components")
    plt.legend(loc="best")
    plt.title('N-Components vs. Explained Variance for {}'.format(name))
    plt.savefig("pca_curves/{}.png".format(name))

    return knee
Example No. 18
  def Elbow_kneeLocator(self, X, verbose):
    clusters = []
    best_n_clusters = 0
    best_sil = 0
    for i in range(1, 11):
        km = KMeans(n_clusters=i).fit(X)
        clusters.append(km.inertia_)
        labels = km.labels_
        if len(set(labels)) <= 1: continue
        sil = silhouette_score(X, labels)
        if sil > best_sil:
          best_sil = sil
          best_n_clusters = i
        
    fig, ax = plt.subplots()
    sns.lineplot(x=list(range(1, 11)), y=clusters, ax=ax)
    ax.set_title('Searching for Elbow')
    ax.set_xlabel('Clusters')
    ax.set_ylabel('Inertia')

    plt.show()

    kl = KneeLocator(range(1, 11), 
                    clusters, 
                    curve="convex", 
                    direction="decreasing")
    if verbose:
      print("\nResult finding by Knee Locator function: ")
      print(kl.elbow)
      print("\n")
    return best_n_clusters
Example No. 19
def testk(df, data):
    kmeans_kwargs = {
        "init": "random",
        "n_init": 10,
        "max_iter": 100,
        "random_state": 42,
    }

    sse = []
    for k in range(1, 6):
        kmeans = KMeans(n_clusters=k, **kmeans_kwargs)
        kmeans.fit(data)
        sse.append(kmeans.inertia_)
    try:
        # kl = KneeLocator(range(1, 6), sse, curve="concave", direction="increasing")
        kl = KneeLocator(range(1, 6),
                         sse,
                         curve="convex",
                         direction="decreasing")
    except TypeError:
        print("KneeLocator could not find an elbow")
    # kl.plot_knee_normalized()

    # n = kl.elbow
    n = 3

    kmeans = KMeans(n_clusters=n, **kmeans_kwargs).fit(data)
    df['kmeans'] = list(kmeans.labels_)
    return df, n
Example No. 20
def find_components(graph, allow_outliers=False):
    """
    find the strongly connected components
    """
    comps = [
        c
        for c in sorted(nx.connected_components(graph), key=len, reverse=True)
    ]
    labels = [0] * graph.number_of_nodes()
    for i, c in enumerate(comps):
        for n in c:
            labels[n] = i

    if allow_outliers:
        hist = [len(v) for v in comps]
        x_axis = list(range(len(hist)))
        kn = KneeLocator(x_axis,
                         hist,
                         S=1.0,
                         curve='convex',
                         direction='decreasing')
        idx = kn.knee
        if idx is not None:  # kn.knee can be None; comps[None:] would flag every node as an outlier
            for c in comps[idx:]:
                for n in c:
                    labels[n] = -1
    return np.array(labels), comps
Example No. 21
def elbow_manual(n_clusters, X):
    sample, features = X.shape
    e = 10**(-10)

    X = DataFrameImputer().fit_transform(X)
    #    X.fillna(X.mean())

    SSE = []
    SSE1 = []
    for i in range(1, n_clusters):
        initial_centers = kpp_init_notrials(i, X)
        #        initial_random=random_init(n_clusters,X)
        centers, labels = lloyd(i, X, e, initial_centers)
        centers_sk, labels_sk = kmean_sklearn(i, X)

        # using lloyd

        SSE.append(np.sum(np.min(cdist(X, centers, 'euclidean'), axis=1)))
        # using sklearn
        SSE1.append(np.sum(np.min(cdist(X, centers_sk, 'euclidean'), axis=1)))

    K = np.arange(1, n_clusters)
    plt.plot(K, SSE, label='manual method', color='blue')
    plt.plot(K, SSE1, label='sklearn method', color='orange')
    plt.title('Elbow-method comparison between our algorithm and sklearn')
    plt.legend()
    plt.show()

    # We must use at least 2 clusters
    K_ = np.arange(2, n_clusters)
    kn = KneeLocator(K_, SSE[1:], curve='convex', direction='decreasing')  # skip the k=1 point so x and y match in length
    print(kn.knee)
    # plotting dashed_vline on knee
    plt.vlines(kn.knee, plt.ylim()[0], plt.ylim()[1], linestyles='dashed')
Example No. 22
def make_loadings_matrix(rating_m):
    '''Takes a rating matrix and returns the loading matrix. The number of factors is chosen
    from the knee of the eigenvalue curve (initial oblimin fit); the final fit uses a varimax
    rotation for interpretability.
    '''
    # Fit the initial factor analysis
    fa = FactorAnalyzer(n_factors=10, rotation='oblimin')
    fa.fit(rating_m)
    x = list(range(1, 16))
    fa_eigens = fa.get_eigenvalues()[1]
    fa_matrix_knee = KneeLocator(x,
                                 fa_eigens,
                                 S=1.0,
                                 curve='convex',
                                 direction='decreasing')
    fa_knee = fa_matrix_knee.knee
    fa_kneed = FactorAnalyzer(n_factors=fa_knee,
                              rotation='varimax').fit(rating_m)
    loadings_m = pd.DataFrame(fa_kneed.loadings_.round(2))
    loadings_m.index = get_construct_names()
    loadings_m.index = loadings_m.index.rename(name='Construct')
    loadings_m.columns = [
        'Factor {} ({:.0f}%)'.format(
            i + 1,
            fa_kneed.get_factor_variance()[1][i] * 100)
        for i in loadings_m.columns
    ]
    return loadings_m
Example No. 23
def get_elbow(mode, adj_matrix, max_modules, min_modules):
	'''
	Inputs
	mode (str): just a string label to keep track of what matrix we're initializing. No computational value. 
	adj_matrix (np.array (N, N)): the adjacency matrix of the network
	max_modules (int): Max number of expected labels
	min_modules (int): Min number of expected labels
	Returns
	elbow/knee (int): the knee/elbow value, i.e. the most appropriate number of modules
	'''
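	# the eigenvalues of the adjacency matrix form the curve scanned for a knee between min_modules and max_modules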
	s, principal_axes = np.linalg.eig(adj_matrix)
	N = max_modules + 1
	ind = np.arange(min_modules, N, 1)    # the x locations for the groups
	kn = KneeLocator(ind, s[min_modules:N], S=1.0, curve='convex', direction='decreasing', online=True)
	
	'''
	plt.figure()
	plt.xlabel('k')
	plt.ylabel('Distortion')
	plt.title('The Elbow Method showing the optimal k')
	plt.plot(ind, s[min_modules:N], 'bx-')
	plt.vlines(kn.knee, plt.ylim()[0], plt.ylim()[1], linestyles='dashed')
	savefig('modules_' + str(mode) + '.eps', bbox_inches='tight', format='eps', dpi=200)
	plt.close()
	'''

	if kn.knee is None:
		return int((max_modules + min_modules)/2)
	return kn.knee
Example No. 24
    def elbow_plot(self, data):

        wcss = []  # initializing an empty list for the within-cluster sum of squares (WCSS)
        try:
            self.logger.info('Start of elbow plotting...')
            for i in range(1, 11):
                kmeans = KMeans(
                    n_clusters=i, init='k-means++',
                    random_state=0)  # initializing the KMeans object
                kmeans.fit(data)  # fitting the data to the KMeans Algorithm
                wcss.append(kmeans.inertia_)
            plt.plot(
                range(1, 11), wcss
            )  # creating the graph between WCSS and the number of clusters
            plt.title('The Elbow Method')
            plt.xlabel('Number of clusters')
            plt.ylabel('WCSS')
            #plt.show()
            plt.savefig('apps/models/kmeans_elbow.png'
                        )  # saving the elbow plot locally
            # finding the value of the optimum cluster programmatically
            self.kn = KneeLocator(range(1, 11),
                                  wcss,
                                  curve='convex',
                                  direction='decreasing')
            self.logger.info('The optimum number of clusters is: ' +
                             str(self.kn.knee))
            self.logger.info('End of elbow plotting...')
            return self.kn.knee

        except Exception as e:
            self.logger.exception('Exception raised while elbow plotting:' +
                                  str(e))
            raise Exception()
Example No. 25
def calc_top_degron_threshold(result):
    """See how many top degron potential sequences need to be dropped before
    the bag of amino acids and position specific models provide similar results."""
    step = 20
    possible_thresh = np.arange(step, 8000, step)

    # get the delta auc for every threshold
    output_list = []
    for thresh in possible_thresh:
        tmp = result.iloc[thresh:-thresh]
        pos_auc = metrics.roc_auc_score(tmp['y'],
                                        tmp['sequence position specific'])
        bag_auc = metrics.roc_auc_score(tmp['y'], tmp['bag of words'])
        delta_auc = pos_auc - bag_auc
        output_list.append([thresh, delta_auc])
        #if delta_auc < 0.01: return thresh-step

    # figure out the knee
    result_df = pd.DataFrame(output_list, columns=['threshold', 'delta auc'])
    knee_obj = KneeLocator(result_df['threshold'],
                           result_df['delta auc'],
                           curve='convex',
                           direction='decreasing')

    return knee_obj.knee
Example No. 26
def getEpsilon(train_data):
    neigh = sklearn.neighbors.NearestNeighbors(n_neighbors=4)
    nbrs = neigh.fit(train_data)
    distances, indices = nbrs.kneighbors(train_data)
    distances = np.sort(distances, axis=0)
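    # after sorting, column 0 is each point's zero self-distance; column 1 holds the nearest-neighbor distances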
    distances = distances[:, 1]
    y = distances
    x = list(np.arange(0, len(distances)))
    sensitivity = [
        1, 3, 5, 10, 20, 40, 60, 80, 100, 120, 150, 180, 200, 250, 300, 350,
        400
    ]
    epsilons = []
    for s in sensitivity:
        try:
            kneedle = KneeLocator(x,
                                  y,
                                  S=s,
                                  curve='convex',
                                  direction='increasing')
            epsilon = kneedle.all_elbows_y[0]
            if (len(epsilons) >= 1 and epsilons[-1] - epsilon <= 0.001):
                pass  # nearly identical to the previous epsilon, so skip it
            else:
                epsilons.append(epsilon)

        except Exception as e:
            print(e)
            if (len(epsilons) >= 1):
                epsilons.append(epsilons[-1] + s / 10)
            else:
                epsilons.append(s / 10)

    return epsilons
Example No. 27
def random_Crap():
    json_object = json.load(open("datasets/reg_season_advanced.json"))
    json_object_height = json.load(open("datasets/data.json"))
    data = []
    for person in json_object:
        values = json_object[person]
        values_to_add = (values[1])[5:]
        first = (json_object_height[person])
        json_acceptable_string = first.replace("'", "\"")
        d = json.loads(json_acceptable_string)
        curr_height = d['height']
        height_array = curr_height.split('-')
        final_height_inch = (int(height_array[0]) * 12) + int(height_array[1])
        values_to_add.append(final_height_inch)
        data.append(values_to_add)

    mms = MinMaxScaler()
    mms.fit(data)
    data_transformed = mms.transform(data)

    Sum_of_squared_distances = []
    K = range(1, 15)
    for k in K:
        km = KMeans(n_clusters=k)
        km = km.fit(data_transformed)
        Sum_of_squared_distances.append(km.inertia_)

    kneedle = KneeLocator(K,
                          Sum_of_squared_distances,
                          S=1.0,
                          curve='convex',
                          direction='decreasing')
    print(round(kneedle.knee, 3))
Example No. 28
def elbowPointLocate():
    global data
    localdata = data
    dictionaryvalues = {}
    elbowPoint = []
    for k in range(1, 20):
        kmeans = KMeans(n_clusters=k, max_iter=1000).fit(localdata)
        localdata["labeldata"] = kmeans.labels_
        dictionaryvalues[k] = kmeans.inertia_
        elbowPoint.append((k, kmeans.inertia_))
    findElbow = []
    createElbow = pd.DataFrame(elbowPoint, columns=["x", "y"])
    kn = KneeLocator(createElbow.x,
                     createElbow.y,
                     curve='convex',
                     direction='decreasing')
    print("*****************ELBOW KNEE VALUE*********************")
    print(kn.knee)
    print("**********************************************")
    findElbow = pd.DataFrame(data=findElbow, columns=["x", "y"])
    findElbow["x"] = list(dictionaryvalues.keys())
    findElbow["y"] = list(dictionaryvalues.values())
    findElbow = findElbow.to_dict(orient='records')
    findElbow = {'data': findElbow}
    return jsonify(findElbow)
Example No. 29
def kmeans_elbow(points, range_, title):
    scaler = MinMaxScaler()
    points_scaled = scaler.fit_transform(points)

    inertia = []
    clusters_n = range(1, range_)
    for k in clusters_n:
        kmeans = KMeans(n_clusters=k, random_state=5221)
        kmeans.fit(points_scaled)
        y_km = kmeans.predict(points_scaled)  # predict on the same scaled points used for fitting
        inertia.append(kmeans.inertia_)
    plt.figure(figsize=(10, 6))
    plt.plot(
        clusters_n,
        inertia,
    )
    plt.scatter(clusters_n, inertia, marker='x', c='r', s=100, label='Inertia')
    plt.legend()
    plt.xlabel('K')
    plt.ylabel('Sum_of_squared_distances')
    plt.title('Elbow Method For Optimal k (' + title + ' )')
    plt.show()
    kn = KneeLocator(clusters_n,
                     inertia,
                     S=2.0,
                     curve='convex',
                     direction='decreasing')
    return kn.knee
Example No. 30
def distance_curve(distances, mode='show'):
    """
    Show the distance curve with knee candidates, or save it to a file.
    :param distances: sorted nearest-neighbor distances
    :param mode: show | save
    :return:
    """
    sensitivity = [1, 3, 5, 10, 100, 150]
    knees = []
    y = list(range(len(distances)))
    for s in sensitivity:
        kl = KneeLocator(distances, y, S=s)
        knees.append(kl.knee)

    plt.style.use('ggplot')
    plt.figure(figsize=(10, 10))
    plt.plot(distances, y)
    colors = ['r', 'g', 'k', 'm', 'c', 'b', 'y']
    for k, c, s in zip(knees, colors, sensitivity):
        plt.vlines(k, 0, len(distances), linestyles='--', colors=c, label=f'S = {s}')
    plt.legend()
    if mode == 'show':
        plt.show()
    else:
        plt.savefig("distance_curve.png")