def no_transaction_vs_harmonic():
    """Scatter-plot column 1 of the cleaned merchant data against column 0.

    Reads ``MerchantSumAmountPerDay.txt`` from ``DATASET_DIR`` through
    ``MerchantData``, takes the cleaned matrix, and passes column 1 (labelled
    "No Transaction") and column 0 (labelled "Harmonic Sum") to
    ``plotlyvisualize.scatter``, with ``PLOT_OUT_DIR`` as ``out_path``.
    """
    source_file = os.path.join(DATASET_DIR, "MerchantSumAmountPerDay.txt")
    cleaned = MerchantData(source_file).get_clean_data()

    # Column 0 was annotated "Recency" and column 1 "Frequency" by the
    # original author; those meanings are not verifiable from this function.
    harmonic_column = cleaned[:, 0]
    count_column = cleaned[:, 1]

    # The count column is passed first, the harmonic column second.
    plotlyvisualize.scatter(
        count_column,
        harmonic_column,
        "Trans vs Harmonic",
        ("No Transaction", "Harmonic Sum"),
        out_path=PLOT_OUT_DIR,
    )
def no_transactions_vs_sum_amounts():
    """Scatter-plot log(column 1) of the cleaned merchant data against column 2.

    Reads ``MerchantSumAmountPerDay.txt`` from ``DATASET_DIR`` through
    ``MerchantData``, applies the natural logarithm to column 1 (labelled
    "No Transaction"), and plots it against the untransformed column 2
    (labelled "Sum Amounts") via ``plotlyvisualize.scatter``, with
    ``PLOT_OUT_DIR`` as ``out_path``.
    """
    source_file = os.path.join(DATASET_DIR, "MerchantSumAmountPerDay.txt")
    cleaned = MerchantData(source_file).get_clean_data()

    # Column 1 was annotated "Frequency" and column 2 "Money" by the
    # original author; those meanings are not verifiable from this function.
    count_column = cleaned[:, 1]
    amount_column = cleaned[:, 2]

    # Only the count column is log-scaled; the amounts are plotted as-is.
    log_counts = np.log(count_column)

    plotlyvisualize.scatter(
        log_counts,
        amount_column,
        "No Transactions vs Sum Amounts",
        ("No Transaction", "Sum Amounts"),
        out_path=PLOT_OUT_DIR,
    )
# Example #3
# 0
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn.neighbors import KernelDensity
import os
from readdata.merchantdata import MerchantData

# Directory names used by this script. DATASET_DIR is joined with the data
# file name below; PLOT_OUT_DIR is not referenced in the visible part of the
# script (presumably the plot output directory — confirm).
DATASET_DIR = "dataset"
PLOT_OUT_DIR = "plotsout"

# Load the merchant data file and obtain its cleaned form.
# NOTE(review): the column indexing below treats X as a 2-D array with at
# least three columns — confirm against MerchantData.get_clean_data().
merchant_data_path = os.path.join(DATASET_DIR, "MerchantSumAmountPerDay.txt")
data_reader = MerchantData(merchant_data_path)
X = data_reader.get_clean_data()

# Data Selection
no_transaction = X[:, 1]  # Frequency
sum_amounts = X[:, 2]  # Money

# Plot a 1D density example
# This first N is a dead store: it is overwritten two lines below before
# anything reads it.
N = 100
# Seeds NumPy's global RNG; no random draw appears in the visible part of
# the script.
np.random.seed(1)
# N becomes the length of the selected column along its first axis.
N = no_transaction.shape[0]
# Rebind X (previously the full cleaned data) to the selected column with a
# trailing axis added. The trailing comment on the next statement is a
# leftover alternative that drew synthetic normal samples instead.
X = no_transaction[:,
                   np.newaxis]  #np.random.normal(0, 1, 0.3 * N)[:, np.newaxis]
# Evaluation grid: 1000 evenly spaced points spanning [min(X), max(X)], with
# a trailing axis added to match X.
X_plot = np.linspace(np.min(X), np.max(X), 1000)[:, np.newaxis]

# Create the figure and axes.
fig, ax = plt.subplots()

# For each kernel, fit a KernelDensity model (bandwidth 0.5) on X and
# evaluate it on the grid.
for kernel in ['gaussian', 'tophat', 'epanechnikov']:
    kde = KernelDensity(kernel=kernel, bandwidth=0.5).fit(X)
    # scikit-learn documents score_samples as returning the log-density at
    # each query point.
    log_dens = kde.score_samples(X_plot)