Example no. 1
import numpy as np
import matplotlib.pyplot as plt

import datasets

plt.rc('font', family='Arial', size=10)

# Load all the DAN data
X, Y, X_error, names = datasets.read_dan_data()

# Specify which DAN measurements we want to plot
h1 = 'DNB_456787389EAC06680361170_______M1' # 2.6 H, 0.0169 BNACS
h2 = 'DNB_455739444EAC06560341120_______M1' # 3.3 H, 0.0169 BNACS

acs1 = 'DNB_456787389EAC06680361170_______M1' # 2.6 H, 0.0169 BNACS
acs2 = 'DNB_459287442EAC06960391552_______M1' # 2.6 H, 0.0143 ACS

# Get the matching data
h1_idx = np.where(names == h1)
h2_idx = np.where(names == h2)
acs1_idx = np.where(names == acs1)
acs2_idx = np.where(names == acs2)

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15,5))

# time_bins_dan holds the bin edges; drop the final edge (the end of the
# last bin) so there is one start time per bin
time_bins = datasets.time_bins_dan[:-1]

# Replace the zero at the first bin edge so a log-scaled x-axis doesn't fail
time_bins[0] = 1e-20
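
# Hypothetical continuation (the original example is truncated here): plot
# the H comparison on ax1 and the ACS comparison on ax2. This assumes the
# first 64 columns of X hold the thermal (CTN) die-away counts, the layout
# implied by the restricted-bin indexing in the later examples.
for idx, label in [(h1_idx, '2.6 wt% H'), (h2_idx, '3.3 wt% H')]:
    ax1.plot(time_bins, X[idx][0][:64], label=label)
for idx, label in [(acs1_idx, '0.0169 BNACS'), (acs2_idx, '0.0143 BNACS')]:
    ax2.plot(time_bins, X[idx][0][:64], label=label)
for ax in (ax1, ax2):
    ax.set_xscale('log')
    ax.set_xlabel('Time (us)')
    ax.set_ylabel('Counts')
    ax.legend()
plt.show()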
Example no. 2

import argparse

import numpy as np
from sklearn.decomposition import PCA

import datasets

# Parse command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--n_components',
                    type=int,
                    default=3,
                    help='number of principal components to use for PCA')
parser.add_argument(
    '--use_restricted_bins',
    action='store_true',
    help='only use bins 18-34 and 13-17 for thermal and epithermal')
parser.add_argument('--score', help='likelihood or mse')
args = parser.parse_args()

X, Y, Y_err, names = datasets.read_dan_data()
n_meas = X.shape[0]

if args.use_restricted_bins:
    X = np.take(X,
                list(range(17, 34)) + list(range(64 + 12, 64 + 17)),
                axis=1)

scores = []
for i in range(n_meas):
    # Too few prior measurements to fit a meaningful model
    if i < 5:
        scores.append(0)
        continue
    pca = PCA(n_components=args.n_components)
    # Build a model using measurements up to this point
    pca.fit(X[:i, :])
    # Get the log likelihood that the new sample belongs to the distribution
    # of data in the previous sols
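    # Hypothetical completion; the original snippet is cut off here. The
    # mse branch mirrors the --score flag; score_samples gives the
    # per-sample log likelihood under the fitted probabilistic PCA model.
    if args.score == 'mse':
        recon = pca.inverse_transform(pca.transform(X[i:i+1, :]))
        scores.append(np.mean((X[i:i+1, :] - recon) ** 2))
    else:
        scores.append(pca.score_samples(X[i:i+1, :])[0])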
Example no. 3
import argparse

import numpy as np
from sklearn.decomposition import PCA

import datasets

# Parse command line arguments (imports and parser setup reconstructed
# from the sibling examples; the snippet is cut off above)
parser = argparse.ArgumentParser()
parser.add_argument('--n_neighbors',  # flag name inferred from the help text
                    type=int,
                    default=2,
                    help='number of neighbors to evaluate')
parser.add_argument('--n_components',
                    type=int,
                    default=3,
                    help='number of principal components to use for PCA')
parser.add_argument(
    '--use_restricted_bins',
    action='store_true',
    help='only use bins 18-34 and 13-17 for thermal and epithermal')
args = parser.parse_args()

# Load the data sets
X, y = datasets.read_acs_grid_data()
dan_X, dan_y, _, names = datasets.read_dan_data()

# Normalize counts to approximately same range
X = datasets.normalize_counts(X)
dan_X = datasets.normalize_counts(dan_X)

if args.use_restricted_bins:
    n_bins = 64
    X = np.take(X,
                list(range(17, 34)) + list(range(n_bins + 12, n_bins + 17)),
                axis=1)
    dan_X = np.take(dan_X,
                    list(range(17, 34)) + list(range(n_bins + 12, n_bins + 17)),
                    axis=1)

# Project the data into principal subspace of model data
pca = PCA(n_components=args.n_components)
pca.fit(X)
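
# Hypothetical continuation (the original snippet ends above): the
# --n_neighbors flag suggests matching each DAN measurement to its nearest
# model-grid points in the fitted principal subspace.
from sklearn.neighbors import NearestNeighbors

nn = NearestNeighbors(n_neighbors=args.n_neighbors)
nn.fit(pca.transform(X))
distances, indices = nn.kneighbors(pca.transform(dan_X))
for name, idx in zip(names, indices):
    print(name, y[idx])  # nearest grid-point labels for this measurement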
Example no. 4
import argparse

import numpy as np
from sklearn.decomposition import PCA

import datasets

# Parse command line arguments (imports and the --n_components argument
# reconstructed from the sibling examples; the snippet is cut off above)
parser = argparse.ArgumentParser()
parser.add_argument('--n_components',
                    type=int,
                    default=3,
                    help='number of principal components to use for PCA')
parser.add_argument(
    '--plot_sol_hist',
    action='store_true',
    help='plot frequency of sol occurrence for DANs in each cluster')
parser.add_argument(
    '--plot_geochem_hist',
    action='store_true',
    help='plot histogram of H and ACS values for DANs in each cluster')
# parser.add_argument('--plot_cluster_centers', action='store_true', help='plot DAN at center point of each cluster (as opposed to mean measurement)')
# parser.add_argument('--plot_cluster_means', action='store_true', help='plot mean measurement of each cluster (as opposed to center point)')
parser.add_argument(
    '--use_restricted_bins',
    action='store_true',
    help='only run analysis for bins 18-34 (CTN) and 12-17 (CETN)')
args = parser.parse_args()

X, Y, Y_err, X_filenames = datasets.read_dan_data()
time_bins = datasets.time_bins_dan
n_bins = 64
if args.use_restricted_bins:
    X = np.take(X,
                list(range(17, 34)) + list(range(n_bins + 12, n_bins + 17)),
                axis=1)

# Fit PCA model and project data into PC space (and back out)
pca = PCA(n_components=args.n_components)
pca.fit(X)
print(sum(pca.explained_variance_ratio_))

transformed = pca.transform(X)

# gm = GaussianMixture(n_components=args.n_gaussians).fit(transformed)
# assignments = gm.predict(transformed)
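
# A runnable version of the clustering step sketched in the comments above.
from sklearn.mixture import GaussianMixture

n_gaussians = 3  # the original uses args.n_gaussians; the value is assumed
gm = GaussianMixture(n_components=n_gaussians).fit(transformed)
assignments = gm.predict(transformed)
print(np.bincount(assignments))  # number of DAN measurements per cluster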
Example no. 5
# import matplotlib
# matplotlib.use('TkAgg')

import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

import datasets

plt.rc('font', family='Arial', size=10)

X_d, _, _, _ = datasets.read_dan_data()
#X_d = datasets.normalize_counts(X_d)
X_d = np.take(X_d,
              list(range(17, 34)) + list(range(64 + 12, 64 + 17)),
              axis=1)

X_m, _ = datasets.read_highweh_grid()
#X_m = datasets.normalize_counts(X_m)

variances_d = []
variances_m = []

for k in range(1, 11):
    pca_d = PCA(n_components=k)
    pca_m = PCA(n_components=k)
    pca_d.fit(X_d)
    pca_m.fit(X_m)

    print(k)
    print('DAN')
    print(pca_d.explained_variance_ratio_)
    print(sum(pca_d.explained_variance_ratio_))
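    # Presumed continuation (the snippet is truncated here): mirror the DAN
    # report for the model grid and record the totals in the lists above.
    print('Model grid')
    print(pca_m.explained_variance_ratio_)
    print(sum(pca_m.explained_variance_ratio_))
    variances_d.append(sum(pca_d.explained_variance_ratio_))
    variances_m.append(sum(pca_m.explained_variance_ratio_))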
Example no. 6
    n_test = int(X.shape[0] * args.testing_percentage/100.0)
    X_test = X[:n_test]
    Y_test = Y[:n_test]
    X_train = X[n_test:]
    Y_train = Y[n_test:]
elif args.test_sebina:
    X, Y = datasets.read_acs_grid_data()
    n_test = int(X.shape[0] * args.testing_percentage/100.0)
    X_test = X[:n_test]
    Y_test = Y[:n_test]
    X_train = X[n_test:]
    Y_train = Y[n_test:]
elif args.test_dan:
    if args.model_grid == 'hardgrove2011':
        X, Y = datasets.read_sim_data(use_dan_bins=True)
        X_test, Y_test, Y_test_error, test_names = datasets.read_dan_data(limit_2000us=True)
        n_bins = 34
    elif args.model_grid == 'sabina':
        X, Y = datasets.read_grid_data()
        X_test, Y_test, Y_test_error, test_names = datasets.read_dan_data(limit_2000us=False, label_source='iki')
        n_bins = len(datasets.time_bins_dan)-1
    elif args.model_grid == 'acs':
        X, Y = datasets.read_acs_grid_data()
        X_test, Y_test, Y_chi2, test_names = datasets.read_dan_data(limit_2000us=False, label_source='asu')
        n_bins = len(datasets.time_bins_dan)-1
    elif args.model_grid == 'both':
        X_full, Y_full = datasets.read_sim_data(use_dan_bins=True)
        X_rover, Y_rover = datasets.read_grid_data(limit_2000us=True)
        X = np.concatenate([X_full, X_rover])
        Y = np.concatenate([Y_full, Y_rover])
        X_test, Y_test, Y_test_error, test_names = datasets.read_dan_data(limit_2000us=True)
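
# Hypothetical continuation (the original is truncated mid-branch): scripts
# with this train/test split typically fit a regressor on the model grid
# and score it on the held-out measurements. The variable names follow the
# first two branches above; the regressor choice is an assumption.
from sklearn.neighbors import KNeighborsRegressor

model = KNeighborsRegressor(n_neighbors=3)
model.fit(X_train, Y_train)
print(model.score(X_test, Y_test))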
Example no. 7
import argparse

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401, registers projection='3d' on older matplotlib
from sklearn.decomposition import PCA

import datasets

# Parse command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--n_components',
                    type=int,
                    default=3,
                    help='number of principal components to use for PCA')
parser.add_argument('--normalize',
                    action='store_true',
                    help='normalize the data before PCA')
args = parser.parse_args()

X_sebina, Y_sebina = datasets.read_acs_grid_data()
print(Y_sebina.shape)
X_dan, Y_dan, err_dan, names_dan = datasets.read_dan_data()
print(Y_dan.shape)

time_bins = datasets.time_bins_dan
n_bins = 64

if args.normalize:
    X_sebina = datasets.normalize_counts(X_sebina)
    X_dan = datasets.normalize_counts(X_dan)

pca = PCA(n_components=args.n_components)
X_t = pca.fit_transform(X_sebina)

# Plot the Sebina grid points in PC space
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1, projection='3d')
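
# Continuation sketch (the snippet ends above): scatter the grid points in
# the first three principal components. Axis labels are assumptions.
ax1.scatter(X_t[:, 0], X_t[:, 1], X_t[:, 2])
ax1.set_xlabel('PC1')
ax1.set_ylabel('PC2')
ax1.set_zlabel('PC3')
plt.show()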