def prepare_data():
    """Build a 1000-row mixed ANSUR II sample with the listed columns removed.

    Draws 500 random rows from the male loader and 500 from the female
    loader, stacks them, and drops every column named in ``excluded``.

    Returns:
        The stacked sample without the excluded columns.
    """
    excluded = [
        'Branch', 'Gender', 'Component', "weight_kg", "stature_m", "BMI_class",
        "Height_class"
    ]
    males = load_ansur_ii_male_data().sample(500)
    females = load_ansur_ii_female_data().sample(500)
    stacked = pd.concat([males, females])
    return stacked.drop(columns=excluded)
# Example no. 2
# 0
def prepare_data():
    """Return five selected measurements for a 1000-row mixed ANSUR II sample.

    500 random rows are taken from the male loader and 500 from the female
    loader; the stacked result is narrowed to the columns in ``selected``.

    Returns:
        The stacked sample restricted to the five selected columns.
    """
    selected = [
        'elbowrestheight', 'wristcircumference', 'anklecircumference',
        'buttockheight', 'crotchheight'
    ]
    males = load_ansur_ii_male_data().sample(500)
    females = load_ansur_ii_female_data().sample(500)
    return pd.concat([males, females])[selected]
def prepare_data():
    """Return head measurements plus two synthetic noise columns.

    500 random rows are taken from the male loader and 500 from the female
    loader. The stacked sample is narrowed to four head-related columns and
    extended with two randomly generated columns:

    * ``n_hairs`` -- normal draws with mean 100000 and standard deviation 10.
    * ``measurement_error`` -- normal draws with mean 0.1 and standard
      deviation 1.387893e-17.

    Returns:
        A new frame holding the four selected columns and the two synthetic
        columns, one independent draw per row.
    """
    ansur_ii_male = load_ansur_ii_male_data().sample(500)
    ansur_ii_female = load_ansur_ii_female_data().sample(500)
    df = pd.concat([ansur_ii_male, ansur_ii_female])
    cols = [
        'headbreadth', 'headcircumference', 'headlength', 'tragiontopofhead'
    ]
    # Work on an explicit copy: adding columns to a bare ``df[cols]``
    # selection raises pandas' SettingWithCopyWarning.
    head_df = df[cols].copy()
    n_rows = len(head_df)
    # Pass ``size`` so each row gets its own draw; without it a single scalar
    # is drawn and broadcast, making the whole column constant.
    head_df["n_hairs"] = np.random.normal(100000, 10, size=n_rows)
    head_df["measurement_error"] = np.random.normal(
        0.1, 1.387893e-17, size=n_rows)
    return head_df
# Example no. 4
# 0
def prepare_data():
    """Return a 250-row random sample of four male ANSUR II columns.

    Returns:
        250 randomly chosen rows of the male data, restricted to the columns
        listed in ``wanted``.
    """
    wanted = [
        'stature_m', 'buttockheight', 'waistcircumference',
        'shouldercircumference'
    ]
    males = load_ansur_ii_male_data()
    narrowed = males[wanted]
    return narrowed.sample(250)
# Example no. 5
# 0
def prepare_data():
    """Prepare a train/test split for predicting flexed biceps circumference.

    Samples 1000 random rows from the male loader, uses the columns in
    ``feature_names`` as predictors and ``bicepscircumferenceflexed`` as the
    target, and holds out 30% of the rows for testing.

    Returns:
        A 6-tuple ``(X_train, X_test, y_train, y_test, X, y)`` where ``X`` and
        ``y`` are the unsplit predictors and target.
    """
    males = load_ansur_ii_male_data().sample(1000)
    feature_names = [
        'acromialheight', 'axillaheight', 'bideltoidbreadth',
        'buttockcircumference', 'buttockkneelength', 'buttockpopliteallength',
        'cervicaleheight', 'chestcircumference', 'chestheight',
        'earprotrusion', 'footbreadthhorizontal', 'forearmcircumferenceflexed',
        'handlength', 'headbreadth', 'heelbreadth', 'hipbreadth',
        'iliocristaleheight', 'interscyeii', 'lateralfemoralepicondyleheight',
        'lateralmalleolusheight', 'neckcircumferencebase',
        'radialestylionlength', 'shouldercircumference', 'shoulderelbowlength',
        'sleeveoutseam', 'thighcircumference', 'thighclearance',
        'verticaltrunkcircumferenceusa', 'waistcircumference', 'waistdepth',
        'wristheight', 'BMI'
    ]
    target_name = "bicepscircumferenceflexed"
    X = males[feature_names]
    y = males[target_name]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    return X_train, X_test, y_train, y_test, X, y
# Example no. 6
# 0
def pca_on_larger_dataset():
    """Fit a PCA on a 250-row sample of male ANSUR II data and print its variance ratios.

    Thirteen columns are selected, standardised with ``StandardScaler`` and
    passed to a default ``PCA``. The per-component explained variance ratio
    and its cumulative sum are printed.

    NOTE(review): the final statements use ``m`` and ``df_numeric``, neither
    of which is defined inside this function. Unless both exist at module
    level, the call raises ``NameError``. These lines look like the tail of a
    separate t-SNE function (presumably the ``fitting_tsne_to_ansur_data``
    that is called later in the file) that was merged in here -- confirm and
    split.

    Returns:
        Whatever ``m.fit_transform(df_numeric)`` yields (t-SNE features,
        judging by the variable name -- TODO confirm).
    """
    ansur_ii_male = load_ansur_ii_male_data()
    cols = [
        'stature_m', 'buttockheight', 'waistdepth', 'span',
        'waistcircumference', 'shouldercircumference', 'footlength',
        'handlength', 'functionalleglength', 'chestheight',
        'chestcircumference', 'cervicaleheight', 'sittingheight'
    ]

    # Keep only the selected columns, then draw 250 random rows
    ansur_df = ansur_ii_male[cols].sample(250)
    # Scale the data
    scaler = StandardScaler()
    ansur_std = scaler.fit_transform(ansur_df)

    # Apply PCA (default settings) to the scaled data
    pca = PCA()
    pca.fit(ansur_std)

    # Inspect the explained variance ratio per component
    print(pca.explained_variance_ratio_)

    # Print the cumulative sum of the explained variance ratio
    print(pca.explained_variance_ratio_.cumsum())
    # Fit and transform the t-SNE model on the numeric dataset
    # NOTE(review): `m` and `df_numeric` are not defined in this function;
    # the PCA results above are not used from here on.
    tsne_features = m.fit_transform(df_numeric)
    print(tsne_features.shape)

    return tsne_features


def tsne_visualization_of_dimensionality(df, tsne_features):
    """Show three scatter plots of 2-D features, each coloured by a different column.

    The first two columns of ``tsne_features`` are written into ``df`` as
    ``"x"`` and ``"y"`` (the caller's frame is modified in place). One
    scatter plot is then shown per hue column, in the order ``"Component"``,
    ``"Branch"``, ``"Gender"``.

    Args:
        df: Frame containing the ``Component``, ``Branch`` and ``Gender``
            columns; gains ``x`` and ``y`` columns.
        tsne_features: 2-D array-like indexable as ``[:, 0]`` and ``[:, 1]``.
    """
    df["x"] = tsne_features[:, 0]
    df["y"] = tsne_features[:, 1]

    # One figure per categorical column used for colouring.
    for hue_column in ("Component", "Branch", "Gender"):
        sns.scatterplot(x="x", y="y", hue=hue_column, data=df)
        plt.show()


# Script section: load the full male and female data, stack them, compute
# t-SNE features and plot them.
ansur_ii_male = load_ansur_ii_male_data()
ansur_ii_female = load_ansur_ii_female_data()
df = pd.concat([ansur_ii_male, ansur_ii_female])
# NOTE(review): fitting_tsne_to_ansur_data is not defined in the visible part
# of this file -- confirm it exists before running.
tsne_features = fitting_tsne_to_ansur_data(df)
# Adds "x" and "y" columns to df in place and shows three scatter plots.
tsne_visualization_of_dimensionality(df, tsne_features)
# Example no. 8
# 0
def accuracy_after_dimensionality_reduction(ansur_df):
    """Train an SVC on a single feature and print its train/test accuracy.

    Only the ``neckcircumferencebase`` column is used as the predictor and
    ``Gender`` as the label. 30% of the rows are held out for testing.

    Args:
        ansur_df: Frame containing the ``neckcircumferencebase`` and
            ``Gender`` columns.
    """
    # Double brackets keep the single feature two-dimensional.
    features = ansur_df[["neckcircumferencebase"]]
    labels = ansur_df["Gender"]

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.3)

    classifier = SVC()
    classifier.fit(X_train, y_train)

    train_score = accuracy_score(y_train, classifier.predict(X_train))
    test_score = accuracy_score(y_test, classifier.predict(X_test))

    report = "{0:.1%} accuracy on test set vs. {1:.1%} on training set"
    print(report.format(test_score, train_score))


# Script section: build a 1000-row mixed sample (500 random rows from each
# loader) and compare classifier accuracy before and after reducing the
# feature set.
ansur_ii_male = load_ansur_ii_male_data().sample(500)
ansur_ii_female = load_ansur_ii_female_data().sample(500)
df = pd.concat([ansur_ii_male, ansur_ii_female])
# Columns removed before modelling. 'Gender' is not listed, so it stays in
# df; accuracy_after_dimensionality_reduction reads it as the label.
non_numeric = [
    'Branch', 'Component', "weight_kg", "stature_m", "BMI", "BMI_class",
    "Height_class"
]

# Dropped in place, so both calls below receive the reduced frame.
df.drop(non_numeric, axis=1, inplace=True)
# NOTE(review): accuracy_with_large_dimension is not defined in the visible
# part of this file -- confirm it exists before running.
accuracy_with_large_dimension(df)
accuracy_after_dimensionality_reduction(df)
def prepare_data():
    """Return a 1000-row mixed ANSUR II sample with all columns kept.

    Returns:
        500 random rows from the male loader stacked on top of 500 random
        rows from the female loader.
    """
    males = load_ansur_ii_male_data().sample(500)
    females = load_ansur_ii_female_data().sample(500)
    return pd.concat([males, females])