Пример #1
0
 def test_dame_gender_dataset2genderlist_method_returns_correct_result(
         self):
     """Check the first four values dataset2genderlist returns per dataset.

     Each dataset path is paired with the list expected at the head of the
     result; the datasets are checked in the order listed.
     """
     gender = Gender()
     expected_heads = (
         ("files/names/all.csv", [1, 1, 1, 1]),
         ("files/names/yob2017.txt", [0, 0, 0, 0]),
     )
     for dataset_path, head in expected_heads:
         codes = gender.dataset2genderlist(dataset=dataset_path)
         self.assertEqual(codes[:4], head)
Пример #2
0
    elif (args.categorical == "noletters"):
        g.features_list2csv(categorical="noletters",
                            path="files/names/allnoundefined.csv")
        features = "files/features_list_cat.csv"
    elif (args.categorical == "nocategorical"):
        g.features_list2csv(categorical="nocategorical",
                            path="files/names/allnoundefined.csv")
        features = "files/features_list_no_cat.csv"
    else:
        g.features_list2csv(categorical="both", path="files/names/all.csv")
        features = "files/features_list.csv"
    # STEP 1: N components + 1 target.
    # Load the feature table from the CSV path selected in `features` above
    # (presumably the file produced by the features_list2csv call -- confirm).
    x = pd.read_csv(features)
    #print(x.columns)

    # Values used for the target column; always read from allnoundefined.csv.
    # NOTE(review): the fallback branch above builds its features from
    # files/names/all.csv instead -- confirm both sources have matching rows.
    y = g.dataset2genderlist(dataset="files/names/allnoundefined.csv")
    #print(y)

    # STEP 2: add the target as an extra column named 'target component'.
    # DataFrame.join aligns on the index; presumably both frames carry the
    # default integer index so rows pair up by position -- TODO confirm.
    target = pd.DataFrame(data=y, columns=['target component'])
    finalDf = x.join(target)

    # STEP 3: normalize the data.
    # NOTE(review): finalDf still contains 'target component', so the target
    # is scaled and passed to PCA together with the features -- confirm this
    # is intended.
    from sklearn import preprocessing
    data_scaled = pd.DataFrame(preprocessing.scale(finalDf),
                               columns=finalDf.columns)

    # STEP 4: PCA with the component count taken from args.components.
    # The array returned by fit_transform is not stored in these lines; only
    # the fitted `pca` object stays available to whatever code follows.
    pca = PCA(n_components=int(args.components))
    pca.fit_transform(data_scaled)