Exemplo n.º 1
0
 def average(self, name, surname):
     """Return the mean of the binary gender guesses for a name.

     DameGenderGuesser is always consulted.  DameGenderApi, DameGenderize
     and DameNamsor are consulted only when their option in the DEFAULT
     section of ``self.config`` equals ``'yes'``.  A guess equal to 2 is
     left out of the mean.

     Args:
         name: given name passed to every service.
         surname: family name; only passed to DameNamsor.

     Returns:
         The arithmetic mean of the guesses that were kept, or 2 when no
         service produced a usable guess (previously this case raised
         ZeroDivisionError).
     """
     guesses = [int(DameGenderGuesser().guess(name, binary="True"))]
     if (self.config['DEFAULT']['genderapi'] == 'yes'):
         guesses.append(int(DameGenderApi().guess(name, binary="True")))
     if (self.config['DEFAULT']['genderize'] == 'yes'):
         guesses.append(int(DameGenderize().guess(name, binary="True")))
     if (self.config['DEFAULT']['namsor'] == 'yes'):
         guesses.append(
             int(DameNamsor().guess(str(name), str(surname), binary="True")))
     # 2 is the code excluded from the mean; only the remaining guesses count.
     usable = [guess for guess in guesses if guess != 2]
     if not usable:
         # Nothing to average: answer with the same excluded code instead of
         # dividing by zero.
         return 2
     return sum(usable) / len(usable)
 def test_dame_genderize_get(self):
     # Checks DameGenderize.get for a bare name and for a name with surname
     # and country.  Nothing is asserted unless the 'genderize' option in the
     # DEFAULT config section is 'yes'.
     genderize = DameGenderize()
     if genderize.config['DEFAULT']['genderize'] != 'yes':
         return
     name_only = genderize.get("peter")
     self.assertEqual(
         name_only,
         {'probability': 0.99, 'count': 165452, 'name': 'peter', 'gender': 'male'})
     with_country = genderize.get(name="peter", surname="smith", country_id="US")
     self.assertEqual(
         with_country,
         {'count': 6521, 'country_id': 'US', 'gender': 'male', 'name': 'peter', 'probability': 0.99})
 def test_dame_genderize_get2to10(self):
     # Checks DameGenderize.get2to10 with a three-name list and an eight-name
     # list.  Nothing is asserted unless the 'genderize' option in the DEFAULT
     # config section is 'yes'.
     genderize = DameGenderize()
     if genderize.config['DEFAULT']['genderize'] != 'yes':
         return
     three_names = genderize.get2to10(["peter", "lois", "stevie"])
     self.assertEqual(three_names, [
         {'count': 165452, 'gender': 'male', 'name': 'peter', 'probability': 0.99},
         {'count': 2510, 'gender': 'female', 'name': 'lois', 'probability': 0.58},
         {'count': 2568, 'gender': 'male', 'name': 'stevie', 'probability': 0.87},
     ])
     eight_names = genderize.get2to10(
         ["peter", "lois", "stevie", "john", "paul", "mike", "mary", "anna"])
     self.assertEqual(eight_names, [
         {"name": "peter", "gender": "male", "probability": 0.99, "count": 165452},
         {"name": "lois", "gender": "female", "probability": 0.58, "count": 2510},
         {"name": "stevie", "gender": "male", "probability": 0.87, "count": 2568},
         {"name": "john", "gender": "male", "probability": 0.99, "count": 218952},
         {"name": "paul", "gender": "male", "probability": 0.99, "count": 148099},
         {"name": "mike", "gender": "male", "probability": 0.99, "count": 109844},
         {"name": "mary", "gender": "female", "probability": 0.99, "count": 142684},
         {"name": "anna", "gender": "female", "probability": 0.98, "count": 383713},
     ])
 def test_dame_genderize_json2guess_list(self):
     # Checks DameGenderize.json2guess_list against three stored JSON files.
     # Renamed from test_dame_genderize_guess_list: a later method in this
     # file uses that same name, which replaced this definition so these
     # assertions never ran.
     dg = DameGenderize()
     gl1 = dg.json2guess_list(jsonf="files/names/genderizefiles_names_min.csv.json", binary=True)
     self.assertEqual(gl1, [1, 1, 1, 1, 1, 0])
     gl2 = dg.json2guess_list(jsonf="files/names/genderizefiles_names_partialnoundefined.csv.json", binary=True)
     self.assertEqual(gl2, [1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1])
     gl3 = dg.json2guess_list(jsonf="files/names/genderizefiles_names_allnoundefined0.csv.json", binary=True)
     self.assertEqual(gl3, [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
 def test_dame_genderize_gender_list(self):
     # Checks DameGenderize.gender_list on files/names/partial.csv: the
     # returned codes, their number, and the females/males/unknown counters
     # left on the instance afterwards.
     genderize = DameGenderize()
     genders = genderize.gender_list(path="files/names/partial.csv")
     expected_codes = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1,
         2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
     ]
     self.assertEqual(genders, expected_codes)
     self.assertEqual(len(genders), 21)
     for counter, expected_total in (('females', 3), ('males', 16), ('unknown', 2)):
         self.assertEqual(getattr(genderize, counter), expected_total)
 def test_dame_genderize_guess_list(self):
     # Checks the first ten entries of DameGenderize.guess_list for
     # files/names/partial.csv, first as labels (binary=False) and then as
     # codes (binary=True).  Nothing is asserted unless the 'genderize'
     # option in the DEFAULT config section is 'yes'.
     genderize = DameGenderize()
     if genderize.config['DEFAULT']['genderize'] != 'yes':
         return
     expected_labels = ['male', 'male', 'male', 'male', 'male',
                        'male', 'female', 'female', 'male', 'male']
     first_labels = genderize.guess_list(path="files/names/partial.csv",
                                         binary=False)[0:10]
     self.assertEqual(expected_labels, first_labels)
     expected_codes = [1, 1, 1, 1, 1, 1, 0, 0, 1, 1]
     first_codes = genderize.guess_list(path="files/names/partial.csv",
                                        binary=True)[0:10]
     self.assertEqual(expected_codes, first_codes)
Exemplo n.º 7
0
 def test_dame_genderize_gender_list_method_returns_correct_result(self):
     # Checks DameGenderize.gender_list called without arguments: the
     # returned codes, their number, and the females/males/unknown counters.
     # Nothing is asserted unless the 'genderize' option in the DEFAULT
     # config section is 'yes'.
     genderize = DameGenderize()
     if genderize.config['DEFAULT']['genderize'] != 'yes':
         return
     genders = genderize.gender_list()
     expected_codes = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
     ]
     self.assertEqual(genders, expected_codes)
     self.assertEqual(len(genders), 21)
     for counter, expected_total in (('females', 3), ('males', 16), ('unknown', 2)):
         self.assertEqual(getattr(genderize, counter), expected_total)
Exemplo n.º 8
0
 def test_dame_genderize_guess_file_method_returns_correct_result(self):
     # Checks that DameGenderize.guess("David", binary=True) yields 1.
     # Nothing is asserted unless the 'genderize' option in the DEFAULT
     # config section is 'yes'.
     genderize = DameGenderize()
     if genderize.config['DEFAULT']['genderize'] != 'yes':
         return
     binary_guess = genderize.guess("David", binary=True)
     self.assertEqual(binary_guess, 1)
Exemplo n.º 9
0
# Parse the command line; `parser` is built earlier in the script.
args = parser.parse_args()

du = DameUtils()

# With --api all, call pretty_gg_list on each backend in turn, passing the
# same csv path, downloaded-JSON path and measure from the command line.
if (args.api == "all"):
    # A Gender instance is created here to read the configuration.
    dg = Gender()

    # Namsor runs only when enabled in the DEFAULT config section.
    if (dg.config['DEFAULT']['namsor'] == 'yes'):
        dn = DameNamsor()
        dn.pretty_gg_list(path=args.csv,
                          jsonf=args.jsondownloaded,
                          measure=args.measure,
                          api='Namsor')

    # Genderize runs only when enabled in the DEFAULT config section.
    if (dg.config['DEFAULT']['genderize'] == 'yes'):
        # NOTE(review): `dg` is rebound from the Gender instance to a
        # DameGenderize instance here; later reads of dg.config use the
        # new object.
        dg = DameGenderize()
        dg.pretty_gg_list(path=args.csv,
                          jsonf=args.jsondownloaded,
                          measure=args.measure,
                          api='Genderize')

    # Gender Guesser is run without consulting the configuration.
    dgg = DameGenderGuesser()
    dgg.pretty_gg_list(path=args.csv,
                       jsonf=args.jsondownloaded,
                       measure=args.measure,
                       api='Genderguesser')

    ds = DameSexmachine()
    ds.pretty_gg_list(path=args.csv,
                      jsonf=args.jsondownloaded,
                      measure=args.measure,
Exemplo n.º 10
0
    # For each backend: print the gender list and the binary guess list
    # obtained from the same CSV, then the accuracy computed from both.

    # --- Namsor ---
    dn = DameNamsor()
    print("################### Namsor!!")
    gl = dn.gender_list(path=args.csv)
    print("Gender list: " + str(gl))
    sl = dn.guess_list(path=args.csv, binary=True)
    print("Guess list:  " + str(sl))

    namsor_accuracy = dn.accuracy_score_dame(gl, sl)
    print("Namsor accuracy: %s" % namsor_accuracy)

    # --- Genderize ---
    dg = DameGenderize()
    print("################### Genderize!!")
    gl = dg.gender_list(path=args.csv)
    print("Gender list: " + str(gl))
    sl = dg.guess_list(path=args.csv, binary=True)
    # Fixed: this line used to print `gl` again under the "Guess list" label.
    print("Guess list:  " + str(sl))

    genderize_accuracy = dg.accuracy_score_dame(gl, sl)
    print("Genderize accuracy: %s" % genderize_accuracy)

    # --- GenderGuesser ---
    dgg = DameGenderGuesser()
    print("################### GenderGuesser!!")
    gl = dgg.gender_list(path=args.csv)
    print("Gender list: " + str(gl))
    sl = dgg.guess_list(path=args.csv, binary=True)
    print("Guess list:  " + str(sl))
Exemplo n.º 11
0

# Short explanation shown before any matrix is printed.
print("A confusion matrix C is such that Ci,j is equal to the number of observations known to be in group i but predicted to be in group j.")
print("If the classifier is nice, the diagonal is high because there are true positives")


# With --api all, print one confusion matrix per backend.  --dimensions picks
# the form: "3x2" delegates printing to print_confusion_matrix_dame, "3x3"
# prints the value returned by confusion_matrix.  Any other value prints
# nothing for that backend.
if (args.api == "all"):
    dn = DameNamsor()
    if (args.dimensions == "3x2"):
        print("Namsor confusion matrix:\n")
        dn.print_confusion_matrix_dame(path=args.csv)
    elif (args.dimensions == "3x3"):
        namsor_confusion_matrix = dn.confusion_matrix(path=args.csv)
        print("Namsor confusion matrix:\n %s" % namsor_confusion_matrix)

    dg = DameGenderize()
    if (args.dimensions == "3x2"):
        print("Genderize confusion matrix:\n")
        dg.print_confusion_matrix_dame(path=args.csv)
    elif (args.dimensions == "3x3"):
        # The instance created just above is reused; the second
        # DameGenderize() that used to be built here was redundant.
        genderize_confusion_matrix = dg.confusion_matrix(path=args.csv)
        print("Genderize confusion matrix:\n %s" % genderize_confusion_matrix)

    dgg = DameGenderGuesser()
    if (args.dimensions == "3x2"):
        print("Gender Guesser confusion matrix:\n")
        dgg.print_confusion_matrix_dame(path=args.csv)
    elif (args.dimensions == "3x3"):
        genderguesser_confusion_matrix = dgg.confusion_matrix(path=args.csv)
        print("Gender Guesser confusion matrix:\n %s" % genderguesser_confusion_matrix)
Exemplo n.º 12
0
# Command-line options for the download script; `parser` is created earlier.

# Path of the CSV file holding the names.
# NOTE(review): with required=True argparse rejects a missing --csv, so the
# default below is never applied; confirm which of the two is intended.
parser.add_argument(
    '--csv',
    help='input file for names',
    default="files/names/min.csv",
    required=True,
    type=str,
)
# Backend to download from; restricted to the listed choices.
parser.add_argument(
    '--api',
    choices=['namsor', 'genderize', 'genderapi', 'nameapi'],
    required=True,
)
# Boolean switch, off unless the flag is given.
parser.add_argument(
    "--surnames",
    help="Flag to surnames",
    action="store_true",
    default=False,
)
args = parser.parse_args()

# Dispatch the download to the backend chosen with --api.
if (args.api == 'genderize'):
    # Genderize is the only branch here that forwards the --surnames flag.
    dg = DameGenderize()
    text1 = dg.download(path=args.csv, surnames=args.surnames)
elif (args.api == 'genderapi'):
    dga = DameGenderApi()
    # Genderapi must be enabled in the DEFAULT config section.
    if (dga.config['DEFAULT']['genderapi'] == 'yes'):
        # Download straight away when the key's limit is not exceeded.
        if (dga.apikey_limit_exceeded_p() == False):
            text1 = dga.download(path=args.csv)
        # Otherwise compare the request count reported for the key with the
        # number of names in the CSV.
        elif (dga.apikey_count_requests() < len(dga.csv2names(args.csv))):
            print("You don't have enough requests with this api key")
        elif (dga.apikey_count_requests() >= len(dga.csv2names(args.csv))):
            text1 = dga.download(path=args.csv)
        # NOTE(review): the two elif tests above are complementary (< and >=),
        # so for ordinary numeric counts this else looks unreachable; confirm
        # the intended handling of an exceeded limit.
        else:
            print("You have not money with this api key")
    else:
        print("You must enable genderapi in config.cfg")
elif (args.api == 'namsor'):
Exemplo n.º 13
0
    # Tail of an earlier branch (its header is above this excerpt): print the
    # four error measures computed from gl1 and gl2.
    ec = d.error_coded(gl1, gl2)
    print("+ The error code: %s" % ec)
    ecwa = d.error_coded_without_na(gl1, gl2)
    print("+ The error code without na: %s" % ecwa)
    naCoded = d.na_coded(gl1, gl2)
    print("+ The na coded: %s" % naCoded)
    egb = d.error_gender_bias(gl1, gl2)
    print("+ The error gender bias: %s" % egb)

# A machine-learning choice in --ml combined with any api other than
# "damegender" is rejected with a message.
elif ((args.api != "damegender") and (args.ml in [
        'svc', 'sgd', 'gaussianNB', 'multinomialNB', 'bernoulliNB', 'forest',
        'xgboost'
])):
    print("The machine learning prediction is only for damegender")
elif (args.api == "genderize"):
    d = DameGenderize()
    print("Genderize with %s has: " % args.csv)
    # gl1 comes from the CSV via gender_list.
    gl1 = d.gender_list(path=args.csv)
    # gl2 comes from the downloaded JSON file when that path exists as a
    # file; otherwise it is produced by guess_list on the CSV.
    if (os.path.isfile(args.jsondownloaded)):
        gl2 = d.json2guess_list(jsonf=args.jsondownloaded, binary=True)
    else:
        gl2 = d.guess_list(path=args.csv, binary=True)
    # Same four measures as above, printed for the Genderize lists.
    ec = d.error_coded(gl1, gl2)
    print("+ The error code: %s" % ec)
    ecwa = d.error_coded_without_na(gl1, gl2)
    print("+ The error code without na: %s" % ecwa)
    naCoded = d.na_coded(gl1, gl2)
    print("+ The na coded: %s" % naCoded)
    egb = d.error_gender_bias(gl1, gl2)
    print("+ The error gender bias: %s" % egb)
elif (args.api == "genderapi"):
Exemplo n.º 14
0
# --version prints the version string '0.1' via argparse's 'version' action.
parser.add_argument('--version', action='version', version='0.1')

args = parser.parse_args()

du = DameUtils()

# Only dispatch when at least one command-line argument was given.
if (len(sys.argv) > 1):
    # Each branch prints the backend's guess for the name, followed by a
    # backend-specific extra figure where one is requested.
    if (args.api == "genderguesser"):
        dgg = DameGenderGuesser()
        print(dgg.guess(args.name))
    elif (args.api == "genderapi"):
        dga = DameGenderApi()
        print(dga.guess(args.name, binary=False))
        print("accuracy: " + str(dga.accuracy(args.name)))
    elif (args.api == "genderize"):
        dg = DameGenderize()
        print(dg.guess(args.name))
        print("probability: " + str(dg.prob(args.name)))
    elif (args.api == "namsor"):
        dn = DameNamsor()
        # Namsor is only queried when du.is_not_blank accepts the surname.
        if (du.is_not_blank(args.surname)):
            print(dn.guess(str(args.name), str(args.surname)))
            print("scale: " + str(dn.scale(str(args.name), str(args.surname))))
        else:
            print("Surname is required in namsor api")
    elif (args.api == "nameapi"):
        # NOTE(review): unlike the namsor branch, the surname is not checked
        # with is_not_blank before being passed on; confirm that is intended.
        dn = DameNameapi()
        print(dn.guess(str(args.name), str(args.surname)))
        print("confidence: " + str(dn.confidence(str(args.name), str(args.surname))))
    elif (args.api == "average"):
        da = DameAll()
Exemplo n.º 15
0
 def test_dame_genderize_json2names(self):
     # Checks the names DameGenderize.json2names extracts from the stored
     # genderizefiles_names_min.csv.json file.
     genderize = DameGenderize()
     expected_names = ['Pierre', 'Raul', 'Adriano', 'Ralf',
                       'Guillermo', 'Sabina']
     names = genderize.json2names(
         jsonf="files/names/genderizefiles_names_min.csv.json")
     self.assertEqual(expected_names, names)
Exemplo n.º 16
0
 def test_dame_genderize_prob(self):
     # Checks that DameGenderize.prob("David") yields 0.99.  Nothing is
     # asserted unless the 'genderize' option in the DEFAULT config section
     # is 'yes'.
     genderize = DameGenderize()
     if genderize.config['DEFAULT']['genderize'] != 'yes':
         return
     probability = genderize.prob("David")
     self.assertEqual(probability, 0.99)
Exemplo n.º 17
0
 def test_dame_genderize_guess(self):
     # Checks DameGenderize.guess("David") in its default form ("male") and
     # with binary=True (1).  Nothing is asserted unless the 'genderize'
     # option in the DEFAULT config section is 'yes'.
     genderize = DameGenderize()
     if genderize.config['DEFAULT']['genderize'] != 'yes':
         return
     label = genderize.guess("David")
     self.assertEqual(label, "male")
     code = genderize.guess("David", binary=True)
     self.assertEqual(code, 1)