Python maxLikelyGroupForName示例

编程语言: Python

命名空间/包名称: yp_babyname.babyname

方法/功能: maxLikelyGroupForName

hotexamples.com的示例: 2

Python maxLikelyGroupForName - 共找到2个示例。以下是从开源项目中提取的、评分最高的 yp_babyname.babyname.maxLikelyGroupForName 真实Python示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： main.py 项目： pyongjoo/tweets_process

def run_group():
    '''
    Evaluate first-name based age-group prediction on a labelled CSV file.

    Reads the hard-coded CSV file row by row, using column 1 as the full
    name and column 2 as the age.  For every row it prints a tuple of
    (actual group, predicted group, four group probabilities).  When
    babyname.maxLikelyGroupForName does not return -1, the row is counted in
    a 4x4 confusion matrix indexed as [actual group][predicted group].  The
    matrix is printed at the end, one matrix row per output line.

    Takes no arguments and returns nothing; all results go to stdout.
    '''
    file_dir = "/home/pyongjoo/workspace/tweetsprocess/data/name-feature/"
    infile = file_dir + "screename-May10-AlmostVerified.csv"

    # Every print below takes exactly one parenthesised value, so the output
    # is the same whether print is a statement or a function.
    print(infile)

    # 4x4 matrix of counts: conf_matrix[actual group][predicted group].
    conf_matrix = [[0, 0, 0, 0] for _ in range(4)]

    # The with-block guarantees the file handle is released, even when a row
    # fails to process (the handle used to be left open).
    with open(infile, 'rb') as csvfile:
        for row in csv.reader(csvfile):
            # row[0] holds the screen name; it is not used by the evaluation.
            fullname = row[1]
            age = row[2]

            firstname = fullname.split(' ')[0]
            age_group = babyname.ageToAgeGroup(age)

            group_prob = babyname.probHashInGroupForName(firstname)
            predicted_group = babyname.maxLikelyGroupForName(firstname)

            # Printed as a single tuple, one line per input row.
            report = (age_group, predicted_group,
                      group_prob[0], group_prob[1],
                      group_prob[2], group_prob[3])
            print(report)

            # -1 marks a name for which no group could be predicted; such
            # rows are left out of the matrix.
            # NOTE(review): assumes age_group is always in 0..3 -- confirm
            # against babyname.ageToAgeGroup.
            if predicted_group != -1:
                conf_matrix[age_group][predicted_group] += 1

    print("Confusion Matrix:")
    for counts in conf_matrix:
        # Each cell is followed by one space, then the line is terminated.
        line = "".join(str(count) + " " for count in counts)
        sys.stdout.write(line + "\n")

示例#2

显示文件

文件： nameAggregate.py 项目： pyongjoo/twitter-research

def _safe_ratio(numerator, denominator):
    # Return numerator / denominator as a float, or 0.0 when there is
    # nothing to divide by.
    if not denominator:
        return 0.0
    return float(numerator) / float(denominator)


def procedure1():
    '''
    TESTING THE FIRST NAME ON THE CENTRAL NODES (procedure1)

    1. Retrieve central nodes by selecting edges.this and removing duplicates.

    2. Get the age and name info of those guys by joining with users table and
    selecting appropriate column.

    3. Run the first name system to get the prediction, match the predicted
    group with the real age group.

    Prints the accuracy, the "real" accuracy (which also accounts for names
    that could not be predicted) and the 4x4 confusion matrix indexed as
    [real group][predicted group].  Both accuracies are reported as 0.0 when
    there is nothing to evaluate.  Takes no arguments and returns nothing.

    tested on 9pm Jun 26. working fine. maybe I can add an English filter.
    '''
    # Store pairs of name and age group for central nodes.
    # Most of the complicated operations are handled in db, and we only use
    # the name and the corresponding age info from the result set.
    centralNodes = []

    # NOTE(review): the database credentials are hard-coded in source;
    # consider loading them from configuration kept outside the repository.
    con = mdb.connect('localhost', 'yongjoo', 'Fgla4Zp0', 'yongjoo')

    with con:
        cur = con.cursor()
        cur.execute('''SELECT DISTINCT e.this, u.age, u.name
            FROM edges e
            INNER JOIN users u
            ON e.this = u.user_id
            ''')
        # Result columns are (e.this, u.age, u.name).
        for row in cur.fetchall():
            name = row[2]
            age = row[1]
            centralNodes.append([name, ageToAgeGroup(age)])

    # Validate

    # the number of cases where the db does not hold the first name.
    non_predictable_count = 0

    # confusion matrix
    confusion_mat = [[0, 0, 0, 0] for _ in range(4)]

    for name, ageGroup in centralNodes:
        firstname = name.split(' ')[0]
        predictGroup = maxLikelyGroupForName(firstname)

        # -1 marks a name for which no group could be predicted.
        if predictGroup == -1:
            non_predictable_count += 1
        else:
            confusion_mat[ageGroup][predictGroup] += 1

    # Report the result

    # report the accuracy
    nu = sum(confusion_mat[i][i] for i in range(4))
    denom = sum(sum(counts) for counts in confusion_mat)
    # With no rows, or no predictable name, denom is zero; the guarded ratio
    # keeps the report from dying with ZeroDivisionError before the matrix
    # is printed.
    accuracy = _safe_ratio(nu, denom)
    # Each unpredictable name is credited with 0.25 of a correct answer --
    # presumably the chance rate of a blind guess among the four groups.
    real_accuracy = _safe_ratio(nu + non_predictable_count * 0.25,
                                denom + non_predictable_count)
    # Each print takes one parenthesised value, so the output is the same
    # whether print is a statement or a function.
    print("Accuracy: " + str(accuracy))
    print("Real accuracy: " + str(real_accuracy))

    # report the confusion matrix
    print("Confusion Matrix:")
    for counts in confusion_mat:
        # Each cell is followed by one space, then the line is terminated.
        line = ''.join(str(count) + ' ' for count in counts)
        sys.stdout.write(line + '\n')