Пример #1
0
                'referenceValues'][ref_retrieved_index]
    return employee_dict


# -----------------
# Begin script
# -----------------

# Calculate the reference date retrieved value for all statements.
# datetime.utcnow() is deprecated (Python 3.12+), so take a timezone-aware UTC
# "now" and drop the tzinfo; the ISO string then keeps its original naive form
# (no "+00:00" suffix).
whole_time_string_z = datetime.datetime.now(
    datetime.timezone.utc
).replace(tzinfo=None).isoformat()  # form: 2019-12-05T15:35:04.959311
dateZ = whole_time_string_z.split('T')[0]  # form 2019-12-05
# Only the date part is kept; the time of day is fixed at midnight.
ref_retrieved = '+' + dateZ + 'T00:00:00Z'  # form +2019-12-05T00:00:00Z as provided by Wikidata

# Load the department's employee table; rows are looked up by column name below.
filename = f'{deptShortName}-employees-with-wikidata.csv'
employees = vbc.readDict(filename)

# Keep the Wikidata Q ID of every employee row whose 'wikidataId' is non-empty.
qIds = [row['wikidataId'] for row in employees if row['wikidataId'] != '']

# ------------------------------------------------------
# Get all of the ORCID data that is already in Wikidata.
# Commented-out settings (not executed):
# prop = 'P496'  # ORCID ID
# value = ''  # since no value is passed, the search will retrieve the value
# refProps = ['P813']  # retrieved

# The script determines what is being tracked with respect to references by
# whether these field name strings are empty or not.
# NOTE(review): only `field_name` is visible at this point; the other field
# name settings presumably follow - confirm.
field_name = 'orcid'
Пример #2
0
    'string': 'J.D.',
    'value': 'J.D.'
}]

# NCBI identification requirements:
# a tool name and an email address should be sent with all requests;
# see https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
# Both values below are meant to be edited by whoever runs the script.
emailAddress = '*****@*****.**'  # put your email address here
toolName = 'VanderBot'  # give your application a name here

# -----------------
# Begin script
# -----------------

# Employee table for the department being processed.
filename = f'{deptShortName}-employees.csv'
employees = vbc.readDict(filename)

# ORCID records that the employees are compared against in the loop below.
filename = 'orcid_data.csv'
orcidData = vbc.readDict(filename)

# Both thresholds start at 90.
# NOTE(review): presumably minimum fuzzy-match scores out of 100 - confirm.
testRatio = departmentTestRatio = 90
for employeeIndex in range(0, len(employees)):
    matched = False
    for row in orcidData:
        name = row['givenNames'] + ' ' + row['familyName']
        #ratio = fuzz.ratio(name, employees[employeeIndex][0])
        #partialRatio = fuzz.partial_ratio(name, employees[employeeIndex][0])
        #sortRatio = fuzz.token_sort_ratio(name, employees[employeeIndex][0])

        output = ''