def _rosterForDir(dirname, locD):
    """Build the roster dictionary for a single course directory.

    Reads ``profiles.csv`` and ``users.csv`` found under ``dirname`` and hands
    the resulting dictionaries, together with ``locD``, to
    ``bcr.buildRosterDict``. Both input files are closed before returning.
    """
    # NOTE(review): assumes the builddict helpers consume the reader fully
    # before returning, so the files can be closed right afterwards -- confirm.
    with open(dirname + '/profiles.csv', 'r') as pfile:
        pDict = prof.builddict(csv.reader(pfile))
    with open(dirname + '/users.csv', 'r') as ufile:
        uDict = user.builddict(csv.reader(ufile))
    return bcr.buildRosterDict(pDict, uDict, locD)


def main(locname, relLoc="./"):
    """Merge the rosters of every ``20*`` directory into one roster file.

    Parameters:
        locname: name of the file passed to ``geo.readIdToLoc`` to obtain the
            location dictionary.
        relLoc: path prefix under which the ``20*`` directories are searched
            and where ``FullRoster.csv`` is written (default ``"./"``).

    The last directory returned by ``glob`` seeds the roster; entries from the
    remaining directories are added only when their key is not yet present.

    Raises:
        IndexError: if no directory matches ``relLoc + '20*'``.
    """
    csv.field_size_limit(sys.maxsize)
    locD = geo.readIdToLoc(locname)
    flist = glob.glob(relLoc + '20*')
    fullR = _rosterForDir(flist.pop(), locD)
    for f in flist:
        addR = _rosterForDir(f, locD)
        for i in addR:
            # Entries already in the roster win; only new keys are added.
            if i not in fullR:
                fullR[i] = addR[i]
    outname = relLoc + 'FullRoster.csv'
    bcr.writeRoster(fullR, outname)
def main():
    """Build global id<->name maps from every ``*/*/users.csv`` file.

    For each user file found, the user dictionary is built with
    ``user.builddict`` and folded into two maps: user id -> user name and
    user name -> user id. When an id shows up with a different name, or a
    name with a different id, the conflict is passed to ``addDup``.

    Files written in the current directory:
        globalid2name.csv -- header row plus one ``[id, name]`` row per id
        globalname2id.csv -- header row plus one ``[name, id]`` row per name
        nameDups.csv      -- only if conflicting names were recorded
        idDups.csv        -- only if conflicting ids were recorded
    When there are no conflicts of a kind, a message is printed instead.
    """
    idDict = {}
    nameDict = {}
    dupNameDict = {}
    dupIdDict = {}
    for fname in glob.glob('*/*/users.csv'):
        with open(fname, 'r') as fin:
            udict = user.builddict(csv.reader(fin))
            for u in udict:
                uname = udict[u].username
                if u not in idDict:
                    idDict[u] = uname
                elif idDict[u] != uname:
                    addDup(dupNameDict, u, idDict[u], uname)
                if uname not in nameDict:
                    nameDict[uname] = u
                elif nameDict[uname] != u:
                    addDup(dupIdDict, uname, nameDict[uname], u)

    with open('globalid2name.csv', 'w') as fout:
        idOut = csv.writer(fout)
        idOut.writerow(['User ID', 'User Name'])
        for i in idDict:
            idOut.writerow([i, idDict[i]])

    with open('globalname2id.csv', 'w') as fout:
        nameOut = csv.writer(fout)
        nameOut.writerow(['User Name', 'User ID'])
        for n in nameDict:
            nameOut.writerow([n, nameDict[n]])

    if dupNameDict:
        with open('nameDups.csv', 'w') as fout:
            nameDupOut = csv.writer(fout)
            for u in dupNameDict:
                # writerow must be called; the original subscripted it.
                nameDupOut.writerow([u, dupNameDict[u]])
    else:
        print("No duplicate names found")

    if dupIdDict:
        with open('idDups.csv', 'w') as fout:
            idDupOut = csv.writer(fout)
            for u in dupIdDict:
                # csv writers expose writerow(), not write().
                idDupOut.writerow([u, dupIdDict[u]])
    else:
        print("No duplicate ids found")
def _writeMapping(path, header, mapping):
    """Write ``mapping`` to ``path`` as CSV rows of ``[key, value]``.

    ``header`` is written as the first row unless it is ``None``. The file is
    closed before returning.
    """
    with open(path, 'w') as fout:
        out = csv.writer(fout)
        if header is not None:
            out.writerow(header)
        for key in mapping:
            out.writerow([key, mapping[key]])


def main():
    """Build global id<->name maps from every ``*/*/users.csv`` file.

    Each user file is turned into a dictionary with ``user.builddict`` and
    handed to ``buildMaps`` together with the four accumulator dictionaries
    (id -> name, name -> id, and the two duplicate collections).

    Files written in the current directory:
        globalid2name.csv -- header row plus one ``[id, name]`` row per id
        globalname2id.csv -- header row plus one ``[name, id]`` row per name
        nameDups.csv      -- only if the name-duplicate dictionary is non-empty
        idDups.csv        -- only if the id-duplicate dictionary is non-empty
    When a duplicate dictionary is empty, a message is printed instead.
    """
    idDict = {}
    nameDict = {}
    dupNameDict = {}
    dupIdDict = {}
    for fname in glob.glob('*/*/users.csv'):
        with open(fname, 'r') as fin:
            udict = user.builddict(csv.reader(fin))
            buildMaps(udict, idDict, nameDict, dupNameDict, dupIdDict)

    _writeMapping('globalid2name.csv', ['User ID', 'User Name'], idDict)
    _writeMapping('globalname2id.csv', ['User Name', 'User ID'], nameDict)

    # The original subscripted writerow (TypeError) and called a nonexistent
    # write() method (AttributeError); both reports now go through writerow().
    if dupNameDict:
        _writeMapping('nameDups.csv', None, dupNameDict)
    else:
        print("No duplicate names found")

    if dupIdDict:
        _writeMapping('idDups.csv', None, dupIdDict)
    else:
        print("No duplicate ids found")
if (len(sys.argv) < 2): print( 'Usage: buildAnonProfile.py courseName profileFile userFile countryFile certFile' ) sys.exit() csv.field_size_limit(1000000) out_name = sys.argv[1] + 'anonProfile.csv' o1 = csv.writer(open(out_name, 'w')) ufile = csv.reader(open(sys.argv[2], 'r')) uprof = prof.builddict(ufile) udfile = csv.reader(open(sys.argv[3], 'r')) udict = user.builddict(udfile) countryFile = csv.reader(open(sys.argv[4], 'r')) locDict = geo.builddict(countryFile) certs = False if (len(sys.argv) > 5): certfile = csv.reader(open(sys.argv[5], 'r')) certDict = cs.builddict(certfile) certs = True students = uprof.keys() for s in students: p = uprof[s] if (s in udict): usrName = udict[s].username
ck_course = sys.argv[1] dump2 = 'harvardx-2013-06-16' dump1 = 'harvardx-2013-06-02' userFile = '/' + ck_course + '/users.csv' certFile = '/' + ck_course + '/certificates.csv' enroll = '/' + ck_course + '/enrollment.csv' uf1 = csv.reader(open(dump1 + userFile, 'r')) uf2 = csv.reader(open(dump2 + userFile, 'r')) cf1 = csv.reader(open(dump1 + certFile, 'r')) cf2 = csv.reader(open(dump2 + certFile, 'r')) ef1 = csv.reader(open(dump1 + enroll, 'r')) ef2 = csv.reader(open(dump2 + enroll, 'r')) u1dict = user.builddict(uf1) u2dict = user.builddict(uf2) c1dict = certificates.builddict(cf1) c2dict = certificates.builddict(cf2) e1dict = course_enrollment.builddict(ef1) e2dict = course_enrollment.builddict(ef2) OneNotTwo = compareUsers(u1dict, u2dict) TwoNotOne = compareUsers(u2dict, u1dict) for u in iter(OneNotTwo): if u in c1dict and c1dict[u].status == 'downloadable': OneNotTwo[u] = 'y' for u in iter(TwoNotOne): if u in c2dict and c2dict[u].status == 'downloadable':
rl = rosterLine(sid, name, uname, maddr, cnt, age, edu, gender) retDict[sid] = rl ofile.close() return retDict def writeRoster(rDict, filein): ''' Write a roster dictionary to a .csv file Write a class roster dictionary to a .csv file named by the input parameter. ''' ofile = open(filein, 'w') rf = csv.writer(ofile) rf.writerow(['Student ID', 'Name', 'User Name', 'Email', 'Country', 'Age', 'Education Level', 'Gender']) for s in iter(rDict): wl = rDict[s] rf.writerow([wl.sid, wl.name, wl.uname, wl.maddr, wl.cnt, wl.age, wl.ed, wl.gender]) ofile.close() if __name__ == '__main__': csv.field_size_limit(sys.maxsize) cl_name = sys.argv[1] proDict = profile.builddict(csv.reader(open(cl_name + '/profiles.csv', 'r'))) uDict = user.builddict(csv.reader(open(cl_name + '/users.csv', 'r'))) loc_name = sys.argv[2] locDict = geo.readIdToLoc(loc_name) rDict = buildRosterDict(proDict, uDict, locDict) writeRoster(rDict, cl_name + '/class_roster.csv')
Looks at the user, enrollment, and profile file in the directory in which the script is run to insure that all of the entries in the user file have entries in the enrollment and profiles file, and that fall of the entries in the profiles and enrollment file have entries in the user file. """ import csv import user import demographics.userprofile as uprofile import course_enrollment as ce csv.field_size_limit(1000000) uIn = csv.reader(open("users.csv", "r")) uDict = user.builddict(uIn) upIn = csv.reader(open("profiles.csv", "r")) upDict = uprofile.builddict(upIn) ceIn = csv.reader(open("enrollment.csv", "r")) ceDict = ce.builddict(ceIn) of = csv.writer(open("userDiffs.csv", "w")) of.writerow(["ids in user file, not in profiles file"]) for u in iter(uDict): if u not in upDict: of.writerow([u]) of.writerow(["ids in profiles file, not in user file"])
ck_course = sys.argv[1] dump2 = 'harvardx-2013-06-16' dump1 = 'harvardx-2013-06-02' userFile = '/' + ck_course + '/users.csv' certFile = '/' + ck_course + '/certificates.csv' enroll = '/' + ck_course + '/enrollment.csv' uf1 = csv.reader(open(dump1 + userFile, 'r')) uf2 = csv.reader(open(dump2 + userFile, 'r')) cf1 = csv.reader(open(dump1 + certFile, 'r')) cf2 = csv.reader(open(dump2 + certFile, 'r')) ef1 = csv.reader(open(dump1 + enroll, 'r')) ef2 = csv.reader(open(dump2 + enroll, 'r')) u1dict = user.builddict(uf1) u2dict = user.builddict(uf2) c1dict = certificates.builddict(cf1) c2dict = certificates.builddict(cf2) e1dict = course_enrollment.builddict(ef1) e2dict = course_enrollment.builddict(ef2) OneNotTwo = compareUsers(u1dict, u2dict) TwoNotOne = compareUsers(u2dict, u1dict) for u in iter(OneNotTwo): if u in c1dict and c1dict[u].status =='downloadable': OneNotTwo[u] = 'y' for u in iter(TwoNotOne): if u in c2dict and c2dict[u].status == 'downloadable':
self.country = '' self.classList = [className] geoFile = csv.reader(open(sys.argv[1], 'r')) geoDict = loc.builddict(geoFile) dirList = glob.glob('[A-Z]*') classDict = {} for d in dirList: filein = open(d+'/users.csv', 'r') fin = csv.reader(filein) cName = d fin.next() udict = user.builddict(fin) for u in iter(udict): if u in classDict: classDict[u].numClasses += 1 classDict[u].classList.append(cName) if udict[u].username != classDict[u].uname: classDict[u].uname = 'Duplicate user name' else: classDict[u] = userClasses(udict[u].username, cName) if udict[u].username in geoDict: classDict[u].country = geoDict[udict[u].username] filein.close() outf = csv.writer(open('studentClassList.csv', 'w')) outf.writerow(['user Id', 'User name','country', 'number of classes', 'classes'])
retDict[username] = country return retDict if len(sys.argv) > 3: cFileName = sys.argv[1] userFileName = sys.argv[2] clName = sys.argv[3] else: cFileName = utils.getFileName('user name to country file') userFileName = utils.getFileName('user file') clName = raw_input("Please enter the name of the class : ") cfile = csv.reader(open(cFileName, 'r')) nameDict = buildNameCountry(cfile) ufile = csv.reader(open(userFileName, 'r')) userDict = user.builddict(ufile) clfName = clName + '_id_country.csv' outfile = csv.writer(open(clfName, 'w')) users = userDict.keys() outfile.writerow(['User id', 'Country']) for u in users: userName = userDict[u].username if (userName in nameDict): country = nameDict[userDict[u].username] outfile.writerow([u, country]) else: print ('unknown userName ' + userName)
if __name__ == '__main__':
    # Script entry point: merge the rosters of every '20*' directory in the
    # current directory into FullRoster.csv. The id=>location file name comes
    # from the first command-line argument, or is prompted for when absent.
    if len(sys.argv) > 1:
        locname = sys.argv[1]
    else:
        locname = utils.getFileName('Enter name of the id=>location file :')
    csv.field_size_limit(sys.maxsize)
    locD = geo.readIdToLoc(locname)
    flist = glob.glob('20*')
    # The last directory returned by glob seeds the roster (IndexError if
    # nothing matched); later directories only contribute unseen keys.
    fname = flist.pop()
    # NOTE(review): assumes the builddict helpers read the whole file before
    # returning, so each file can be closed right after -- confirm.
    with open(fname + '/profiles.csv', 'r') as pfile:
        pDict = prof.builddict(csv.reader(pfile))
    with open(fname + '/users.csv', 'r') as ufile:
        uDict = user.builddict(csv.reader(ufile))
    fullR = bcr.buildRosterDict(pDict, uDict, locD)
    for f in flist:
        with open(f + '/profiles.csv', 'r') as pfile:
            addDict = prof.builddict(csv.reader(pfile))
        with open(f + '/users.csv', 'r') as ufile:
            uDict = user.builddict(csv.reader(ufile))
        addR = bcr.buildRosterDict(addDict, uDict, locD)
        for i in addR:
            if i not in fullR:
                fullR[i] = addR[i]
    outname = 'FullRoster.csv'
    bcr.writeRoster(fullR, outname)
''' import glob import sys import csv import user course = sys.argv[1] flist = glob.glob('harvardx*/' + course + '/users.csv') if len(flist) < 2: exit() f = iter(flist).next() flist.remove(f) ufile = open(f, 'r') oldDict = user.builddict(ufile) ufile.close() out = csv.writer(open(course+'diffs.csv', 'w')) for f in flist: ufile = open(f, 'r') newDict = user.builddict(csv.reader(ufile)) ufile.close() out.writerow(['In older course list, not new']) i = 0 for u in iter(oldDict): if u not in newDict: out.writerow([u, oldDict[u].username]) i += 1 out.writerow(['Total deleted between files: ', str(i)]) i = 0 out.writerow(['In new course list, not old'])
script is run, and takes as arguments the file names of the two user files to be compared. ''' import user import csv import sys import certificates f1 = csv.reader(open(sys.argv[1], 'r')) f2 = csv.reader(open(sys.argv[2], 'r')) f3 = csv.writer(open('additions.csv', 'w')) f4 = csv.reader(open('certificates.csv', 'r')) f3.writerow(['id', 'in certificate file']) f3.writerow(['User ids in first file, not in second']) u1 = user.builddict(f1) u2 = user.builddict(f2) cdict = certificates.builddict(f4) for key in u1.iterkeys(): if u1[key].id not in u2: if key in cdict: f3.writerow([key, 'Yes']) else: f3.writerow([key, 'No']) f3.writerow(['User ids in second file, not in first']) for key in u2.iterkeys(): if u2[key].id not in u1: if key in cdict:
Write a roster dictionary to a .csv file Write a class roster dictionary to a .csv file named by the input parameter. ''' ofile = open(filein, 'w') rf = csv.writer(ofile) rf.writerow([ 'Student ID', 'Name', 'User Name', 'Email', 'Country', 'Age', 'Education Level', 'Gender' ]) for s in iter(rDict): wl = rDict[s] rf.writerow([ wl.sid, wl.name, wl.uname, wl.maddr, wl.cnt, wl.age, wl.ed, wl.gender ]) ofile.close() if __name__ == '__main__': csv.field_size_limit(sys.maxsize) cl_name = sys.argv[1] proDict = profile.builddict( csv.reader(open(cl_name + '/profiles.csv', 'r'))) uDict = user.builddict(csv.reader(open(cl_name + '/users.csv', 'r'))) loc_name = sys.argv[2] locDict = geo.readIdToLoc(loc_name) rDict = buildRosterDict(proDict, uDict, locDict) writeRoster(rDict, cl_name + '/class_roster.csv')
''' import glob import sys import csv import user course = sys.argv[1] flist = glob.glob('harvardx*/' + course + '/users.csv') if len(flist) < 2: exit() f = iter(flist).next() flist.remove(f) ufile = open(f, 'r') oldDict = user.builddict(ufile) ufile.close() out = csv.writer(open(course + 'diffs.csv', 'w')) for f in flist: ufile = open(f, 'r') newDict = user.builddict(csv.reader(ufile)) ufile.close() out.writerow(['In older course list, not new']) i = 0 for u in iter(oldDict): if u not in newDict: out.writerow([u, oldDict[u].username]) i += 1 out.writerow(['Total deleted between files: ', str(i)]) i = 0 out.writerow(['In new course list, not old'])
''' retDict= {} fin.readrow() for name, iden in fin: retDict[name] = iden return retDict if __name__ == '__main__': ulist = glob.glob('*/*/users.csv') idDict = {} nameDict = {} for fname in ulist: fin = open(fname, 'r') fcsv = csv.reader(fin) udict = user.builddict(fcsv) for u in iter(udict): idDict[u] = udict[u].username nameDict[udict[u].username] = u fin.close() idOut = csv.writer(open('globalid2name.csv', 'w')) idOut.writerow(['User ID', 'User Name']) for i in iter(idDict): idOut.writerow([i, idDict[i]]) nameOut = csv.writer(open('globalname2id.csv', 'w')) nameOut.writerow(['User Name', 'User ID']) for n in iter(nameDict): nameOut.writerow([n, nameDict[n]])
if (len(sys.argv) < 2): print('Usage: buildAnonProfile.py courseName profileFile userFile countryFile certFile') sys.exit() csv.field_size_limit(1000000) out_name = sys.argv[1] + 'anonProfile.csv' o1 = csv.writer(open(out_name, 'w')) ufile = csv.reader(open(sys.argv[2], 'r')) uprof = prof.builddict(ufile) udfile = csv.reader(open(sys.argv[3], 'r')) udict = user.builddict(udfile) countryFile = csv.reader(open(sys.argv[4], 'r')) locDict = geo.builddict(countryFile) certs = False if (len(sys.argv) > 5): certfile = csv.reader(open(sys.argv[5], 'r')) certDict = cs.builddict(certfile) certs = True students = uprof.keys() for s in students: p = uprof[s] if (s in udict):
#!/usr/bin/env python
import user
import certificates
import os
import csv


def getFileName(prompt):
    """Prompt until the user enters the name of an existing file.

    ``prompt`` is inserted into the question shown to the user. The entered
    name is returned as soon as ``os.path.exists`` accepts it; otherwise an
    error message is printed and the question is asked again.
    """
    while True:
        fname = raw_input("Please enter file name for " + prompt + " : ")
        if os.path.exists(fname):
            return fname
        print("file entered does not exist, please retry")


if __name__ == "__main__":
    # Writes every user's email address to 'allmail', and the addresses of
    # users that also appear in the certificates dictionary to 'certMail'.
    f1name = getFileName("Enter name of the user file")
    with open(f1name, "r") as userFile:
        f1 = csv.reader(userFile)
        f2name = getFileName("Enter the name of the certificates file")
        with open(f2name, "r") as certFile:
            f2 = csv.reader(certFile)
            udict = user.builddict(f1)
            cdict = certificates.builddict(f2)

    # Context managers guarantee both output files are flushed and closed.
    with open("allmail", "w") as out1, open("certMail", "w") as out2:
        for u in udict:
            out1.write(udict[u].email + "\n")
            if u in cdict:
                out2.write(udict[u].email + "\n")
Checks to insure that users appear in the enrollment and profiles file Looks at the user, enrollment, and profile file in the directory in which the script is run to insure that all of the entries in the user file have entries in the enrollment and profiles file, and that fall of the entries in the profiles and enrollment file have entries in the user file. ''' import csv import user import demographics.userprofile as uprofile import course_enrollment as ce csv.field_size_limit(1000000) uIn = csv.reader(open('users.csv', 'r')) uDict = user.builddict(uIn) upIn = csv.reader(open('profiles.csv', 'r')) upDict = uprofile.builddict(upIn) ceIn = csv.reader(open('enrollment.csv', 'r')) ceDict = ce.builddict(ceIn) of = csv.writer(open('userDiffs.csv', 'w')) of.writerow(['ids in user file, not in profiles file']) for u in iter(uDict): if u not in upDict: of.writerow([u]) of.writerow(['ids in profiles file, not in user file'])
#!/usr/bin/env python
import user
import certificates
import os
import csv


def getFileName(prompt):
    """Ask for a file name, repeating the question until the file exists.

    ``prompt`` is embedded in the question text. Returns the first entered
    name for which ``os.path.exists`` is true; after any other entry a retry
    message is printed.
    """
    while True:
        fname = raw_input("Please enter file name for " + prompt + ' : ')
        if os.path.exists(fname):
            return fname
        print("file entered does not exist, please retry")


if __name__ == '__main__':
    # Produces two mailing lists: 'allmail' holds the email of every user in
    # the user file; 'certMail' holds only those whose key is also present in
    # the certificates dictionary.
    f1name = getFileName('Enter name of the user file')
    with open(f1name, 'r') as userFile:
        f1 = csv.reader(userFile)
        f2name = getFileName('Enter the name of the certificates file')
        with open(f2name, 'r') as certFile:
            f2 = csv.reader(certFile)
            udict = user.builddict(f1)
            cdict = certificates.builddict(f2)

    # Output files are closed (and therefore flushed) when the block exits.
    with open('allmail', 'w') as out1, open('certMail', 'w') as out2:
        for u in udict:
            out1.write(udict[u].email + '\n')
            if u in cdict:
                out2.write(udict[u].email + '\n')
return retDict if len(sys.argv) > 3: cFileName = sys.argv[1] userFileName = sys.argv[2] clName = sys.argv[3] else: cFileName = utils.getFileName('user name to country file') userFileName = utils.getFileName('user file') clName = raw_input("Please enter the name of the class : ") cfile = csv.reader(open(cFileName, 'r')) nameDict = buildNameCountry(cfile) ufile = csv.reader(open(userFileName, 'r')) userDict = user.builddict(ufile) clfName = clName + '_id_country.csv' outfile = csv.writer(open(clfName, 'w')) users = userDict.keys() outfile.writerow(['User id', 'Country']) for u in users: userName = userDict[u].username if (userName in nameDict): country = nameDict[userDict[u].username] outfile.writerow([u, country]) else: print('unknown userName ' + userName)
self.country = '' self.classList = [className] geoFile = csv.reader(open(sys.argv[1], 'r')) geoDict = loc.builddict(geoFile) dirList = glob.glob('[A-Z]*') classDict = {} for d in dirList: filein = open(d + '/users.csv', 'r') fin = csv.reader(filein) cName = d fin.next() udict = user.builddict(fin) for u in iter(udict): if u in classDict: classDict[u].numClasses += 1 classDict[u].classList.append(cName) if udict[u].username != classDict[u].uname: classDict[u].uname = 'Duplicate user name' else: classDict[u] = userClasses(udict[u].username, cName) if udict[u].username in geoDict: classDict[u].country = geoDict[udict[u].username] filein.close() outf = csv.writer(open('studentClassList.csv', 'w')) outf.writerow(