def run(self, projectName):
    """Compute the pairwise user overlap between courses and save it to disk.

    For every course dataset the users returned by ``self.loadUsers()`` are
    collected into a set. Entry ``(i, j)`` of the result matrix is the
    fraction of course ``i``'s users that also appear in course ``j``.

    Two files are written into ``self.resultsDir``:

    * ``results.csv``    -- the overlap matrix, comma separated, 5 decimals.
    * ``courseList.csv`` -- tab separated ``index<TAB>courseName`` lines
      giving the row/column order of the matrix.

    Args:
        projectName: Forwarded unchanged to ``self.setup``.
    """
    # The original code probed element 1000 of the user collection to skip
    # small courses, but the collection was a set, so the probe always
    # raised and was silently swallowed. The intended rule is applied
    # explicitly here: a course needs MORE than this many distinct users.
    minUsers = 1000

    self.setup(projectName)
    self.users = {}
    for dataset in self.courseDatasets:
        courseName = dataset.name
        print('Loading ' + courseName)
        DBSetup.switch(dataset)
        try:
            courseUsers = set(self.loadUsers())
        except Exception:
            # Best effort: a course whose users cannot be loaded is skipped
            # rather than aborting the whole run.
            continue
        if len(courseUsers) <= minUsers:
            continue
        self.users[courseName] = courseUsers

    # Materialise the keys so they can be indexed (dict views are not
    # subscriptable on Python 3) and so the order is fixed for both files.
    courseNames = list(self.users)
    numCourses = len(courseNames)
    results = np.zeros((numCourses, numCourses))
    for i, nameA in enumerate(courseNames):
        courseA = self.users[nameA]
        print('Intersecting against ' + nameA)
        for j, nameB in enumerate(courseNames):
            overlap = len(courseA.intersection(self.users[nameB]))
            # len(courseA) > minUsers, so this can never divide by zero.
            results[i, j] = overlap / float(len(courseA))

    path = os.path.join(self.resultsDir, 'results.csv')
    np.savetxt(path, results, delimiter=",", fmt='%1.5f')

    path = os.path.join(self.resultsDir, 'courseList.csv')
    with open(path, 'wt') as fid:
        for i, course in enumerate(courseNames):
            fid.write(str(i) + '\t' + course + '\n')
def run(self, forcePreprocessOption, forceTrainOption):
    """Run the full topic-model pipeline once for every configured database.

    For each name in ``self.dbNames`` the method switches the active
    database, creates the result and data directories, refreshes the
    current paths, loads the raw thread texts, preprocesses them, trains
    the topic model and finally produces the wordle form and the topic
    summary.

    Args:
        forcePreprocessOption: Passed straight to ``self.preprocessText``.
        forceTrainOption: Passed to ``self.trainTopicModel`` together with
            ``self.topicModelParams``.
    """
    for databaseName in self.dbNames:
        # Select the database; later steps read self.currDB.
        self.currDB = databaseName
        DBSetup.switch(self.currDB)

        # Per-database output locations.
        self.currResultDir = self.createResultDir()
        self.currDataDir = self.createDataDir()
        self.setCurrPaths()

        # Text loading and preprocessing.
        self.loadThreadTextsRaw()
        self.preprocessText(forcePreprocessOption)

        # Model training followed by the two report steps.
        self.trainTopicModel(self.topicModelParams, forceTrainOption)
        self.createWordleForm(self.topicModelParams)
        self.createTopicSummary(self.topicModelParams)
def run(self, projectName):
    """Plot the view data of every course that has enough of it.

    View bounds are loaded via ``FileSystem.loadViewBounds()``. For each
    course dataset the views are loaded with ``self.loadViews()``, and the
    course is skipped unless the views have an element at index 1000 and
    the view bounds contain an entry for the course name. For the
    remaining courses ``self.plotHist`` is called with the course index
    and the path ``<resultsDir>/<courseName>.png``.

    Args:
        projectName: Forwarded unchanged to ``self.setup``.
    """
    self.viewBounds = FileSystem.loadViewBounds()
    self.setup(projectName)
    for courseId, dataset in enumerate(self.courseDatasets):
        self.courseName = dataset.name
        print('Loading ' + self.courseName)
        DBSetup.switch(dataset)
        try:
            self.views = self.loadViews()
            # Probes only: the values are discarded, a failed lookup means
            # "too few views" or "no bounds configured for this course".
            self.views[1000]
            self.viewBounds[self.courseName]
        except Exception:
            # Previously a bare ``except``, which also swallowed
            # KeyboardInterrupt/SystemExit and made the loop impossible to
            # interrupt. Ordinary failures still just skip the course.
            continue
        path = os.path.join(self.resultsDir, self.courseName + '.png')
        self.plotHist(courseId, path)
def run(self, projectName):
    """Write aggregate enrollment, survey and posting statistics.

    Only datasets whose name is listed in ``self.courseNames`` and that
    have more than 100 distinct users are considered. For each of them the
    users, survey respondents and posters are loaded, and the totals are
    written to ``<resultsDir>/basicStats.csv``.

    Args:
        projectName: Forwarded unchanged to ``self.setup``.
    """
    # The original probed index 100 of the user list to skip small courses
    # but only caught KeyError; a short list raises IndexError, so small
    # courses crashed the run. The size rule is applied explicitly instead.
    minUsers = 100

    def fraction(numerator, denominator):
        # Avoid ZeroDivisionError when no course qualified or nobody
        # answered the survey; NaN marks the value as undefined.
        if not denominator:
            return float('nan')
        return numerator / float(denominator)

    self.setup(projectName)
    self.users = {}
    numSurveyRespondents = []
    numPosters = []
    numUsers = []
    numIntersection = []
    allUsers = []
    for dataset in self.courseDatasets:
        courseName = dataset.name
        if courseName not in self.courseNames:
            continue
        print('Loading ' + courseName)
        DBSetup.switch(dataset)
        try:
            courseUsers = list(set(self.loadUsers()))
        except KeyError:
            # Kept from the original guard around the load.
            continue
        if len(courseUsers) <= minUsers:
            continue
        self.users[courseName] = courseUsers
        allUsers += courseUsers

        # Recreated after every database switch, before the survey and
        # poster loaders run -- presumably they depend on it (TODO confirm).
        self.userMap = UserMap(True)
        surveyRespondents = set(self.loadSurveyRespondents())
        posters = set(self.loadPosters())

        numUsers.append(len(courseUsers))
        numSurveyRespondents.append(len(surveyRespondents))
        numPosters.append(len(posters))
        numIntersection.append(len(posters.intersection(surveyRespondents)))

    # Enrollments count a user once per course; users are de-duplicated
    # across courses.
    totalEnrollments = sum(numUsers)
    totalUsers = len(set(allUsers))
    totalSurveyRespondents = sum(numSurveyRespondents)
    fracPosters = fraction(sum(numPosters), totalEnrollments)
    fracPostersSurveyed = fraction(sum(numIntersection),
                                   totalSurveyRespondents)

    path = os.path.join(self.resultsDir, 'basicStats.csv')
    with open(path, 'wt') as fid:
        fid.write('Number of enrollments: ' + str(totalEnrollments) + '\n')
        fid.write('Number of users: ' + str(totalUsers) + '\n')
        fid.write('Number of survey respondents: ' +
                  str(totalSurveyRespondents) + '\n')
        fid.write('Fraction of enrolled users who posted: ' +
                  str(fracPosters) + '\n')
        fid.write('Fraction of survey respondents who posted: ' +
                  str(fracPostersSurveyed) + '\n')
def main():
    """Set up the customer database mapping and print a few sanity checks.

    Prints, in order: the result of ``getCustomerData()``, the number of
    objects returned by ``createAllCustomers()`` and the result of
    ``CustomerFromStub()``.
    """
    setup = DBSetup()
    mapping = CustomerDatabaseMapping(setup)
    # Never used below, but still constructed and kept alive for the whole
    # function exactly as before, in case construction has side effects.
    sqlExecutor = DBExecuteSQL()

    mapping.customerDataBaseSetup()

    customerData = mapping.getCustomerData()
    print(customerData)

    customers = mapping.createAllCustomers()
    customerCount = len(customers)
    print(customerCount)

    stubCustomer = mapping.CustomerFromStub()
    print(stubCustomer)
def run(self, projectName):
    """Write aggregate enrollment, survey and posting statistics.

    Only datasets whose name is listed in ``self.courseNames`` and that
    have more than 100 distinct users are considered. For each of them the
    users, survey respondents and posters are loaded, and the totals are
    written to ``<resultsDir>/basicStats.csv``.

    Args:
        projectName: Forwarded unchanged to ``self.setup``.
    """
    # The original probed index 100 of the user list to skip small courses
    # but only caught KeyError; a short list raises IndexError, so small
    # courses crashed the run. The size rule is applied explicitly instead.
    minUsers = 100

    def fraction(numerator, denominator):
        # Avoid ZeroDivisionError when no course qualified or nobody
        # answered the survey; NaN marks the value as undefined.
        if not denominator:
            return float('nan')
        return numerator / float(denominator)

    self.setup(projectName)
    self.users = {}
    numSurveyRespondents = []
    numPosters = []
    numUsers = []
    numIntersection = []
    allUsers = []
    for dataset in self.courseDatasets:
        courseName = dataset.name
        if courseName not in self.courseNames:
            continue
        print('Loading ' + courseName)
        DBSetup.switch(dataset)
        try:
            courseUsers = list(set(self.loadUsers()))
        except KeyError:
            # Kept from the original guard around the load.
            continue
        if len(courseUsers) <= minUsers:
            continue
        self.users[courseName] = courseUsers
        allUsers += courseUsers

        # Recreated after every database switch, before the survey and
        # poster loaders run -- presumably they depend on it (TODO confirm).
        self.userMap = UserMap(True)
        surveyRespondents = set(self.loadSurveyRespondents())
        posters = set(self.loadPosters())

        numUsers.append(len(courseUsers))
        numSurveyRespondents.append(len(surveyRespondents))
        numPosters.append(len(posters))
        numIntersection.append(len(posters.intersection(surveyRespondents)))

    # Enrollments count a user once per course; users are de-duplicated
    # across courses.
    totalEnrollments = sum(numUsers)
    totalUsers = len(set(allUsers))
    totalSurveyRespondents = sum(numSurveyRespondents)
    fracPosters = fraction(sum(numPosters), totalEnrollments)
    fracPostersSurveyed = fraction(sum(numIntersection),
                                   totalSurveyRespondents)

    path = os.path.join(self.resultsDir, 'basicStats.csv')
    with open(path, 'wt') as fid:
        fid.write('Number of enrollments: ' + str(totalEnrollments) + '\n')
        fid.write('Number of users: ' + str(totalUsers) + '\n')
        fid.write('Number of survey respondents: ' +
                  str(totalSurveyRespondents) + '\n')
        fid.write('Fraction of enrolled users who posted: ' +
                  str(fracPosters) + '\n')
        fid.write('Fraction of survey respondents who posted: ' +
                  str(fracPostersSurveyed) + '\n')
def run(self, projectName):
    """Group forums by name across all courses and write a summary file.

    For every course dataset the forums with ``deleted == 0`` and
    ``parent_id == 0`` are queried (presumably the live top-level forums
    -- confirm against the schema). They are collected into a dict mapping
    forum name to a list of ``(course name, forum description)`` tuples,
    which is handed to ``self.writeSummary`` together with the path
    ``<resultsDir>/summary.txt``.

    Args:
        projectName: Forwarded unchanged to ``self.setup``.
    """
    self.setup(projectName)
    fTypes = {}
    for course in self.courseDatasets:
        DBSetup.switch(course)
        try:
            # list() forces the query to run here, inside the try.
            forums = list(ForumForums.objects.filter(deleted=0, parent_id=0))
        except Exception:
            # Best effort: skip courses whose forum table cannot be read.
            # Narrowed from a bare ``except`` so Ctrl-C and SystemExit are
            # no longer swallowed.
            continue
        for forum in forums:
            entry = (course.name, forum.description)
            fTypes.setdefault(forum.name, []).append(entry)
    path = os.path.join(self.resultsDir, 'summary.txt')
    self.writeSummary(path, fTypes)
def run(self, projectName):
    """Group forums by name across all courses and write a summary file.

    For every course dataset the forums with ``deleted == 0`` and
    ``parent_id == 0`` are queried (presumably the live top-level forums
    -- confirm against the schema). They are collected into a dict mapping
    forum name to a list of ``(course name, forum description)`` tuples,
    which is handed to ``self.writeSummary`` together with the path
    ``<resultsDir>/summary.txt``.

    Args:
        projectName: Forwarded unchanged to ``self.setup``.
    """
    self.setup(projectName)
    fTypes = {}
    for course in self.courseDatasets:
        DBSetup.switch(course)
        try:
            # list() forces the query to run here, inside the try.
            forums = list(ForumForums.objects.filter(deleted=0, parent_id=0))
        except Exception:
            # Best effort: skip courses whose forum table cannot be read.
            # Narrowed from a bare ``except`` so Ctrl-C and SystemExit are
            # no longer swallowed.
            continue
        for forum in forums:
            entry = (course.name, forum.description)
            fTypes.setdefault(forum.name, []).append(entry)
    path = os.path.join(self.resultsDir, 'summary.txt')
    self.writeSummary(path, fTypes)
def computeFeatures(self):
    """Compute all thread features for the current database.

    Switches to ``self.currDB``, fetches every forum thread, post and
    comment, and delegates each feature to its own helper method.

    Returns:
        dict: feature name -> whatever the corresponding helper returns.
    """
    logging.info('computeFeatures(' + self.currDB + ')')
    DBSetup.switch(self.currDB)

    allThreads = ForumThreads.objects.all()
    allPosts = ForumPosts.objects.all()
    allComments = ForumComments.objects.all()
    # Built once up front; only the contributor count needs it.
    postToThread = self.getPostThreadMap(allPosts)

    # The helpers are evaluated top to bottom, in the same order as before.
    return {
        'numPosts': self.getNumPosts(allThreads),
        'avgPostLen': self.getAvgPostLen(allThreads, allPosts),
        'postTime': self.postTime(allThreads),
        'postTimeRank': self.postTimeRank(allThreads),
        'timeToFirstResponse': self.timeToFirstResponse(allThreads, allPosts),
        'numContributors': self.numContributors(
            allThreads, allPosts, allComments, postToThread),
        'timeOfHighestVotedPost': self.timeOfHighestVotedPost(
            allThreads, allPosts),
        'originalPostVotes': self.originalPostVotes(allThreads, allPosts),
        'originalPostNumTokens': self.originalPostNumTokens(
            allThreads, allPosts),
    }
def handler(self):
    """Dispatch the job based on ``options.runLocally`` and ``args[0]``.

    * Local run with a numeric ``args[0]``: run that single course.
    * Local run without one: run every course in turn.
    * Non-local run with a numeric ``args[0]``: run that single course.
    * Non-local run without one: submit one job per course and return.

    Every path that runs a course in-process ends with ``sys.exit(0)``.
    """
    def requestedCourseId():
        # A missing or non-numeric first argument means "no course given".
        try:
            return int(args[0])
        except (ValueError, IndexError):
            return None

    def runCourse(courseId):
        self.courseId = courseId
        self.currCourse = self.getCourse()
        # NOTE: the self._setupCourseDirs() call is disabled here, as in
        # the original code.
        DBSetup.switch(self.currCourse)
        self.runner()

    # Kept as an equality test (not plain truthiness) to preserve the
    # exact original condition.
    if options.runLocally == True:
        requested = requestedCourseId()
        if requested is None:
            for courseId in range(len(self.courseDatasets)):
                runCourse(courseId)
        else:
            runCourse(requested)
        sys.exit(0)

    requested = requestedCourseId()
    if requested is not None:
        runCourse(requested)
        sys.exit(0)

    for courseId in range(len(self.courseDatasets)):
        self._submitJob(courseId)
def handler(self):
    """Run courses in-process or submit them as jobs.

    A numeric ``args[0]`` selects a single course. Without it, a local run
    (``options.runLocally``) processes every course in-process, while a
    non-local run submits one job per course via ``self._submitJob`` and
    returns. Whenever courses are run in-process the method finishes with
    ``sys.exit(0)``.
    """
    # Kept as an equality test (not plain truthiness) to preserve the
    # exact original condition.
    localRun = options.runLocally == True

    try:
        singleId = int(args[0])
    except (ValueError, IndexError):
        # Missing or non-numeric first argument: no specific course.
        singleId = None
    haveId = singleId is not None

    if not localRun and not haveId:
        # Nothing to run here; fan out one job per course instead.
        for courseId in range(len(self.courseDatasets)):
            self._submitJob(courseId)
        return

    if haveId:
        courseIds = [singleId]
    else:
        courseIds = range(len(self.courseDatasets))

    for courseId in courseIds:
        self.courseId = courseId
        self.currCourse = self.getCourse()
        # NOTE: the self._setupCourseDirs() call is disabled here, as in
        # the original code.
        DBSetup.switch(self.currCourse)
        self.runner()
    sys.exit(0)
class Testing(unittest.TestCase):
    """Tests for the stub, mock and fake customer sources of the mapping."""

    # Shared fixtures, created once when the class body is executed.
    dbSetup = DBSetup()
    customerMapping = CustomerDatabaseMapping(dbSetup)

    def testCustomerFromStub(self):
        """The stub source returns the single hard-coded customer row."""
        customer = self.customerMapping.CustomerFromStub()
        data = [['*****@*****.**', 'Samuel', 'Lip', '1234']]
        self.assertEqual(customer, data)

    def testCustomerFromMock(self):
        """The mock source returns the single hard-coded customer row."""
        customer = self.customerMapping.CustomerFromMock()
        data = [['*****@*****.**', 'Samuel', 'Lip', '1234']]
        self.assertEqual(customer, data)

    def testCustomerFromFake(self):
        """The fake source matches the mapping's own test data."""
        customer = self.customerMapping.CustomerFromFake()
        test = self.customerMapping.testData()
        # assertEquals is a deprecated alias that was removed in
        # Python 3.12; assertEqual is the supported spelling.
        self.assertEqual(customer, test)
# Report the user count and survey-response count of every course dataset.
#
# The sys.path manipulation must stay interleaved with the imports: each
# append makes the import(s) right after it resolvable.
sys.path.append('../util')
from FileSystem import FileSystem

sys.path.append(FileSystem.getRootDir())
sys.path.append(FileSystem.getSiteDir())
from DBSetup import DBSetup
from lyticssite.forumModels.models import *
from lyticssite.generalModels.models import *
from lyticssite.eventModels.models import *

courseDatasetInfo = FileSystem.loadCourseDatasetInfo()
for course in courseDatasetInfo:
    print(course.name)
    DBSetup.switch(course)

    # Both counts are best effort: a failing query is skipped silently.
    # ``except Exception`` replaces the former bare ``except`` so that
    # Ctrl-C and SystemExit are no longer swallowed.
    try:
        userCount = Users.objects.count()
    except Exception:
        pass
    else:
        print('\tUser Count: ' + str(userCount))

    try:
        surveyCount = Demographic.objects.count()
    except Exception:
        pass
    else:
        print('\tSurvey response count: ' + str(surveyCount))
def __init__(self):
    """Create the database setup and the customer mapping built on it."""
    setup = DBSetup()
    self.dbSetup = setup
    # The mapping receives the very same setup object stored on the instance.
    self.customerDatabaseMapping = CustomerDatabaseMapping(setup)
# Report the user count and survey-response count of every course dataset.
#
# The sys.path manipulation must stay interleaved with the imports: each
# append makes the import(s) right after it resolvable.
import os.path

sys.path.append('../util')
from FileSystem import FileSystem

sys.path.append(FileSystem.getRootDir())
sys.path.append(FileSystem.getSiteDir())
from DBSetup import DBSetup
from lyticssite.forumModels.models import *
from lyticssite.generalModels.models import *
from lyticssite.eventModels.models import *

courseDatasetInfo = FileSystem.loadCourseDatasetInfo()
for course in courseDatasetInfo:
    print(course.name)
    DBSetup.switch(course)

    # Both counts are best effort: a failing query is skipped silently.
    # ``except Exception`` replaces the former bare ``except`` so that
    # Ctrl-C and SystemExit are no longer swallowed.
    try:
        userCount = Users.objects.count()
    except Exception:
        pass
    else:
        print('\tUser Count: ' + str(userCount))

    try:
        surveyCount = Demographic.objects.count()
    except Exception:
        pass
    else:
        print('\tSurvey response count: ' + str(surveyCount))