示例#1
0
    def run(self, projectName):
        """Measure how much each course's user base overlaps every other's.

        For every course dataset whose users can be loaded, the set of users
        is collected.  ``results[i, j]`` is the fraction of course ``i``'s
        users that also appear in course ``j``.  The matrix is written to
        ``results.csv`` and the index-to-course mapping to ``courseList.csv``,
        both under ``self.resultsDir``.

        Args:
            projectName: forwarded unchanged to ``self.setup``.
        """
        # The original probed ``users[1000]`` -- presumably to reject courses
        # with fewer than 1001 users -- but a set is not subscriptable, so the
        # probe always raised and nothing was ever filtered.  The size
        # requirement is now checked explicitly.
        minUsers = 1001

        self.setup(projectName)
        self.users = {}
        for dataset in self.courseDatasets:
            courseName = dataset.name
            print('Loading ' + courseName)
            DBSetup.switch(dataset)
            try:
                users = set(self.loadUsers())
            except Exception as err:
                # Best effort: a course whose users cannot be loaded is
                # skipped, but Ctrl-C / SystemExit still propagate.
                print('Skipping ' + courseName + ': ' + repr(err))
                continue
            if len(users) < minUsers:
                print('Skipping ' + courseName + ': fewer than '
                      + str(minUsers) + ' users')
                continue
            self.users[courseName] = users

        # Materialise the keys: a dict view cannot be indexed on Python 3.
        courseNames = list(self.users)
        numCourses = len(courseNames)
        results = np.zeros((numCourses, numCourses))
        for i, nameA in enumerate(courseNames):
            courseA = self.users[nameA]
            print('Intersecting against ' + nameA)
            for j, nameB in enumerate(courseNames):
                overlap = len(courseA.intersection(self.users[nameB]))
                # len(courseA) >= minUsers, so the denominator is never zero.
                results[i, j] = overlap / float(len(courseA))

        path = os.path.join(self.resultsDir, 'results.csv')
        np.savetxt(path, results, delimiter=",", fmt='%1.5f')
        path = os.path.join(self.resultsDir, 'courseList.csv')
        with open(path, 'wt') as fid:
            for i, course in enumerate(courseNames):
                fid.write(str(i) + '\t' + course + '\n')
示例#2
0
    def run(self, forcePreprocessOption, forceTrainOption):
        """Run the full topic-model pipeline once per database in ``self.dbNames``.

        For each database the steps are, in order: switch the active
        database, create the result and data directories, set the current
        paths, load the raw thread texts, preprocess them, train the topic
        model, then produce the wordle form and the topic summary.

        Args:
            forcePreprocessOption: passed straight to ``self.preprocessText``
                (presumably forces preprocessing to be redone -- confirm).
            forceTrainOption: passed straight to ``self.trainTopicModel``
                (presumably forces retraining -- confirm).
        """
        for dbName in self.dbNames:
            # Record the database being processed and make it the active one.
            self.currDB = dbName
            DBSetup.switch(self.currDB)

            # Per-database output locations; setCurrPaths() runs after both
            # directory attributes have been assigned.
            self.currResultDir = self.createResultDir()
            self.currDataDir = self.createDataDir()
            self.setCurrPaths()

            # Pipeline stages; every model-related stage receives
            # self.topicModelParams.
            self.loadThreadTextsRaw()
            self.preprocessText(forcePreprocessOption)
            self.trainTopicModel(self.topicModelParams, forceTrainOption)
            self.createWordleForm(self.topicModelParams)
            self.createTopicSummary(self.topicModelParams)
示例#3
0
    def run(self, projectName):
        """Call ``self.plotHist`` for every course that has usable view data.

        A course is skipped when its views cannot be loaded, when the loaded
        views have no element at index 1000, or when ``self.viewBounds`` has
        no entry for the course name.  Each plotted course gets the output
        path ``<resultsDir>/<courseName>.png``.

        Args:
            projectName: forwarded unchanged to ``self.setup``.
        """
        self.viewBounds = FileSystem.loadViewBounds()
        self.setup(projectName)

        for courseId, dataset in enumerate(self.courseDatasets):
            self.courseName = dataset.name
            print('Loading ' + self.courseName)
            DBSetup.switch(dataset)
            try:
                self.views = self.loadViews()
                # Probes only, results discarded: they raise when the course
                # has too few views or no configured bounds.
                self.views[1000]
                self.viewBounds[self.courseName]
            except Exception as err:
                # Was a bare ``except:``, which also swallowed Ctrl-C and
                # SystemExit.  Skipping stays best effort, but is reported.
                print('Skipping ' + self.courseName + ': ' + repr(err))
                continue
            path = os.path.join(self.resultsDir, self.courseName + '.png')
            self.plotHist(courseId, path)
示例#4
0
    def run(self, projectName):
        """Call ``self.plotHist`` for every course that has usable view data.

        A course is skipped when its views cannot be loaded, when the loaded
        views have no element at index 1000, or when ``self.viewBounds`` has
        no entry for the course name.  Each plotted course gets the output
        path ``<resultsDir>/<courseName>.png``.

        Args:
            projectName: forwarded unchanged to ``self.setup``.
        """
        self.viewBounds = FileSystem.loadViewBounds()
        self.setup(projectName)

        for courseId, dataset in enumerate(self.courseDatasets):
            self.courseName = dataset.name
            print('Loading ' + self.courseName)
            DBSetup.switch(dataset)
            try:
                self.views = self.loadViews()
                # Probes only, results discarded: they raise when the course
                # has too few views or no configured bounds.
                self.views[1000]
                self.viewBounds[self.courseName]
            except Exception as err:
                # Was a bare ``except:``, which also swallowed Ctrl-C and
                # SystemExit.  Skipping stays best effort, but is reported.
                print('Skipping ' + self.courseName + ': ' + repr(err))
                continue
            path = os.path.join(self.resultsDir, self.courseName + '.png')
            self.plotHist(courseId, path)
示例#5
0
    def run(self, projectName):
        """Write aggregate enrollment, survey and forum-posting statistics.

        Only datasets whose name is in ``self.courseNames`` are considered.
        For each one the distinct users, survey respondents and posters are
        loaded, and the totals are written to ``basicStats.csv`` under
        ``self.resultsDir``.

        Args:
            projectName: forwarded unchanged to ``self.setup``.
        """
        # The original probed ``users[100]`` under ``except KeyError``.  A
        # short list raises IndexError, not KeyError, so a small course
        # crashed the whole run instead of being skipped.  The same
        # "at least 101 distinct users" requirement is now explicit.
        minUsers = 101

        self.setup(projectName)
        self.users = {}
        allUsers = set()
        totalEnrollments = 0
        totalSurveyRespondents = 0
        totalPosters = 0
        totalPostersSurveyed = 0
        for dataset in self.courseDatasets:
            courseName = dataset.name
            if courseName not in self.courseNames:
                continue
            print('Loading ' + courseName)
            DBSetup.switch(dataset)
            try:
                users = list(set(self.loadUsers()))
            except KeyError:
                continue
            if len(users) < minUsers:
                print('Skipping ' + courseName + ': fewer than '
                      + str(minUsers) + ' users')
                continue
            self.users[courseName] = users
            allUsers.update(users)
            # NOTE(review): userMap is rebuilt before the two loaders below
            # run; presumably they depend on it -- confirm.
            self.userMap = UserMap(True)
            surveyRespondents = set(self.loadSurveyRespondents())
            posters = set(self.loadPosters())

            totalEnrollments += len(users)
            totalSurveyRespondents += len(surveyRespondents)
            totalPosters += len(posters)
            totalPostersSurveyed += len(
                posters.intersection(surveyRespondents))

        totalUsers = len(allUsers)
        # With no qualifying course (or no survey data) the fractions are
        # undefined; report nan instead of raising ZeroDivisionError.
        undefined = float('nan')
        fracPosters = (totalPosters / float(totalEnrollments)
                       if totalEnrollments else undefined)
        fracPostersSurveyed = (
            totalPostersSurveyed / float(totalSurveyRespondents)
            if totalSurveyRespondents else undefined)

        path = os.path.join(self.resultsDir, 'basicStats.csv')
        with open(path, 'wt') as fid:
            fid.write('Number of enrollments: ' + str(totalEnrollments) + '\n')
            fid.write('Number of users: ' + str(totalUsers) + '\n')
            fid.write('Number of survey respondents: ' +
                      str(totalSurveyRespondents) + '\n')
            fid.write('Fraction of enrolled users who posted: ' +
                      str(fracPosters) + '\n')
            fid.write('Fraction of survey respondents who posted: ' +
                      str(fracPostersSurveyed) + '\n')
示例#6
0
def main():
    """Set up the customer database mapping and print a few sanity values.

    Prints, in order: the customer data, the number of customers created by
    ``createAllCustomers`` and the result of ``CustomerFromStub``.
    """
    setup = DBSetup()
    mapping = CustomerDatabaseMapping(setup)
    # Constructed but never used; kept bound so its lifetime is unchanged.
    _sqlExecutor = DBExecuteSQL()

    mapping.customerDataBaseSetup()

    customerData = mapping.getCustomerData()
    print(customerData)

    customers = mapping.createAllCustomers()
    customerCount = len(customers)
    print(customerCount)

    stubCustomer = mapping.CustomerFromStub()
    print(stubCustomer)
示例#7
0
    def run(self, projectName):
        """Write aggregate enrollment, survey and forum-posting statistics.

        Only datasets whose name is in ``self.courseNames`` are considered.
        For each one the distinct users, survey respondents and posters are
        loaded, and the totals are written to ``basicStats.csv`` under
        ``self.resultsDir``.

        Args:
            projectName: forwarded unchanged to ``self.setup``.
        """
        # The original probed ``users[100]`` under ``except KeyError``.  A
        # short list raises IndexError, not KeyError, so a small course
        # crashed the whole run instead of being skipped.  The same
        # "at least 101 distinct users" requirement is now explicit.
        minUsers = 101

        self.setup(projectName)
        self.users = {}
        allUsers = set()
        totalEnrollments = 0
        totalSurveyRespondents = 0
        totalPosters = 0
        totalPostersSurveyed = 0
        for dataset in self.courseDatasets:
            courseName = dataset.name
            if courseName not in self.courseNames:
                continue
            print('Loading ' + courseName)
            DBSetup.switch(dataset)
            try:
                users = list(set(self.loadUsers()))
            except KeyError:
                continue
            if len(users) < minUsers:
                print('Skipping ' + courseName + ': fewer than '
                      + str(minUsers) + ' users')
                continue
            self.users[courseName] = users
            allUsers.update(users)
            # NOTE(review): userMap is rebuilt before the two loaders below
            # run; presumably they depend on it -- confirm.
            self.userMap = UserMap(True)
            surveyRespondents = set(self.loadSurveyRespondents())
            posters = set(self.loadPosters())

            totalEnrollments += len(users)
            totalSurveyRespondents += len(surveyRespondents)
            totalPosters += len(posters)
            totalPostersSurveyed += len(
                posters.intersection(surveyRespondents))

        totalUsers = len(allUsers)
        # With no qualifying course (or no survey data) the fractions are
        # undefined; report nan instead of raising ZeroDivisionError.
        undefined = float('nan')
        fracPosters = (totalPosters / float(totalEnrollments)
                       if totalEnrollments else undefined)
        fracPostersSurveyed = (
            totalPostersSurveyed / float(totalSurveyRespondents)
            if totalSurveyRespondents else undefined)

        path = os.path.join(self.resultsDir, 'basicStats.csv')
        with open(path, 'wt') as fid:
            fid.write('Number of enrollments: ' + str(totalEnrollments) + '\n')
            fid.write('Number of users: ' + str(totalUsers) + '\n')
            fid.write('Number of survey respondents: ' +
                      str(totalSurveyRespondents) + '\n')
            fid.write('Fraction of enrolled users who posted: ' +
                      str(fracPosters) + '\n')
            fid.write('Fraction of survey respondents who posted: ' +
                      str(fracPostersSurveyed) + '\n')
示例#8
0
    def run(self, projectName):
        """Group forums by name across all courses and write a summary.

        For every course dataset, the forums with ``deleted=0`` and
        ``parent_id=0`` are fetched (presumably the live top-level forums --
        confirm).  They are grouped by forum name into
        ``{forumName: [(courseName, forumDescription), ...]}``, which is
        handed to ``self.writeSummary`` together with the path
        ``<resultsDir>/summary.txt``.

        Args:
            projectName: forwarded unchanged to ``self.setup``.
        """
        self.setup(projectName)
        fTypes = {}
        for course in self.courseDatasets:
            DBSetup.switch(course)

            try:
                forums = list(
                    ForumForums.objects.filter(deleted=0, parent_id=0))
            except Exception as err:
                # Was a bare ``except:``, which also swallowed Ctrl-C and
                # SystemExit.  Skipping stays best effort, but is reported.
                print('Skipping %s: %r' % (course.name, err))
                continue

            for forum in forums:
                fTypes.setdefault(forum.name, []).append(
                    (course.name, forum.description))

        path = os.path.join(self.resultsDir, 'summary.txt')
        self.writeSummary(path, fTypes)
示例#9
0
    def run(self, projectName):
        """Group forums by name across all courses and write a summary.

        For every course dataset, the forums with ``deleted=0`` and
        ``parent_id=0`` are fetched (presumably the live top-level forums --
        confirm).  They are grouped by forum name into
        ``{forumName: [(courseName, forumDescription), ...]}``, which is
        handed to ``self.writeSummary`` together with the path
        ``<resultsDir>/summary.txt``.

        Args:
            projectName: forwarded unchanged to ``self.setup``.
        """
        self.setup(projectName)
        fTypes = {}
        for course in self.courseDatasets:
            DBSetup.switch(course)

            try:
                forums = list(
                    ForumForums.objects.filter(deleted=0, parent_id=0))
            except Exception as err:
                # Was a bare ``except:``, which also swallowed Ctrl-C and
                # SystemExit.  Skipping stays best effort, but is reported.
                print('Skipping %s: %r' % (course.name, err))
                continue

            for forum in forums:
                fTypes.setdefault(forum.name, []).append(
                    (course.name, forum.description))

        path = os.path.join(self.resultsDir, 'summary.txt')
        self.writeSummary(path, fTypes)
示例#10
0
    def computeFeatures(self):
        """Compute the thread features for the current database.

        Logs the call, switches to ``self.currDB``, fetches all threads,
        posts and comments, and returns a dict mapping each feature name to
        the value produced by the matching helper method.
        """
        logging.info('computeFeatures(' + self.currDB + ')')
        DBSetup.switch(self.currDB)

        allThreads = ForumThreads.objects.all()
        allPosts = ForumPosts.objects.all()
        allComments = ForumComments.objects.all()
        postToThread = self.getPostThreadMap(allPosts)

        # Entries are evaluated top to bottom, so the helpers run in the same
        # order as the original one-assignment-per-feature version.
        return {
            'numPosts': self.getNumPosts(allThreads),
            'avgPostLen': self.getAvgPostLen(allThreads, allPosts),
            'postTime': self.postTime(allThreads),
            'postTimeRank': self.postTimeRank(allThreads),
            'timeToFirstResponse': self.timeToFirstResponse(
                allThreads, allPosts),
            'numContributors': self.numContributors(
                allThreads, allPosts, allComments, postToThread),
            'timeOfHighestVotedPost': self.timeOfHighestVotedPost(
                allThreads, allPosts),
            'originalPostVotes': self.originalPostVotes(
                allThreads, allPosts),
            'originalPostNumTokens': self.originalPostNumTokens(
                allThreads, allPosts),
        }
示例#11
0
 def handler(self):
     """Run the requested course(s) in-process, or submit one job per course.

     * ``args[0]`` parses as an int: run only that course, then exit(0).
     * otherwise, with ``options.runLocally`` set: run every course in
       turn, then exit(0).
     * otherwise: call ``self._submitJob`` for every course id and return.
     """
     # Evaluated first, exactly as in the original control flow.
     localRun = options.runLocally == True

     try:
         requestedId = int(args[0])
     except (ValueError, IndexError):
         requestedId = None

     if requestedId is not None:
         toRun = [requestedId]
     elif localRun:
         toRun = range(len(self.courseDatasets))
     else:
         for jobId in range(len(self.courseDatasets)):
             self._submitJob(jobId)
         return

     for selectedId in toRun:
         self.courseId = selectedId
         self.currCourse = self.getCourse()
         # NOTE: self._setupCourseDirs() was commented out in the original
         # and stays disabled here.
         DBSetup.switch(self.currCourse)
         self.runner()
     sys.exit(0)
示例#12
0
 def handler(self):
     """Run the requested course(s) in-process, or submit one job per course.

     * ``args[0]`` parses as an int: run only that course, then exit(0).
     * otherwise, with ``options.runLocally`` set: run every course in
       turn, then exit(0).
     * otherwise: call ``self._submitJob`` for every course id and return.
     """
     # Evaluated first, exactly as in the original control flow.
     localRun = options.runLocally == True

     try:
         requestedId = int(args[0])
     except (ValueError, IndexError):
         requestedId = None

     if requestedId is not None:
         toRun = [requestedId]
     elif localRun:
         toRun = range(len(self.courseDatasets))
     else:
         for jobId in range(len(self.courseDatasets)):
             self._submitJob(jobId)
         return

     for selectedId in toRun:
         self.courseId = selectedId
         self.currCourse = self.getCourse()
         # NOTE: self._setupCourseDirs() was commented out in the original
         # and stays disabled here.
         DBSetup.switch(self.currCourse)
         self.runner()
     sys.exit(0)
示例#13
0
class Testing(unittest.TestCase):
    """Tests for the stub, mock and fake customer sources of the mapping."""

    # Shared by all tests; both objects are created once, when the class
    # body is executed.
    dbSetup = DBSetup()
    customerMapping = CustomerDatabaseMapping(dbSetup)

    def testCustomerFromStub(self):
        """CustomerFromStub() returns the single hard-coded customer row."""
        customer = self.customerMapping.CustomerFromStub()
        data = [['*****@*****.**', 'Samuel', 'Lip', '1234']]

        self.assertEqual(customer, data)

    def testCustomerFromMock(self):
        """CustomerFromMock() returns the same single customer row."""
        customer = self.customerMapping.CustomerFromMock()
        data = [['*****@*****.**', 'Samuel', 'Lip', '1234']]

        self.assertEqual(customer, data)

    def testCustomerFromFake(self):
        """CustomerFromFake() matches what the mapping's testData() returns."""
        customer = self.customerMapping.CustomerFromFake()
        test = self.customerMapping.testData()

        # assertEquals was a deprecated alias, removed in Python 3.12.
        self.assertEqual(customer, test)
示例#14
0
sys.path.append('../util')
from FileSystem import FileSystem

sys.path.append(FileSystem.getRootDir())
sys.path.append(FileSystem.getSiteDir())
from DBSetup import DBSetup
from lyticssite.forumModels.models import *
from lyticssite.generalModels.models import *
from lyticssite.eventModels.models import *

courseDatasetInfo = FileSystem.loadCourseDatasetInfo()

# For every course: print its name, switch to its database and report how
# many Users and Demographic rows it holds.
for course in courseDatasetInfo:
    print(course.name)

    DBSetup.switch(course)

    # Each count is best effort, so a failure on one must not stop the
    # report.  The original used ``except: pass``, which hid the reason and
    # also swallowed Ctrl-C; now only Exception is caught and it is reported.
    try:
        userCount = Users.objects.count()
        print('\tUser Count: ' + str(userCount))
    except Exception as err:
        print('\tUser Count: unavailable (%r)' % (err,))
    try:
        surveyCount = Demographic.objects.count()
        print('\tSurvey response count: ' + str(surveyCount))
    except Exception as err:
        print('\tSurvey response count: unavailable (%r)' % (err,))
示例#15
0
 def __init__(self):
     """Create a DBSetup and a CustomerDatabaseMapping constructed from it."""
     setup = DBSetup()
     self.dbSetup = setup
     self.customerDatabaseMapping = CustomerDatabaseMapping(setup)
示例#16
0
文件: testDB.py 项目: jinpa/MOOC-data
import os.path
sys.path.append('../util')
from FileSystem import FileSystem
sys.path.append(FileSystem.getRootDir())
sys.path.append(FileSystem.getSiteDir())
from  DBSetup import DBSetup
from lyticssite.forumModels.models import *
from lyticssite.generalModels.models import *
from lyticssite.eventModels.models import *

courseDatasetInfo = FileSystem.loadCourseDatasetInfo()

# For every course: print its name, switch to its database and report how
# many Users and Demographic rows it holds.
for course in courseDatasetInfo:
    print(course.name)

    DBSetup.switch(course)

    # Each count is best effort, so a failure on one must not stop the
    # report.  The original used ``except: pass``, which hid the reason and
    # also swallowed Ctrl-C; now only Exception is caught and it is reported.
    try:
        userCount = Users.objects.count()
        print('\tUser Count: ' + str(userCount))
    except Exception as err:
        print('\tUser Count: unavailable (%r)' % (err,))
    try:
        surveyCount = Demographic.objects.count()
        print('\tSurvey response count: ' + str(surveyCount))
    except Exception as err:
        print('\tSurvey response count: unavailable (%r)' % (err,))