Пример #1
0
def generateDiffConfigurableWeeks(cname, prefix, diff):
  """Generate word clouds comparing each week with the week `diff` weeks later.

  For every base week of the course whose target week (base + diff) does not
  exceed the course's last week, and where both the base and the target week
  have at least 20 posts, the target week's word counts are folded through
  getDiff against the word counts of each week in [base, target), one week
  at a time. The final dictionary is converted with dictToArray and handed
  to generateWC.

  Args:
    cname: Course identifier passed through to PostGetter.
    prefix: Prefix used for every name passed to getWordCounts / generateWC.
    diff: Distance, in weeks, between the base week and the target week.
      When diff <= 0 there are no weeks to fold in, so the target week's own
      word counts are used unchanged.
  """
  minPosts = 20
  smartStopWords = True  # loop-invariant: always use the smart stop-word mode
  p = PostGetter()
  minWeek = p.getMinWeekForCourse(cname)
  maxWeek = p.getMaxWeekForCourse(cname)
  for week in range(minWeek, maxWeek):
    targetWeek = week + diff
    if targetWeek > maxWeek:
      # Later base weeks only push the target further out, so stop here.
      break
    baseNumPosts = p.getNumPosts(cname, week)
    targetNumPosts = p.getNumPosts(cname, targetWeek)
    if baseNumPosts < minPosts or targetNumPosts < minPosts:
      continue

    targetText = p.getWeeklyDataForCourseAsString(cname, targetWeek)
    (remaining, _w, _c) = getWordCounts(
        targetText,
        '%s_week%s_stop' % (prefix, targetWeek),
        '%s_week%s' % (prefix, targetWeek),
        smartStopWords)

    # `remaining` starts as the target week's counts, so it is always bound:
    # an empty range (diff <= 0) can no longer raise UnboundLocalError or
    # leak the dictionary computed for a previous base week.
    for earlierWeek in range(week, targetWeek):
      earlierText = p.getWeeklyDataForCourseAsString(cname, earlierWeek)
      (earlierCounts, _w, _c) = getWordCounts(
          earlierText,
          '%s_week%s_stop' % (prefix, earlierWeek),
          '%s_week%s' % (prefix, earlierWeek),
          smartStopWords)
      remaining = getDiff(earlierCounts, remaining)

    (w_, c_) = dictToArray(remaining)
    # NOTE(review): the output name is labelled with week+1, not the target
    # week (week+diff); kept as-is so existing output names do not change.
    generateWC(w_, c_, '%s_week%s_stop_DIFFCONF%s' % (prefix, week + 1, diff))
Пример #2
0
def generateWeeklyWC(cname, prefix, smartStopWords):
  """Run generateWC once for every week of a course that has enough posts.

  Args:
    cname: Course identifier passed through to PostGetter.
    prefix: Prefix used for the names passed to getWordCounts / generateWC.
    smartStopWords: Forwarded unchanged to getWordCounts.
  """
  getter = PostGetter()
  firstWeek = getter.getMinWeekForCourse(cname)
  lastWeek = getter.getMaxWeekForCourse(cname)
  for wk in range(firstWeek, lastWeek + 1):  # last week is included
    # Weeks with fewer than 20 posts are skipped entirely.
    if getter.getNumPosts(cname, wk) >= 20:
      weekText = getter.getWeeklyDataForCourseAsString(cname, wk)
      stopName = '%s_week%s_stop' % (prefix, wk)
      plainName = '%s_week%s' % (prefix, wk)
      counted = getWordCounts(weekText, stopName, plainName, smartStopWords)
      # getWordCounts yields (dict, words, counts); only the last two are used.
      (_counts, words, freqs) = counted
      generateWC(words, freqs, stopName)
Пример #3
0
def generateDiff(cname, prefix, smartStopWords):
  """Run generateWC on the getDiff result of every pair of consecutive weeks.

  A pair (week, week + 1) is processed only when both weeks have at least
  20 posts.

  Args:
    cname: Course identifier passed through to PostGetter.
    prefix: Prefix used for the names passed to getWordCounts / generateWC.
    smartStopWords: Forwarded unchanged to getWordCounts.
  """
  getter = PostGetter()
  firstWeek = getter.getMinWeekForCourse(cname)
  lastWeek = getter.getMaxWeekForCourse(cname)
  for earlier in range(firstWeek, lastWeek):
    later = earlier + 1
    earlierPosts = getter.getNumPosts(cname, earlier)
    laterPosts = getter.getNumPosts(cname, later)
    if earlierPosts < 20 or laterPosts < 20:
      continue

    earlierText = getter.getWeeklyDataForCourseAsString(cname, earlier)
    laterText = getter.getWeeklyDataForCourseAsString(cname, later)

    # Only the dictionary (first element) of each getWordCounts result is used.
    earlierResult = getWordCounts(
        earlierText,
        '%s_week%s_stop' % (prefix, earlier),
        '%s_week%s' % (prefix, earlier),
        smartStopWords)
    laterResult = getWordCounts(
        laterText,
        '%s_week%s_stop' % (prefix, later),
        '%s_week%s' % (prefix, later),
        smartStopWords)
    (earlierCounts, _ew, _ec) = earlierResult
    (laterCounts, _lw, _lc) = laterResult

    (words, freqs) = dictToArray(getDiff(earlierCounts, laterCounts))
    # The output is labelled with the later week of the pair.
    generateWC(words, freqs, '%s_week%s_stop_DIFF' % (prefix, later))
Пример #4
0
def generatePenalizeWeeks(cname, prefix, diff, penalty=0.5):
  """Run generateWC on the weightWords result of every pair of consecutive weeks.

  A pair (week, week + 1) is processed only when both weeks have at least
  20 posts. The two weeks' word-count dictionaries are combined with
  weightWords(current, next, penalty).

  Args:
    cname: Course identifier passed through to PostGetter.
    prefix: Prefix used for the names passed to getWordCounts / generateWC.
    diff: Used only as the trailing label of the output name
      ('..._DIFFPENALTY<diff>'); it does not affect which weeks are compared.
    penalty: Value forwarded to weightWords. Defaults to 0.5, the value that
      was previously hard-coded, so existing callers are unaffected.
  """
  minPosts = 20
  smartStopWords = True  # loop-invariant: always use the smart stop-word mode
  p = PostGetter()
  minWeek = p.getMinWeekForCourse(cname)
  maxWeek = p.getMaxWeekForCourse(cname)
  for week in range(minWeek, maxWeek):
    nextWeek = week + 1
    currNumPosts = p.getNumPosts(cname, week)
    nextNumPosts = p.getNumPosts(cname, nextWeek)
    if currNumPosts < minPosts or nextNumPosts < minPosts:
      continue
    currText = p.getWeeklyDataForCourseAsString(cname, week)
    nextText = p.getWeeklyDataForCourseAsString(cname, nextWeek)

    # Only the dictionary (first element) of each getWordCounts result is used.
    (currWordCountDict, _w, _c) = getWordCounts(
        currText,
        '%s_week%s_stop' % (prefix, week),
        '%s_week%s' % (prefix, week),
        smartStopWords)
    (nextWordCountDict, _w, _c) = getWordCounts(
        nextText,
        '%s_week%s_stop' % (prefix, nextWeek),
        '%s_week%s' % (prefix, nextWeek),
        smartStopWords)
    weighted = weightWords(currWordCountDict, nextWordCountDict, penalty)
    (w_, c_) = dictToArray(weighted)
    generateWC(w_, c_, '%s_week%s_stop_DIFFPENALTY%s' % (prefix, nextWeek, diff))