def plotTimeSeries(hashtagObject):
    """Save a two-panel time-series image for one hashtag.

    The image is only drawn and saved when the highest-activity region is
    reported as the first active region and the source lattice has at least
    MIN_OCCURRENCES_TO_DETERMINE_SOURCE_LATTICE occurrences.  The top panel
    shows the whole series with the active region overlaid as markers; the
    bottom panel shows the active region on its own.

    hashtagObject -- dict read with the keys 'h' (used for the file name and
                     title) and 'oc' (the occurrences).

    NOTE(review): `outputFolder` is not a parameter; it has to be supplied by
    an enclosing scope that is not visible in this block.
    """
    def getDataToPlot(occ):
        # Returns (epochs, counts) sorted by epoch, with a zero count inserted
        # for every TIME_UNIT_IN_SECONDS step that has no entry.
        epochCountPairs = getOccurranceDistributionInEpochs(occ)
        observedEpochs = [pair[0] for pair in epochCountPairs]
        firstEpoch, lastEpoch = min(observedEpochs), max(observedEpochs)
        countsByEpoch = dict(epochCountPairs)
        for epoch in range(firstEpoch, lastEpoch, TIME_UNIT_IN_SECONDS):
            countsByEpoch.setdefault(epoch, 0)
        return zip(*sorted(countsByEpoch.iteritems(), key=itemgetter(0)))

    outputFile = hashtagsImagesFirstActiveTimeSeriesAnalysisFolder % outputFolder + '%s.png' % (hashtagObject['h'])
    FileIO.createDirectoryForFile(outputFile)
    print(unicode(outputFile).encode('utf-8'))

    timeUnits, timeSeries = getDataToPlot(hashtagObject['oc'])
    occurencesInActiveRegion, isFirstActiveRegion = getOccuranesInHighestActiveRegion(hashtagObject, True)
    timeUnitsForActiveRegion, timeSeriesForActiveRegion = getDataToPlot(occurencesInActiveRegion)
    lid, count = getSourceLattice(hashtagObject['oc'])

    # Nothing is plotted or saved unless both conditions hold.
    if not (isFirstActiveRegion and count >= MIN_OCCURRENCES_TO_DETERMINE_SOURCE_LATTICE):
        return

    toDatetime = datetime.datetime.fromtimestamp

    # Top panel: full series as a line, active region as markers.
    ax = plt.subplot(211)
    plt.plot_date([toDatetime(t) for t in timeUnits], timeSeries, '-')
    # NOTE(review): under the guard above isFirstActiveRegion is always truthy,
    # so the red markers appear to be unreachable here.
    markerColor = 'k' if isFirstActiveRegion else 'r'
    plt.plot_date([toDatetime(t) for t in timeUnitsForActiveRegion], timeSeriesForActiveRegion, 'o', c=markerColor)
    plt.setp(ax.get_xticklabels(), rotation=30, fontsize=10)
    plt.title(hashtagObject['h'] + '(%s)' % count)

    # Bottom panel: the active region alone.
    ax = plt.subplot(212)
    plt.plot_date([toDatetime(t) for t in timeUnitsForActiveRegion], timeSeriesForActiveRegion, '-')
    plt.setp(ax.get_xticklabels(), rotation=30, fontsize=10)

    plt.savefig(outputFile)
    plt.clf()
def tempAnalysis(timeRange, outputFolder, imageFileTemplate='/Users/kykamath/Desktop/hashtags/%s.png'):
    """Save a two-panel time-series image for every hashtag in a hashtags file.

    Hashtag objects are read from ``hashtagsFile % (outputFolder, '<a>_<b>')``
    where ``<a>_<b>`` is built from `timeRange`.  For each one, the top panel
    plots the series of the occurrences returned by
    getOccuranesInHighestActiveRegion, and the bottom panel plots the series
    of all occurrences (``hashtagObject['oc']``) with that same region series
    drawn over it in magenta.  The running index of each hashtag is printed
    after its image is saved.

    timeRange         -- 2-tuple formatted with '%s_%s' to pick the input file.
    outputFolder      -- first value substituted into `hashtagsFile`.
    imageFileTemplate -- '%s' template for the image path; it is filled with
                         ``hashtagObject['h']``.  The default is the location
                         that used to be hard-coded, so existing callers are
                         unaffected.
    """
    classifierTimeUnit = HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS
    toDatetime = datetime.datetime.fromtimestamp
    inputFile = hashtagsFile % (outputFolder, '%s_%s' % timeRange)

    for i, hashtagObject in enumerate(FileIO.iterateJsonFromFile(inputFile)):
        occurances = getOccuranesInHighestActiveRegion(hashtagObject)
        timeUnits, timeSeries = getTimeUnitsAndTimeSeries(occurances, timeUnit=classifierTimeUnit)
        # Converted once; the same dates are drawn in both panels.
        regionDates = [toDatetime(t) for t in timeUnits]

        # Top panel: most active region only.
        ax = plt.subplot(211)
        plt.plot_date(regionDates, timeSeries, '-o')
        plt.setp(ax.get_xticklabels(), rotation=10, fontsize=7)
        plt.title(hashtagObject['h'] + ' (%s)' % len(timeUnits))

        # Bottom panel: every occurrence, with the active region on top.
        ax = plt.subplot(212)
        timeUnits1, timeSeries1 = getTimeUnitsAndTimeSeries(hashtagObject['oc'], timeUnit=classifierTimeUnit)
        plt.plot_date([toDatetime(t) for t in timeUnits1], timeSeries1, '-o')
        plt.plot_date(regionDates, timeSeries, '-o', c='m')
        plt.setp(ax.get_xticklabels(), rotation=10, fontsize=7)

        outputFile = imageFileTemplate % (hashtagObject['h'])
        # Same preparation step the other plotting functions in this file use,
        # so saving does not fail just because the folder is missing.
        FileIO.createDirectoryForFile(outputFile)
        plt.savefig(outputFile)
        plt.clf()
        print(i)
def plotHastagClasses(timeRange, folderType):
    """Save a three-panel image (map + two time series) per classified hashtag.

    For every hashtag object for which HashtagsClassifier.classify returns a
    value other than None, an image is written to
    ``hashtagsImagesHashtagsClassFolder % folderType + '<classId>/<hashtag>.png'``:
      * panel 311: lattice locations of the highest-activity region, coloured
        by the number of occurrences per lattice,
      * panel 312: time series of the activity region(s),
      * panel 313: time series of all occurrences with the region(s) on top.

    timeRange  -- NOTE(review): currently unused; the input file is hard-coded
                  below.  Left untouched because callers may rely on it.
    folderType -- value substituted into `hashtagsImagesHashtagsClassFolder`.

    A failure while saving one image is reported and the loop moves on to the
    next hashtag; the figure is cleared in either case.
    """
    classifierTimeUnit = HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS
    toDatetime = datetime.datetime.fromtimestamp

    def plotSeries(occurrences, lineStyle, **plotArguments):
        # Draw the time series derived from `occurrences` on the current axes.
        timeUnits, timeSeries = getTimeUnitsAndTimeSeries(occurrences, timeUnit=classifierTimeUnit)
        plt.plot_date([toDatetime(t) for t in timeUnits], timeSeries, lineStyle, **plotArguments)

    def plotActivityRegions(regions, highlightedRegionId):
        # Draw every region; the highlighted one is always magenta.
        for regionId, (occurrences, color) in enumerate(regions):
            plotSeries(occurrences, '-o', c='m' if regionId == highlightedRegionId else color)

    # NOTE(review): hard-coded test input; `timeRange` and `folderType` are not
    # used to choose the file.
    inputFile = hashtagsFile % ('testing_world', '%s_%s' % (2, 11))
    for count, hashtagObject in enumerate(FileIO.iterateJsonFromFile(inputFile), 1):
        print(count)
        classId = HashtagsClassifier.classify(hashtagObject)
        if classId is None:
            continue
        # NOTE(review): the classifier result is overridden, so every image
        # lands in the '1' sub-folder.  Kept as in the original.
        classId = 1
        outputFile = hashtagsImagesHashtagsClassFolder % folderType + '%s/%s.png' % (classId, hashtagObject['h'])
        FileIO.createDirectoryForFile(outputFile)

        # A single region today; kept as a list so more regions can be added.
        occurancesInActivityRegions = [[getOccuranesInHighestActiveRegion(hashtagObject), 'm']]
        for currentMainRangeId, (occurances1, _color) in enumerate(occurancesInActivityRegions):
            print(outputFile)

            ax = plt.subplot(312)
            plotActivityRegions(occurancesInActivityRegions, currentMainRangeId)
            plt.setp(ax.get_xticklabels(), rotation=10, fontsize=7)

            ax = plt.subplot(313)
            plotSeries(hashtagObject['oc'], '-')
            plotActivityRegions(occurancesInActivityRegions, currentMainRangeId)
            plt.setp(ax.get_xticklabels(), rotation=10, fontsize=7)

            plt.subplot(311)
            # groupby needs its input sorted by the grouping key.
            latticeIds = sorted(getLatticeLid(location, LATTICE_ACCURACY) for location, _time in occurances1)
            occurancesGroupedByLattice = sorted(
                [(getLocationFromLid(lid.replace('_', ' ')), len(list(group))) for lid, group in groupby(latticeIds)],
                key=itemgetter(1)
            )
            points, colors = zip(*occurancesGroupedByLattice)
            cm = matplotlib.cm.get_cmap('cool')
            if len(points) > 1:
                sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw=0, alpha=1.0)
                plt.colorbar(sc)
            else:
                plotPointsOnWorldMap(points, c='m', lw=0)
            plt.title(hashtagObject['h'] + '(%d)' % len(occurancesGroupedByLattice))

            try:
                plt.savefig(outputFile)
            except Exception as error:
                # Best effort: one unsavable image must not stop the run, but
                # the failure is no longer hidden.  %r keeps the message
                # printable even for non-ASCII hashtags.
                print('Unable to save %r: %r' % (outputFile, error))
            finally:
                # Always reset the figure so a failed save cannot leak its
                # axes into the next hashtag's image.
                plt.clf()