Exemplo n.º 1
0
    def weightplot(self,clip=1):       
        dtb = db.WikiDatabase()
        pages = dtb.getallfetched()
        print "got", len(pages)

        import itertools
        possw = list(itertools.product([0,1], repeat=6))

        shuffle(pages)
        for r in pages[:clip]:
            article = r[0].decode('utf-8')
            pageid = r[1]
            domain = r[2]
            xaxisname = "Username"
            yaxisname = "Edit share",
            title = "Editors of " + safefilename(article) 
            title += ", " + domain + str(pageid) + ", by edit count"
            filename = domain + str(pageid) + "editcW"

            dbdata = dtb.getuserchange2(pageid,domain)
            dat = WikiDataHandle()

            xlz = []
            ylz = [[],[]]
            for w in possw:
                for d in (0,1):
                    ww = list(w + (d,))
                    f = filename + str(ww)
                    xlabels, ypoints = dat.editsharedata(pageid,domain,weights=ww,
                                                         namesort=True,dbdata=dbdata)
                    xlz = xlabels
                    ylz[d].append(ypoints)

            realy = [[],[]]
            for i,y in enumerate(ylz):
                yy = zip(*y)
                yyy = [sum(e) for e in yy]
                assert len(yyy) == len(xlz)
                realy[i] = yyy
            
            xf = []
            yf = [[],[]]
            for i,y in enumerate(realy):
                f = filename + "average" + "G" + str(i)
                comb = zip(xlz,y)
                comb = sorted(comb, key = lambda x: x[1])
                xf, yf[i] = zip(*comb) 
                xpoints = list(range(len(xf)))
            print self.specialtrajectory(xpoints, yf[0], xpoints,
                                         yf[1], None, None, None,
                                         xaxisname, yaxisname, None,
                                         title, filename, width=20, height=12)
Exemplo n.º 2
0
    def talkplots(self):
        dat = WikiDataHandle()

        titles, pids, tids, domains = dat.talkpages()
        

        for i in range(len(titles)):
            domain = domains[i]
            pageid = pids[i]
            talkid = tids[i]
            title = titles[i]
        
            print title, pageid

            try:
                x1, t1, _ = dat.trajectorydata(pageid, domain, normalise=False)
                x2, t2, growth = dat.trajectorydata(talkid, domain, normalise=False)
            except:
                continue

            t1sum = max(t1) 
            if t1sum:
                for i in xrange(len(t1)):
                    t1[i] /= t1sum

            t2sum = max(t2)
            if t2sum:
                for i in xrange(len(t2)):
                    t2[i] /= t2sum

            gsum = max(growth)
            if gsum:
                for i in xrange(len(growth)):
                    growth[i] /= gsum

            creation = min(x1[0],x2[0])
            for x in (x1, x2):
                for i in xrange(len(x)):
                    x[i] = (x[i]-creation).total_seconds()/3600

            title = title.decode('utf-8').replace('/','') + " article vs talk page trajectories"
            xaxisname = "Hours since creation"
            taxisname = "Change"
            gaxisname = "Article page size"
            filename = domain + str(pageid) + " Special Combo"

            print self.talktrajectory(x1, t1, x2, t2, growth,
                                         xaxisname, taxisname, gaxisname,
                                         title, filename, width=20, height=12)
Exemplo n.º 3
0
    def plot(self, title, pageid, domain, trajectory=True,
             editcount=True, editshare=True, weights=None):   
        filenames = []
        dat = WikiDataHandle()

        try:
            title = title.decode('utf-8')
        except:
            title = "id" + str(pageid)
        domain = domain.decode('utf-8')
        
        if trajectory:
            xpoints, tpoints, gpoints = dat.trajectorydata(pageid, domain)
            filenames.append(self.trajectory(xpoints, tpoints,
                                             gpoints,
                                             "Hours since creation",
                                             "Edit distance from final",
                                             "Article size",
                                             "Trajectory of " + safefilename(title) + ", " +\
                                                 domain + str(pageid),
                                             domain + str(pageid) + "traj",
                                             width=13, height=8))
            print "plotted", filenames[-1]
            logDebug("plotted "+ filenames[-1])

        if editcount:
            xlabels, ypoints = dat.editcountdata(pageid, domain)
            filenames.append(self.barchart(xlabels, ypoints,
                                           "Username", "Edit count",
                                           "Editors of " + safefilename(title) + ", " +
                                           domain + str(pageid) + ", by edit count", 
                                           domain + str(pageid) + "editc",
                                           width = 13, height = 8))
            print "plotted", filenames[-1]
            logDebug("plotted "+ filenames[-1])

        if editshare:
            xlabels, ypoints = dat.editsharedata(pageid, domain, self.weights)
            filenames.append(self.barchart(xlabels, ypoints,
                                           "Username", "Edit share", 
                                           "Editors of " + safefilename(title) + ", " + domain\
                                               + str(pageid) + ", by share", 
                                           domain + str(pageid) + "share",
                                           width = 13, height = 8))
            print "plotted", filenames[-1]
            logDebug("plotted "+ filenames[-1])

        return filenames