def graph_loader(file_name):
    """
    Loads a sonet.graph object from a pickle/graphml/... file

    The load is timed with a Timr labelled "GRAPH LOADING" and the value
    returned by sg.load() is passed back to the caller.

    If sg.load() raises IOError the exception is logged (with traceback)
    and the process is terminated with a non-zero exit status.
    """
    try:
        with Timr("GRAPH LOADING"):
            return sg.load(file_name)
    except IOError:
        # let logging interpolate the argument instead of pre-formatting
        logging.exception("unable to load a graph from passed file: %s",
                          file_name)
        # a bare sys.exit() would exit with status 0, i.e. report success
        sys.exit(1)
Пример #2
0
def graph_loader(file_name):
    """
    Loads a sonet.graph object from a pickle/graphml/... file

    The load is timed with a Timr labelled "GRAPH LOADING" and the value
    returned by sg.load() is passed back to the caller.

    If sg.load() raises IOError the exception is logged (with traceback)
    and the process is terminated with a non-zero exit status.
    """
    try:
        with Timr("GRAPH LOADING"):
            return sg.load(file_name)
    except IOError:
        logging.exception("unable to load a graph from passed file: %s",
                          file_name)
        # a bare sys.exit() would exit with status 0, i.e. report success
        sys.exit(1)
Пример #3
0
def main():
    op = create_option_parser()

    (options, args) = op.parse_args()

    if len(args) != 1:
        print "Insert one (and only one) file to process\n"
        op.print_help()
        sys.exit(2)

    fn = args[0]
    lang, date, type_ = mwlib.explode_dump_filename(fn)

    g = sg.load(fn)
    g.time_slice_subgraph(start=options.start, end=options.end)
    g.invert_edge_attr('weight', 'length')

    vn = len(g.g.vs)  # number of vertexes
    en = len(g.g.es)  # number of edges

    timr = Timr()

    if options.as_table:
        tablr = Tablr()
        tablr.start(1024 * 32, lang)

    if options.group or options.users_role or options.histogram:
        for group_name, group_attr in groups.iteritems():
            g.defineClass(group_name, group_attr)
            print ' * %s : nodes number : %d' % (group_name,
                                                 len(g.classes[group_name]))
    else:
        g.defineClass('all', {})

    print " * filename: %s" % (fn, )
    print " * lang: %s" % (lang, )
    print " * date: %s" % (date, )

    if options.details:
        with Timr("details"):
            print " * nodes number: %d" % (vn, )
            print " * edges number: %d" % (en, )

            nodes_with_outdegree = len(g.g.vs.select(_outdegree_ge=1))
            nodes_with_indegree = len(g.g.vs.select(_indegree_ge=1))
            self_loop_edges = len([edge for edge in g.g.es \
                                   if edge.target == edge.source])

            print " * nodes with out edges number: %d (%6f%%)" % (
                nodes_with_outdegree, 100. * nodes_with_outdegree / vn)
            print " * nodes with in edges number: %d (%6f%%)" % (
                nodes_with_indegree, 100. * nodes_with_indegree / vn)
            print " * max weights on edges : %s" % top(g.g.es['weight'])

            print " * self-loop edges: %d" % self_loop_edges
            #print " * diameter : %6f" % g.g.diameter(weights='length')
            #print " * average weight : %6f" % numpy.average(g.g.es['weight'])

    if options.density or options.reciprocity:
        with Timr('density&reciprocity'):
            for cls, vs in g.classes.iteritems():
                if not len(vs) > 1:
                    continue

                subgraph = vs.subgraph()

                print " * %s : density : %.10f" % (cls, subgraph.density())
                print " * %s : reciprocity : %.10f" % (cls,
                                                       subgraph.reciprocity())

    if options.degree:
        with Timr('degree'):
            g.g.vs['indegree'] = g.g.degree(type=ig.IN)
            g.g.vs['outdegree'] = g.g.degree(type=ig.OUT)

            for cls, vs in g.classes.iteritems():
                if not vs:
                    continue

                ind = numpy.array(vs['indegree'])
                outd = numpy.array(vs['outdegree'])

                print " * %s : mean IN degree (no weights): %f" % (
                    cls, numpy.average(ind))
                print " * %s : mean OUT degree (no weights): %f" % (
                    cls, numpy.average(outd))
                print " * %s : max IN degrees (no weights): %s" % (cls,
                                                                   top(ind))
                print " * %s : max OUT degrees (no weights): %s" % (cls,
                                                                    top(outd))

                print " * %s : stddev IN degree (no weights): %f" % (
                    cls, numpy.sqrt(numpy.var(ind)))
                print " * %s : stddev OUT degree (no weights): %f" % (
                    cls, numpy.sqrt(numpy.var(outd)))

    if options.transitivity:
        ##print " * transitivity: %f" % (nx.transitivity(g), )
        pass

    if options.summary:
        # don't use with --as-table
        print " * summary: %s" % (g.g.summary(), )

    if options.distance:
        with Timr('split clusters'):
            vc = g.g.clusters()
            size_clusters = vc.sizes()
            giant = vc.giant()

            print " * length of 5 max clusters: %s" % top(size_clusters)
            #print " * #node in 5 max clusters/#all nodes: %s" % top(
            #    [1.*cluster_len/vn for cluster_len in size_clusters])

    if options.distance:
        with Timr('distance'):
            gg = sg.Graph(giant)
            print " * average distance in the giant component: %f" % \
                  gg.averageDistance(weight='length')
            print " * average hops in the giant component: %f" % \
                  gg.averageDistance()

            #print "Average distance 2: %f" % giant.average_path_length(True,
            #                                                           False)

    if options.efficiency:
        with Timr('efficiency'):
            print " * efficiency: %f" % g.efficiency(weight='length')

    ##TODO: compute for centrality only if "all" or "degree"
    if (options.plot or options.histogram or options.power_law
            or options.centrality):
        with Timr('set weighted indegree'):
            g.set_weighted_degree()

    if options.centrality:
        timr.start('centrality')
        centralities = options.centrality.split(',')
        if 'all' in centralities:
            centralities = 'betweenness,pagerank,degree'.split(',')

        if set(centralities).difference(
                'betweenness,pagerank,degree'.split(',')):
            logging.error('Unknown centrality')
            sys.exit(0)

        if "betweenness" in centralities:
            print >> sys.stderr, "betweenness"
            g.g.vs['bw'] = g.g.betweenness(weights='length', directed=True)

        #g.g.vs['ev'] = g.g.evcent(weights='weight') # eigenvector centrality

        if 'pagerank' in centralities:
            print >> sys.stderr, "pagerank"
            g.g.vs['pr'] = g.g.pagerank(weights='weight')  # pagerank

        if 'degree' in centralities:
            print >> sys.stderr, "outdegree"
            g.set_weighted_degree(type=ig.OUT)
        #total_weights = sum(g.g.es['weight'])
        max_edges = vn * (vn - 1)

        for cls, vs in g.classes.iteritems():
            if not vs:
                continue

            if "betweenness" in centralities:
                norm_betweenness = numpy.array(g.classes[cls]['bw']) \
                                   / max_edges
                print " * %s : average betweenness : %.10f" % (
                    cls, numpy.average(norm_betweenness))
                print " * %s : stddev betweenness : %.10f" % (
                    cls, numpy.sqrt(numpy.var(norm_betweenness)))
                print " * %s : max betweenness: %s" % (
                    cls, top(numpy.array(g.classes[cls]['bw']) / max_edges))

            #print " * Average eigenvector centrality : %6f" % numpy.average(
            #    g.vs['ev'])
            if 'pagerank' in centralities:
                print " * %s : average pagerank : %.10f" % (
                    cls, numpy.average(g.classes[cls]['pr']))
                print " * %s : stddev pagerank : %.10f" % (
                    cls, numpy.sqrt(numpy.var(g.classes[cls]['pr'])))
                print " * %s : max pagerank: %s" % (cls,
                                                    top(g.classes[cls]['pr']))

            if 'degree' in centralities:
                wi = g.classes[cls]['weighted_indegree']
                print " * %s : average IN degree centrality (weighted): %.10f" % (
                    cls, numpy.average(wi))
                print " * %s : stddev IN degree centrality (weighted): %.10f" % (
                    cls, numpy.sqrt(numpy.var(wi)))
                print " * %s : max IN degrees centrality (weighted): %s" % (
                    cls, top(wi))
                del wi

                wo = g.classes[cls]['weighted_outdegree']
                print " * %s : average OUT degree centrality (weighted) : %.10f" %\
                      (cls, numpy.average(wo))
                print " * %s : stddev OUT degree centrality (weighted) : %.10f" % \
                      (cls, numpy.sqrt(numpy.var(wo)))
                print " * %s : max OUT degrees centrality (weighted): %s" % (
                    cls, top(wo))
                del wo

        timr.stop('centrality')

    if options.power_law:
        with Timr('power law'):
            for cls, vs in g.classes.iteritems():
                if not vs:
                    continue

                indegrees = vs['weighted_indegree']

                try:
                    alpha_exp = ig.statistics.power_law_fit(indegrees, xmin=6)
                    print " * %s : alpha exp IN degree distribution : %10f " %\
                          (cls, alpha_exp)
                except ValueError:
                    print >> sys.stderr,\
                          " * %s : alpha exp IN degree distribution : ERROR" %\
                          (cls,)

    if options.histogram:
        list_with_index = lambda degrees, idx: [(degree, idx)
                                                for degree in degrees
                                                if degree]
        all_list = []

        nogrp_indegrees = g.g.vs.select(sysop_ne=True,
                                        bureaucrat_ne=True,
                                        steward_ne=True,
                                        founder_ne=True,
                                        bot_ne=True)['weighted_indegree']
        all_list += list_with_index(nogrp_indegrees, 1)

        sysops_indegrees = g.classes['sysop']['weighted_indegree']
        all_list += list_with_index(sysops_indegrees, 2)

        burs_indegrees = g.classes['bureaucrat']['weighted_indegree']
        all_list += list_with_index(burs_indegrees, 3)

        stewards_indegrees = g.classes['steward']['weighted_indegree']
        all_list += list_with_index(stewards_indegrees, 4)

        founders_indegrees = g.classes['founder']['weighted_indegree']
        all_list += list_with_index(founders_indegrees, 5)

        bots_indegrees = g.classes['bot']['weighted_indegree']
        all_list += list_with_index(bots_indegrees, 6)

        if options.gnuplot:
            f = open('hist.dat', 'w')
        else:
            f = open('%swiki-%s-hist.dat' % (lang, date), 'w')

        all_list.sort(reverse=True)

        for indegree, grp in all_list:
            for _ in range(grp - 1):
                print >> f, 0,
            print >> f, indegree,
            for _ in range(grp, 6):
                print >> f, 0,
            print >> f, ""
        f.close()

    if options.gnuplot:
        from popen2 import Popen3

        process = Popen3('gnuplot hist.gnuplot')
        process.wait()

        os.rename('hist.png', '%swiki-%s-hist.png' % (lang, date))
        os.rename('hist.dat', '%swiki-%s-hist.dat' % (lang, date))

    if options.plot:
        ## TODO: evaluate if this can be done with
        ## http://bazaar.launchpad.net/~igraph/igraph/0.6-main/revision/2018
        with Timr('plot'):
            import math

            ## filter:
            #print len(g.g.vs), len(g.g.es)
            #g.set_weighted_degree(type=ig.OUT)
            #g.g = g.g.subgraph(g.g.vs.select(weighted_indegree_ge=10,
            #                           weighted_outdegree_ge=1))
            #g.g.write_graphml('itwiki-20100729-stub-meta-history_in10_out1.graphml')
            #print len(g.g.vs), len(g.g.es)

            bots = g.g.vs.select(bot=True)
            bots['color'] = ('purple', ) * len(bots)
            logging.debug('bots: ok')

            anonyms = g.g.vs.select(anonymous=True)
            anonyms['color'] = ('blue', ) * len(anonyms)

            sysops = g.g.vs.select(sysop=True)
            sysops['color'] = ('yellow', ) * len(sysops)

            bur_sysops = g.g.vs.select(bureaucrat=True, sysop=True)
            bur_sysops['color'] = ('orange', ) * len(bur_sysops)

            g.g.vs['size'] = [
                math.sqrt(v['weighted_indegree'] + 1) * 10 for v in g.g.vs
            ]

            logging.debug('plot: begin')
            ig.plot(g.g,
                    target=lang + "_general.png",
                    bbox=(0, 0, 8000, 8000),
                    edge_color='grey',
                    layout='drl')
            logging.debug('plot: end')
            weights = g.g.es['weight']
            max_weight = max(weights)

            g.g.es['color'] = [(255. * e['weight'] / max_weight, 0., 0.)
                               for e in g.g.es]
            g.g.es['width'] = weights

            ig.plot(g.g,
                    target=lang + "_weighted_edges.png",
                    bbox=(0, 0, 4000, 2400),
                    layout='fr',
                    vertex_label=' ')

    if options.as_table:
        tablr.stop()

        #tablr.printHeader()
        #tablr.printData()
        tablr.saveInDjangoModel()

    if options.adjacency:
        giant = g.g.clusters().giant()
        #destAdj = "%s/%swiki-%s-adj.csv" % (os.path.split(fn)[0], lang, date)
        destAdj = "%swiki-%s-adj.csv" % (lang, date)
        #destRec = "%s/%swiki-%s-rec.csv" % (os.path.split(fn)[0], lang, date)
        destRec = "%swiki-%s-rec.csv" % (lang, date)
        sg.Graph(giant).writeAdjacencyMatrix(destAdj, 'username')
        sg.Graph(giant).writeReciprocityMatrix('username', destRec)

    if options.users_role:
        l = g.get_user_class('username',
                             ('anonymous', 'bot', 'bureaucrat', 'sysop'))

        #destUR = "%s/%swiki-%s-ur.csv" % (os.path.split(fn)[0], lang, date)
        destUR = "%swiki-%s-ur.csv" % (lang, date)
        with open(destUR, 'w') as f:
            for username, role in sorted(l):
                print >> f, "%s,%s" % (username, role)

        from random import shuffle
        #destCls = "%s/%swiki-%s-%%s.csv" % (os.path.split(fn)[0], lang, date)
        destCls = "%swiki-%s-%%s.csv" % (lang, date)
        for cls in ('anonymous', 'bot', 'bureaucrat', 'sysop', 'normal_user'):
            users = g.classes[cls]['username']
            shuffle(users)
            with open(destCls % cls, 'w') as f:
                for username in users:
                    print >> f, \
                          ("%s,http://vec.wikipedia.org/w/index.php?title=" + \
                          "Discussion_utente:%s&action=history&offset=" + \
                          "20100000000001") % (username, username)
Пример #4
0
def main():
    op = create_option_parser()

    (options, args) = op.parse_args()

    if len(args) != 1:
        print "Insert one (and only one) file to process\n"
        op.print_help()
        sys.exit(2)

    fn = args[0]
    lang, date, type_ = mwlib.explode_dump_filename(fn)

    g = sg.load(fn)
    g.time_slice_subgraph(start=options.start, end=options.end)
    g.invert_edge_attr('weight', 'length')

    vn = len(g.g.vs) # number of vertexes
    en = len(g.g.es) # number of edges

    timr = Timr()

    if options.as_table:
        tablr = Tablr()
        tablr.start(1024*32, lang)

    if options.group or options.users_role or options.histogram:
        for group_name, group_attr in groups.iteritems():
            g.defineClass(group_name, group_attr)
            print ' * %s : nodes number : %d' % (group_name,
                                                 len(g.classes[group_name]))
    else:
        g.defineClass('all', {})

    print " * filename: %s" % (fn,)
    print " * lang: %s" % (lang,)
    print " * date: %s" % (date,)

    if options.details:
        with Timr("details"):
            print " * nodes number: %d" % (vn,)
            print " * edges number: %d" % (en,)

            nodes_with_outdegree = len(g.g.vs.select(_outdegree_ge=1))
            nodes_with_indegree = len(g.g.vs.select(_indegree_ge=1))
            self_loop_edges = len([edge for edge in g.g.es \
                                   if edge.target == edge.source])

            print " * nodes with out edges number: %d (%6f%%)" % (
                nodes_with_outdegree, 100.*nodes_with_outdegree/vn)
            print " * nodes with in edges number: %d (%6f%%)" % (
                nodes_with_indegree, 100.*nodes_with_indegree/vn)
            print " * max weights on edges : %s" % top(g.g.es['weight'])

            print " * self-loop edges: %d" % self_loop_edges
            #print " * diameter : %6f" % g.g.diameter(weights='length')
            #print " * average weight : %6f" % numpy.average(g.g.es['weight'])


    if options.density or options.reciprocity:
        with Timr('density&reciprocity'):
            for cls, vs in g.classes.iteritems():
                if not len(vs) > 1:
                    continue

                subgraph = vs.subgraph()

                print " * %s : density : %.10f" % (cls, subgraph.density())
                print " * %s : reciprocity : %.10f" % (cls,
                                                       subgraph.reciprocity())


    if options.degree:
        with Timr('degree'):
            g.g.vs['indegree'] = g.g.degree(type=ig.IN)
            g.g.vs['outdegree'] = g.g.degree(type=ig.OUT)

            for cls, vs in g.classes.iteritems():
                if not vs:
                    continue

                ind = numpy.array(vs['indegree'])
                outd = numpy.array(vs['outdegree'])

                print " * %s : mean IN degree (no weights): %f" % (
                    cls, numpy.average(ind))
                print " * %s : mean OUT degree (no weights): %f" % (
                    cls, numpy.average(outd))
                print " * %s : max IN degrees (no weights): %s" % (cls,
                                                                   top(ind))
                print " * %s : max OUT degrees (no weights): %s" % (cls,
                                                                    top(outd))

                print " * %s : stddev IN degree (no weights): %f" % (
                    cls, numpy.sqrt(numpy.var(ind)))
                print " * %s : stddev OUT degree (no weights): %f" % (
                    cls, numpy.sqrt(numpy.var(outd)))

    if options.transitivity:
        ##print " * transitivity: %f" % (nx.transitivity(g), )
        pass

    if options.summary:
        # don't use with --as-table
        print " * summary: %s" % (g.g.summary(), )

    if options.distance:
        with Timr('split clusters'):
            vc = g.g.clusters()
            size_clusters = vc.sizes()
            giant = vc.giant()

            print " * length of 5 max clusters: %s" % top(size_clusters)
            #print " * #node in 5 max clusters/#all nodes: %s" % top(
            #    [1.*cluster_len/vn for cluster_len in size_clusters])


    if options.distance:
        with Timr('distance'):
            gg = sg.Graph(giant)
            print " * average distance in the giant component: %f" % \
                  gg.averageDistance(weight='length')
            print " * average hops in the giant component: %f" % \
                  gg.averageDistance()

            #print "Average distance 2: %f" % giant.average_path_length(True,
            #                                                           False)


    if options.efficiency:
        with Timr('efficiency'):
            print " * efficiency: %f" % g.efficiency(weight='length')


    ##TODO: compute for centrality only if "all" or "degree"
    if (options.plot or options.histogram or options.power_law or
        options.centrality):
        with Timr('set weighted indegree'):
            g.set_weighted_degree()


    if options.centrality:
        timr.start('centrality')
        centralities = options.centrality.split(',')
        if 'all' in centralities:
            centralities = 'betweenness,pagerank,degree'.split(',')

        if set(centralities).difference(
            'betweenness,pagerank,degree'.split(',')):
            logging.error('Unknown centrality')
            sys.exit(0)

        if "betweenness" in centralities:
            print >> sys.stderr, "betweenness"
            g.g.vs['bw'] = g.g.betweenness(weights='length', directed = True)

        #g.g.vs['ev'] = g.g.evcent(weights='weight') # eigenvector centrality

        if 'pagerank' in centralities:
            print >> sys.stderr, "pagerank"
            g.g.vs['pr'] = g.g.pagerank(weights='weight') # pagerank

        if 'degree' in centralities:
            print >> sys.stderr, "outdegree"
            g.set_weighted_degree(type=ig.OUT)
        #total_weights = sum(g.g.es['weight'])
        max_edges = vn*(vn-1)

        for cls, vs in g.classes.iteritems():
            if not vs:
                continue

            if "betweenness" in centralities:
                norm_betweenness = numpy.array(g.classes[cls]['bw'])/max_edges
                print " * %s : average betweenness : %.10f" % (
                    cls, numpy.average(norm_betweenness))
                print " * %s : stddev betweenness : %.10f" % (
                    cls, numpy.sqrt(numpy.var(norm_betweenness)))
                print " * %s : max betweenness: %s" % (
                    cls, top(numpy.array(g.classes[cls]['bw'])/max_edges))

            #print " * Average eigenvector centrality : %6f" % numpy.average(
            #    g.vs['ev'])
            if 'pagerank' in centralities:
                print " * %s : average pagerank : %.10f" % (
                    cls, numpy.average(g.classes[cls]['pr']))
                print " * %s : stddev pagerank : %.10f" % (
                    cls, numpy.sqrt(numpy.var(g.classes[cls]['pr'])))
                print " * %s : max pagerank: %s" % (
                    cls, top(g.classes[cls]['pr']))

            if 'degree' in centralities:
                wi = g.classes[cls]['weighted_indegree']
                print " * %s : average IN degree centrality (weighted): %.10f" % (
                    cls, numpy.average(wi))
                print " * %s : stddev IN degree centrality (weighted): %.10f" % (
                    cls, numpy.sqrt(numpy.var(wi)))
                print " * %s : max IN degrees centrality (weighted): %s" % (
                    cls, top(wi))
                del wi

                wo = g.classes[cls]['weighted_outdegree']
                print " * %s : average OUT degree centrality (weighted) : %.10f" %\
                      (cls, numpy.average(wo))
                print " * %s : stddev OUT degree centrality (weighted) : %.10f" % \
                      (cls, numpy.sqrt(numpy.var(wo)))
                print " * %s : max OUT degrees centrality (weighted): %s" % (
                    cls, top(wo))
                del wo

        timr.stop('centrality')

    if options.power_law:
        with Timr('power law'):
            for cls, vs in g.classes.iteritems():
                if not vs:
                    continue

                indegrees = vs['weighted_indegree']

                try:
                    alpha_exp = ig.statistics.power_law_fit(indegrees, xmin=6)
                    print " * %s : alpha exp IN degree distribution : %10f " %\
                          (cls, alpha_exp)
                except ValueError:
                    print >> sys.stderr,\
                          " * %s : alpha exp IN degree distribution : ERROR" %\
                          (cls,)

    if options.histogram:
        list_with_index = lambda degrees, idx: [(degree, idx) for degree
                                                in degrees if degree]
        all_list = []

        nogrp_indegrees = g.g.vs.select(sysop_ne=True, bureaucrat_ne=True,
                                        steward_ne=True, founder_ne=True,
                                        bot_ne=True)['weighted_indegree']
        all_list += list_with_index(nogrp_indegrees, 1)

        sysops_indegrees = g.classes['sysop']['weighted_indegree']
        all_list += list_with_index(sysops_indegrees, 2)

        burs_indegrees = g.classes['bureaucrat']['weighted_indegree']
        all_list += list_with_index(burs_indegrees, 3)

        stewards_indegrees = g.classes['steward']['weighted_indegree']
        all_list += list_with_index(stewards_indegrees, 4)

        founders_indegrees = g.classes['founder']['weighted_indegree']
        all_list += list_with_index(founders_indegrees, 5)

        bots_indegrees = g.classes['bot']['weighted_indegree']
        all_list += list_with_index(bots_indegrees, 6)

        if options.gnuplot:
            f = open('hist.dat', 'w')
        else:
            f = open('%swiki-%s-hist.dat' % (lang, date), 'w')

        all_list.sort(reverse=True)

        for indegree, grp in all_list:
            for _ in range(grp - 1):
                print >> f, 0,
            print >> f, indegree,
            for _ in range(grp, 6):
                print >> f, 0,
            print >> f, ""
        f.close()

    if options.gnuplot:
        from popen2 import Popen3

        process = Popen3('gnuplot hist.gnuplot')
        process.wait()

        os.rename('hist.png', '%swiki-%s-hist.png' % (lang, date))
        os.rename('hist.dat', '%swiki-%s-hist.dat' % (lang, date))

    if options.plot:
        ## TODO: evaluate if this can be done with
        ## http://bazaar.launchpad.net/~igraph/igraph/0.6-main/revision/2018
        with Timr('plot'):
            import math

            ## filter:
            #print len(g.g.vs), len(g.g.es)
            #g.set_weighted_degree(type=ig.OUT)
            #g.g = g.g.subgraph(g.g.vs.select(weighted_indegree_ge=10,
            #                           weighted_outdegree_ge=1))
            #g.g.write_graphml('itwiki-20100729-stub-meta-history_in10_out1.graphml')
            #print len(g.g.vs), len(g.g.es)

            bots = g.g.vs.select(bot=True)
            bots['color'] = ('purple',)*len(bots)
            logging.debug('bots: ok')

            anonyms = g.g.vs.select(anonymous=True)
            anonyms['color'] = ('blue',)*len(anonyms)

            sysops = g.g.vs.select(sysop=True)
            sysops['color'] = ('yellow',)*len(sysops)

            bur_sysops = g.g.vs.select(bureaucrat=True, sysop=True)
            bur_sysops['color'] = ('orange',)*len(bur_sysops)

            g.g.vs['size'] = [math.sqrt(v['weighted_indegree']+1)*10 for v
                              in g.g.vs]

            logging.debug('plot: begin')
            ig.plot(g.g, target=lang+"_general.png", bbox=(0, 0, 8000, 8000),
                    edge_color='grey', layout='drl')
            logging.debug('plot: end')
            weights = g.g.es['weight']
            max_weight = max(weights)

            g.g.es['color'] = [(255.*e['weight']/max_weight, 0., 0.) for e
                               in g.g.es]
            g.g.es['width'] = weights

            ig.plot(g.g, target=lang+"_weighted_edges.png", bbox=(0, 0, 4000,
                                                                  2400),
                    layout='fr', vertex_label=' ')


    if options.as_table:
        tablr.stop()

        #tablr.printHeader()
        #tablr.printData()
        tablr.saveInDjangoModel()


    if options.adjacency:
        giant = g.g.clusters().giant()
        #destAdj = "%s/%swiki-%s-adj.csv" % (os.path.split(fn)[0], lang, date)
        destAdj = "%swiki-%s-adj.csv" % (lang, date)
        #destRec = "%s/%swiki-%s-rec.csv" % (os.path.split(fn)[0], lang, date)
        destRec = "%swiki-%s-rec.csv" % (lang, date)
        sg.Graph(giant).writeAdjacencyMatrix(destAdj, 'username')
        sg.Graph(giant).writeReciprocityMatrix('username', destRec)


    if options.users_role:
        l = g.get_user_class('username', ('anonymous', 'bot', 'bureaucrat',
                                        'sysop'))

        #destUR = "%s/%swiki-%s-ur.csv" % (os.path.split(fn)[0], lang, date)
        destUR = "%swiki-%s-ur.csv" % (lang, date)
        with open(destUR, 'w') as f:
            for username, role in sorted(l):
                print >> f, "%s,%s" % (username, role)

        from random import shuffle
        #destCls = "%s/%swiki-%s-%%s.csv" % (os.path.split(fn)[0], lang, date)
        destCls = "%swiki-%s-%%s.csv" % (lang, date)
        for cls in ('anonymous', 'bot', 'bureaucrat', 'sysop', 'normal_user'):
            users = g.classes[cls]['username']
            shuffle(users)
            with open(destCls % cls, 'w') as f:
                for username in users:
                    print >> f, \
                          ("%s,http://vec.wikipedia.org/w/index.php?title="+\
                          "Discussion_utente:%s&action=history&offset="+\
                          "20100000000001") % (username, username)