Exemplo n.º 1
0
    def get_globalusage(self, site, image, shared = False):
        """Yield the usages of a file on other wikis, as reported by the API.

        Sends ``action=query`` with ``prop=globalusage|imageinfo`` for the
        title ``File:<image>`` and keeps following ``query-continue`` until
        the API reports no further ``globalusage`` continuation.

        Arguments:
        site   -- either a hostname string (the API path is then
                  '/w/api.php') or an object providing ``hostname()`` and
                  ``apipath()``.
        image  -- file name; 'File:' is prepended before querying.
        shared -- when true, nothing is yielded if the API reports the
                  file's 'imagerepository' as 'local'.

        Yields ``(family(usage['wiki']), (namespace, name, title))`` where
        ``title`` has spaces replaced by underscores and ``name`` is
        ``strip_ns(title)`` for non-zero namespaces, ``title`` otherwise.

        The generator ends silently when the response is empty or carries
        no 'query' / 'pages' data.
        """
        self.connect_http()
        # isinstance instead of an exact type check so str subclasses are
        # also treated as plain hostnames.
        if isinstance(site, str):
            hostname = site
            apipath = '/w/api.php'
        else:
            hostname = site.hostname()
            apipath = site.apipath()

        kwargs = {'action': 'query', 'titles': u'File:' + image,
                  'prop': 'globalusage|imageinfo',
                  'iiprop': '', 'guprop': 'namespace', 'gulimit': '500'}

        while True:
            res = self.http.query_api(hostname, apipath, **kwargs)
            # Use .get() so a response without 'query' or 'pages' (e.g. an
            # API error payload) ends the generator instead of raising KeyError.
            pages = (res.get('query') or {}).get('pages') if res else None
            if not pages:
                return

            # Only one title is queried, so only the first page entry matters.
            # next(iter(...)) works on both list results and dict views.
            page = next(iter(pages.values()))
            if shared and page.get('imagerepository') == 'local':
                return

            for usage in page.get('globalusage', ()):
                title = usage['title'].replace(' ', '_')
                namespace = int(usage['ns'])
                # Separate local name: do not rebind the ``site`` parameter.
                wiki = family(usage['wiki'])

                if namespace != 0:
                    yield wiki, (namespace, strip_ns(title), title)
                else:
                    yield wiki, (namespace, title, title)

            if 'globalusage' in res.get('query-continue', ()):
                # Merge the continuation parameters into the next request.
                kwargs.update(res['query-continue']['globalusage'])
            else:
                return
Exemplo n.º 2
0
	def __init__(self, limit = 100, 
			mysql_default_server = 2, mysql_host_prefix = 'sql-s', mysql_kwargs = {}, 
			no_db = False, use_autoconn = False, 
			
			http_retry_timeout = 30, http_max_retries = -1, 
			http_callback = lambda *args: None,
			
			mysql_retry_timeout = 60,
			mysql_max_retries = -1, mysql_callback = lambda *args: None):
		"""Store the HTTP/MySQL settings and locate the wiki databases.

		The HTTP settings (http_retry_timeout, http_max_retries,
		http_callback) are always stored. When no_db is true the
		constructor returns right after that, so none of the
		database-related attributes are set.

		Otherwise a connection to the host
		``mysql_host_prefix + str(mysql_default_server)`` is opened via
		``self.connect_mysql`` and the first ``limit`` rows of
		``toolserver.wiki`` (ordered by size, largest first) are read to
		fill:

		self.servers          -- server id -> result of connect_mysql
		self.databases        -- database name -> entry of self.servers
		self.sites            -- database name -> (lang, family)
		self.known_families   -- family name -> wikipedia.Family object
		self.unknown_families -- domains for which family resolution
		                         raised RuntimeError or ValueError

		mysql_kwargs is stored as self.mysql_kwargs without any 'host'
		entry. The caller's dictionary is not modified.
		"""
				
		self.http = None 
		self.http_retry_timeout = http_retry_timeout
		self.http_max_retries = http_max_retries
		self.http_callback = http_callback
		
		if no_db: return

		self.mysql_host_prefix = mysql_host_prefix
		# Work on a private copy: deleting 'host' from the argument itself
		# would alter the caller's dict and the shared default ``{}``.
		self.mysql_kwargs = mysql_kwargs.copy()
		if 'host' in self.mysql_kwargs: del self.mysql_kwargs['host']
		self.use_autoconn = use_autoconn
		self.mysql_retry_timeout = mysql_retry_timeout
		self.mysql_max_retries = mysql_max_retries
		self.mysql_callback = mysql_callback
		
		self.connections = []
		
		# Mapping database name -> mysql connection
		self.databases = {}
		# Mapping server id -> mysql connection
		self.servers = {}
		# Mapping database name -> (lang, family)
		self.sites = {}
		
		self.unknown_families = []
		# Mapping family name -> family object
		self.known_families = {}

		database, cursor = self.connect_mysql(mysql_host_prefix + str(mysql_default_server))
		self.servers[mysql_default_server] = (database, cursor)

		# Find where the databases are located
		cursor.execute('SELECT dbname, domain, server FROM toolserver.wiki ORDER BY size DESC LIMIT %s', (limit, ))
		for dbname, domain, server in cursor.fetchall():
			# Connect to each server only once, on first appearance.
			if server not in self.servers:
				self.servers[server] = self.connect_mysql(mysql_host_prefix + str(server))
			
			# FIXME: wikimediafoundation!
			try:
				lang, fam = family(domain)
				if fam not in self.known_families:
					self.known_families[fam] = wikipedia.Family(fam, fatal = False)
			except (RuntimeError, ValueError):
				self.unknown_families.append(domain)
			else:
				self.sites[dbname] = (lang, fam)
				self.databases[dbname] = self.servers[server]
Exemplo n.º 3
0
 def startElement(self, name, attr):
     """React to an opening tag: start a new family or add a product to it.

     A "Family" tag replaces ``self.family`` with a fresh ``family()`` whose
     name is the tag's "Name" attribute. A "Product" tag creates a
     ``product()`` named after its "Name" attribute, with ``displayMode``
     taken from the "DisplayMode" attribute ("Visible" when absent), and
     appends it to ``self.family.products``. Other tags are ignored.
     """
     if name == "Family":
         self.family = family()
         self.family.name = attr.getValue("Name")
     elif name == "Product":
         present = attr.getNames()
         item = product()
         item.name = attr.getValue("Name")
         # Fall back to "Visible" when the tag carries no DisplayMode.
         item.displayMode = (
             attr.getValue("DisplayMode")
             if "DisplayMode" in present
             else "Visible"
         )
         self.family.products.append(item)
Exemplo n.º 4
0
def return_family_list(n=1000, m=5, seed=0):
    """Create one ``family`` per node of a random graph and relate neighbours.

    The graph comes from ``nx.random_graphs.barabasi_albert_graph(n, m, seed)``.
    Each node gets a ``family`` labelled with the node. For every edge
    ``(a, b)`` the family at index ``a`` calls ``relate`` on the family at
    index ``b``.

    Returns the tuple ``(fam_list, G)``.
    """
    G = nx.random_graphs.barabasi_albert_graph(n, m, seed)
    edges = list(G.edges())
    # Node values double as indices into fam_list below.
    fam_list = [family(label=node) for node in G]
    for src, dst in edges:
        fam_list[src].relate(fam_list[dst])
    return fam_list, G
Exemplo n.º 5
0
    def __init__(self, limit = 100,
            mysql_default_server = 3, mysql_host_prefix = 'sql-s', mysql_host_suffix = '',
            mysql_kwargs = {}, no_db = False, use_autoconn = False,

            http_retry_timeout = 30, http_max_retries = -1,
            http_callback = lambda *args: None,

            mysql_retry_timeout = 60,
            mysql_max_retries = -1, mysql_callback = lambda *args: None):
        """Store the HTTP/MySQL settings and locate the wiki databases.

        The HTTP settings (http_retry_timeout, http_max_retries,
        http_callback) are always stored. When no_db is true the
        constructor returns right after that, so none of the
        database-related attributes are set.

        Otherwise every server host name is built as
        ``mysql_host_prefix + str(server_id) + mysql_host_suffix``. A
        connection to the default server is opened via
        ``self.connect_mysql`` and the first ``limit`` rows of
        ``toolserver.wiki`` (ordered by size, largest first) are read to
        fill:

        self.servers          -- server id -> result of connect_mysql
        self.databases        -- database name -> entry of self.servers
        self.sites            -- database name -> (lang, family)
        self.domains          -- database name -> domain (every row)
        self.known_families   -- family name -> wikipedia.Family object
        self.unknown_families -- domains for which family resolution
                                 raised RuntimeError, ValueError or
                                 SyntaxError

        mysql_kwargs is copied and stored as self.mysql_kwargs without any
        'host' entry. The caller's dictionary is not modified.
        """

        self.http = None
        self.http_retry_timeout = http_retry_timeout
        self.http_max_retries = http_max_retries
        self.http_callback = http_callback

        if no_db: return

        self.mysql_host_prefix = mysql_host_prefix
        self.mysql_kwargs = mysql_kwargs.copy() # To be safe
        if 'host' in self.mysql_kwargs: del self.mysql_kwargs['host']
        self.use_autoconn = use_autoconn
        self.mysql_retry_timeout = mysql_retry_timeout
        self.mysql_max_retries = mysql_max_retries
        self.mysql_callback = mysql_callback

        self.connections = []

        # Mapping database name -> mysql connection
        self.databases = {}
        # Mapping server id -> mysql connection
        self.servers = {}
        # Mapping database name -> (lang, family)
        self.sites = {}

        # Mapping database name -> domain
        self.domains = {}

        self.unknown_families = []
        # Mapping family name -> family object
        self.known_families = {}

        # The default server gets the same suffix as every other server, so
        # all host names are formed the same way.
        database, cursor = self.connect_mysql(
            mysql_host_prefix + str(mysql_default_server) + mysql_host_suffix)
        self.servers[mysql_default_server] = (database, cursor)

        # Find where the databases are located
        cursor.execute('SELECT dbname, domain, server FROM toolserver.wiki ORDER BY size DESC LIMIT %s', (limit, ))
        for dbname, domain, server in cursor.fetchall():
            # Connect to each server only once, on first appearance.
            if server not in self.servers:
                self.servers[server] = self.connect_mysql(mysql_host_prefix + str(server) + mysql_host_suffix)

            # FIXME: wikimediafoundation!
            # TODO: This is one big mess
            try:
                lang, fam = family(domain)
                if fam not in self.known_families:
                    self.known_families[fam] = wikipedia.Family(fam, fatal = False)
            except (RuntimeError, ValueError, SyntaxError):
                self.unknown_families.append(domain)
            else:
                self.sites[dbname] = (lang, fam)
                self.databases[dbname] = self.servers[server]

            # Recorded for every row, including unknown families.
            self.domains[dbname] = domain