Example #1
 def test_equality(self):
     u1 = page.Page(self.site, "GoodUsername", check=True)
     u2 = page.Page(self.site, "GoodUsername", check=False)
     self.assertEqual(u1, u2)
     site2 = wiki.Wiki("https://en.wikipedia.org/w/api.php")
     u3 = user.User(site2, "GoodUsername")
     self.assertNotEqual(u1, u3)
Example #2
def queries():
    if args.queryfile:
        with open(args.queryfile) as f:
            for line in f:
                yield line.strip()
    elif args.query:
        yield args.query
    elif args.category and not HAS_WIKITOOLS:
        sys.exit(
            "-cat option given, but wikitools package is not present, "
            "see <https://github.com/alexz-enwp/wikitools>"
        )
    elif args.category and HAS_WIKITOOLS:
        site = wiki.Wiki("https://commons.wikimedia.org/w/api.php")
        query = []
        params = {
            'action': 'query',
            'prop': 'imageinfo',
            'iiprop': 'url',
            'generator': 'categorymembers',
            'gcmtitle': 'Category:' + args.category,
            'gcmnamespace': '6',
            'gcmprop': 'title'
        }

        req = api.APIRequest(site, params)

        for data in req.queryGen():
            keys = data['query']['pages'].keys()

            for key in keys:
                url = data['query']['pages'][key]['imageinfo'][0]['url']
                yield re.sub("https://upload.wikimedia.org", "", url)
    else:
        sys.exit("No query given")
Example #3
def fileHook(parser_env, namespace, body):
    (file_name, pipe, size) = body.partition('|')

    site = wiki.Wiki('https://en.wikipedia.org/w/api.php')
    params = {
        'action': 'query',
        'titles': 'File:' + file_name,
        'prop': 'imageinfo',
        'iiprop': 'url|thumbmime',
        'iiurlwidth': size
    }
    request = api.APIRequest(site, params)
    result = request.query()
    try:
        # dict views are not indexable in Python 3, so materialize first
        imageinfo = list(result['query']['pages'].values())[0]['imageinfo'][0]
        url = imageinfo['thumburl']
        desc_url = imageinfo['descriptionurl']
        width = imageinfo['thumbwidth']
        height = imageinfo['thumbheight']
    except (KeyError, IndexError):
        return file_name
    text = '<a href="%s" class="image">' % desc_url
    text += '<img alt="%s" src="%s" width="%s" height="%s"></a>' % (
        file_name, url, width, height)
    return text
Example #4
def genotype_getter(my_filtered_snps):
    site = wiki.Wiki("http://snpedia.com/api.php")
    genotypes = {}

    for single_snp in my_filtered_snps:
        type_counter = 1
        wikipage = page.Page(site, single_snp.name)
        snp_page = wikipage.getWikiText()

        while snp_page.find("geno" + str(type_counter)) != -1:
            # Each genotype is listed as "genoN" followed by a
            # parenthesized pair such as (A;A); extract that span.
            type_start = snp_page.find("geno" + str(type_counter))
            type_start = snp_page.find("(", type_start)
            type_stop = snp_page.find(")", type_start)
            genotypes.setdefault(single_snp.name, []).append(
                str(snp_page[type_start:type_stop + 1]))
            type_counter += 1

        print("Got genotypes for " + str(single_snp.name))
    with open("genotypes.data", "wb") as genotype_outfile:
        pickle.dump(genotypes, genotype_outfile)
    return genotypes
Example #5
    def loadData(self):
        from wikitools import wiki
        from wikitools import category

        wikiobj = wiki.Wiki("https://en.wikipedia.org/w/api.php")
        wikicat = category.Category(wikiobj, title="2016_films")
        self.wikipages = wikicat.getAllMembers()
Example #6
    def extlinks_extraction(self, lang, title):
        links = []
        linklist = []

        site = wiki.Wiki("https://" + lang + ".wikipedia.org/w/api.php")

        #urllib2.quote(title.encode("utf8"))
        #title = title.encode("utf-8")
        params = {
            'action': 'query',
            'titles': title,
            'prop': 'extlinks',
            'ellimit': 500
        }
        req = api.APIRequest(site, params)

        for res in req.queryGen():
            #pprint.pprint(res)
            for pidkey in res['query']['pages']:
                #print res['query']['pages']
                if 'extlinks' in res['query']['pages'][pidkey]:
                    linklist = res['query']['pages'][pidkey]['extlinks'] + linklist
            links = links + linklist
            linklist = []

    #    print links
        return links
Example #7
def getBLPs():
	site = wiki.Wiki()
	site.login(settings.bot, settings.botpass)
	site.setMaxlag(-1)
	date = datetime.datetime.utcnow()+datetime.timedelta(days=5)	
	table = date.strftime('pop_%b%y')
	db = MySQLdb.connect(host="sql-s1-user", read_default_file="/home/alexz/.my.cnf")
	cursor = db.cursor()
	insertquery = 'INSERT INTO u_alexz.'+table+' (title, project_assess) VALUES( %s, %s )'
	updatequery = 'UPDATE u_alexz.'+table+' SET project_assess=CONCAT(project_assess,",",%s) WHERE title=%s'
	selectquery = """SELECT page_title FROM enwiki_p.page 
		JOIN enwiki_p.categorylinks ON page_id=cl_from 
		WHERE cl_to='Living_people' AND page_namespace=0 AND page_is_redirect=0 """
	cursor.execute(selectquery)
	pagesincat = cursor.fetchall()
	project_assess = "'wpblp':(None,None)"
	for title in pagesincat:			
		realtitle = title[0].decode('utf8').encode('utf8')
		if realtitle in titlelist:
			bits = (project_assess, realtitle)
			cursor.execute(updatequery, bits)
		else:
			titlelist.add(realtitle)
			bits = (realtitle, project_assess)
			cursor.execute(insertquery, bits)	
	db.close()	
Example #8
def getcontent(title):
    site = wiki.Wiki("http://wiki.chinahpo.org/api.php?")
    pagehandle = page.Page(site, title)  #title is the name of each SNP
    snp_page = pagehandle.getWikiText()  #Wiki page parse
    #print snp_page.encode('u8')
    title = title.replace("/", "&")
    with open('./CHPO/%s' % title, 'w+') as f:  # write into file
        f.write(snp_page)
Example #9
def wikipedia_query(query_params):
    """
	An extremely basic wrapper for the wikitools api.
	"""
    site = wiki.Wiki()  # This defaults to en.wikipedia.org
    request = api.APIRequest(site, query_params)
    result = request.query()
    return result[query_params['action']]
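Because the wrapper indexes the response with query_params['action'], the caller's dict must carry a matching 'action' key; a hedged usage sketch (title and prop are illustrative):

# Hypothetical call; for action='query' this returns result['query'].
data = wikipedia_query({
    'action': 'query',
    'titles': 'Python (programming language)',
    'prop': 'info',
})
print(data['pages'])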
Example #10
def wiki_login(w_url, w_user, w_pwd):

    print("Login ...")
    w_site = wiki.Wiki(w_url)
    w_site.login(w_user, w_pwd)
    print("... done.")

    return w_site
Example #11
 def get_site(user='******',
              api_site='http://wiki.travellerrpg.com/api.php',
              password=False):
     site = wiki.Wiki(api_site)
     access = site.login(user, password=password, remember=True)
     if not access:
         logger.error('Unable to log in')
     return site
Example #12
def search_snpedia(snp):
    """
    http://snpedia.com/index.php/Bulk
    """
    site = wiki.Wiki("http://bots.snpedia.com/api.php")
    pagehandle = page.Page(site, snp)
    snp_page = pagehandle.getWikiText()
    return snp_page
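A possible call, with an illustrative rsid standing in for any SNP page title:

# Hypothetical usage of the helper above.
wikitext = search_snpedia('Rs53576')
print(wikitext[:200])  # first few hundred characters of the page source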
Example #13
 def __init__(self, project, dumpspath):
     #self.db = None
     self.basic, self.fixes, self.paths = loadPathsAndLibs(
         project, dumpspath)
     self.project = project
     self.wiki = wiki.Wiki(self.paths['siteurl'])
     if self.paths:
         self.test()
Example #14
    def __init__(self):

        for site_lang_code in self.lang_codes.keys():
            self.sites[site_lang_code] = wiki.Wiki('https://' +
                                                   site_lang_code +
                                                   '.wikipedia.org/w/api.php')

        self.deserialize_progress_tracker()
Example #15
def get_article(url, source_id, rfc_DB):
    cmd = 'select id, disqus_id, section_index, title from website_article where url = %s'
    article_result = rfc_DB.fetch_one(cmd, (urllib2.unquote(url), ))

    if article_result is not None:
        article_id, disqus_id, section_index, title = article_result
        return article_id, disqus_id, section_index, title
    else:
        if 'wikipedia.org/wiki/' in url:
            url_parts = url.split('/wiki/')
            wiki_sub = url_parts[1].split(':')
            wiki_parts = ':'.join(wiki_sub[1:]).split('#')
            wiki_page = wiki_parts[0]
            section = None
            if len(wiki_parts) > 1:
                section = wiki_parts[1]

            from wikitools import wiki, api
            site = wiki.Wiki(_DOMAIN + '/w/api.php')
            page = urllib2.unquote(
                str(wiki_sub[0]) + ':' + wiki_page.encode('ascii', 'ignore'))
            params = {
                'action': 'parse',
                'prop': 'sections',
                'page': page,
                'redirects': 'yes'
            }
            try:
                request = api.APIRequest(site, params)

                result = request.query()

                disqus_id = str(result['parse']['pageid'])
                section_title = None
                section_index = None

                if section:
                    for s in result['parse']['sections']:
                        if s['anchor'] == section:
                            disqus_id = str(disqus_id) + '#' + str(s['index'])
                            section_title = s['line']
                            section_index = s['index']
                title = result['parse']['title']
                if section_title is not None:
                    title = title + ' - ' + section_title

                link = urllib2.unquote(url)
                article_insert_command = " insert into website_article (disqus_id, title, url, source_id, section_index)\
                                            values (%s, %s, %s, %s, %s)"

                article_id = rfc_DB.insert(
                    article_insert_command,
                    (disqus_id, title, link, source_id, section_index))
                return article_id, disqus_id, section_index, title

            except api.APIError as e:
                print(e)
Example #16
    def __init__(self, language='en'):
        self.language = language

        # base_sites_by_language = {
        #     'en':   "https://en.wikipedia.org/w/api.php",
        #     'es':   "http://es.wikipedia.org/w/api.php"
        # }
        self.site_url = "https://{:s}.wikipedia.org/w/api.php".format(language)
        self.site = wiki.Wiki(self.site_url)
Example #17
def import_wiki_authors(authors, rfc_DB):
    found_authors = set()
    anonymous_exist = False
    for author in authors:
        if author:
            found_authors.add(author)
        else:
            anonymous_exist = True
    authors_list = '|'.join(found_authors)

    from wikitools import wiki, api
    site = wiki.Wiki(_DOMAIN + '/w/api.php')
    params = {
        'action': 'query',
        'list': 'users',
        'ususers': authors_list,
        'usprop': 'blockinfo|groups|editcount|registration|emailable|gender',
        'format': 'json'
    }

    request = api.APIRequest(site, params)
    result = request.query()
    comment_authors = []
    for user in result['query']['users']:
        comment_author_id = None
        try:
            author_id = user['userid']
            # first check if the author exists using the username
            command = "select id from website_commentauthor where username = %s"
            (comment_author_id, ) = rfc_DB.fetch_one(command, (user['name'], ))
            # if no author exists with the same username
            if comment_author_id is None:
                author_insert_command = " insert into website_commentauthor (username, disqus_id, joined_at, edit_count, gender, groups, is_wikipedia)\
                        values (%s, %s, %s, %s, %s, %s, %s)"

                joined_at = datetime.datetime.strptime(user['registration'],
                                                       '%Y-%m-%dT%H:%M:%SZ')
                params = (user['name'], author_id, joined_at,
                          user['editcount'], user['gender'],
                          ','.join(user['groups']), 1)
                comment_author_id = rfc_DB.insert(author_insert_command,
                                                  params)

        except Exception:
            command = " insert into website_commentauthor (username, is_wikipedia)\
                        values (%s, %s)"

            comment_author_id = rfc_DB.insert(command, (user['name'], 1))

        if comment_author_id is not None:
            comment_authors.append(comment_author_id)

    if anonymous_exist:
        anonymous_id = rfc_DB.get_anonymous_id()
        comment_authors.append(anonymous_id)

    return comment_authors
Example #18
 def test_equality(self):
     p1 = page.Page(self.site, "Page", check=True)
     p2 = page.Page(self.site, "Page", check=False)
     self.assertEqual(p1, p2)
     site2 = wiki.Wiki("https://en.wikipedia.org/w/api.php")
     p3 = page.Page(site2, "Page")
     self.assertNotEqual(p1, p3)
     p4 = page.Page(self.site, "Talk:Page")
     self.assertNotEqual(p1, p4)
Example #19
def setupProject(project, abbrv):
	site = wiki.Wiki()
	site.login(settings.bot, settings.botpass)
	site.setMaxlag(-1)
	date = datetime.datetime.utcnow()+datetime.timedelta(days=5)	
	table = date.strftime('pop_%b%y')
	db = MySQLdb.connect(host="sql-s1-user", read_default_file="/home/alexz/.my.cnf")
	cursor = db.cursor()
	projecttitles = set()
	project = project.replace(' ', '_')
	types = ['FA', 'FL', 'A', 'GA', 'B', 'C', 'start', 'stub', 'list', 'image', 'portal', 'category', 'book', 'disambig', 'template', 'unassessed', 'blank', 'non-article']
	insertquery = 'INSERT INTO u_alexz.'+table+' (title, project_assess) VALUES( %s, %s )'
	updatequery = 'UPDATE u_alexz.'+table+' SET project_assess=CONCAT(project_assess,",",%s) WHERE title=%s'
	selectquery = """SELECT page_namespace-1, page_title, SUBSTRING_INDEX(clB.cl_to, '-', 1) FROM enwiki_p.page 
		JOIN enwiki_p.categorylinks AS clA ON page_id=clA.cl_from 
		LEFT JOIN enwiki_p.categorylinks AS clB ON page_id=clB.cl_from AND clB.cl_to LIKE "%%-importance_"""+project+"""_articles"
		WHERE clA.cl_to=%s AND page_is_redirect=0 """
	for type in types:
		if type == "unassessed":
			cat = "Category:Unassessed "+project+" articles"
		elif type == "non-article":
			cat = "Category:Non-article "+project+" pages"
		elif type == "blank":
			cat = "Category:"+project+" pages"
		else:
			cat = "Category:"+type+"-Class "+project+" articles"
		catpage = page.Page(site, cat)
		if not catpage.exists:
			continue
		catpage.setNamespace(0)
		catname = catpage.title.replace(' ', '_').encode('utf-8')
		print(catname)
		cursor.execute(selectquery, (catname,))
		pagesincat = cursor.fetchall()
		for title in pagesincat:			
			if not title[0]%2 == 0:
				continue
			realtitle = title[1].decode('utf8').encode('utf8')
			if title[0] != 0:
				p = page.Page(site, realtitle, check=False, namespace=title[0])
				realtitle = p.title.encode('utf8').replace(' ', '_')
			if realtitle in projecttitles:
				continue
			if title[2] is None:
				project_assess = "'%s':('%s',None)" % (abbrv, type)
			else:
				project_assess = "'%s':('%s','%s')" % (abbrv, type, title[2])
			projecttitles.add(realtitle)
			if realtitle in titlelist:
				bits = (project_assess, realtitle)
				cursor.execute(updatequery, bits)
			else:
				titlelist.add(realtitle)
				bits = (realtitle, project_assess)
				cursor.execute(insertquery, bits)	
	del projecttitles
	db.close()
Example #20
def get_snpedia_snp_names():

    site = wiki.Wiki('http://bots.snpedia.com/api.php')
    snps = category.Category(site, 'Is_a_snp')
    snpedia = set()

    for article in snps.getAllMembersGen(namespaces=[0]):
        snpedia.add(article.title.lower())

    return snpedia
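The returned set holds lowercased titles, so a membership test would look like this sketch (the rsid is illustrative):

# Hypothetical membership check against the lowercased titles.
snp_names = get_snpedia_snp_names()
if 'rs53576' in snp_names:
    print('SNPedia has a page for rs53576')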
Example #21
 def __init__(self, project, dumpspath, username='', password=''):
     self.basic, self.fixes, self.paths = loadPathsAndLibs(
         project, dumpspath)
     self.project = project
     self.wiki = wiki.Wiki(self.paths['siteurl'])
     self.username = username
     self.password = password
     if self.paths:
         self.test()
     print('UploadFixes initialised')
Example #22
def get_drugs(fname):
    site = wiki.Wiki("http://bots.snpedia.com/api.php")
    drugs = category.Category(site, "Is_a_medicine")
    n = 0

    with open(fname, 'w') as f:
        for article in drugs.getAllMembersGen(namespaces=[0]):
            drug = _normalize_str(article.title.strip())
            f.write(drug + '\n')
            n += 1

    print('drugs extracted:', n)
Example #23
File: wiki.py  Project: afcarl/sicekit
def getWiki(configuration):
    """Create the wiki object from the configuration."""
    _wiki = wiki.Wiki(configuration.wiki_apiurl)
    _wiki.cookiepath = configuration.cookiejar
    if not _wiki.login(configuration.wiki_username,
                       configuration.wiki_password,
                       domain=configuration.wiki_domain,
                       remember=True):
        raise WikiLoginError("Login failed early")
    if not _wiki.isLoggedIn():
        raise WikiLoginError("Login failed")
    return _wiki
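getWiki only needs an object exposing the attributes it reads; a minimal stand-in with illustrative values (the real project presumably loads these from a config file):

# Hypothetical configuration object; every value here is a placeholder.
class Configuration:
    wiki_apiurl = 'https://en.wikipedia.org/w/api.php'
    cookiejar = '/tmp/wiki-cookies'
    wiki_username = 'ExampleBot'
    wiki_password = 'example-password'
    wiki_domain = None

site = getWiki(Configuration())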
Example #24
def crawl(url_param):

    # Fix eventual full URL
    url_param = unquote_plus(basename(url_param))

    # Generate query
    params = {
            'action'        : 'query',
            'prop'          : 'imageinfo|revisions',
            'iiprop'        : 'url|sha1|size',
            'rvprop'        : 'content',
            'rawcontinue'   : '' }

    url_type = get_url_type(url_param)

    if url_type == 'category':
        params['generator'] = 'categorymembers'
        params['gcmtitle']  = url_param
        params['gcmlimit']  = 'max'
    elif url_type == 'file':
        params['titles']    = url_param
    else:
        params['generator'] = 'images'
        params['titles']    = url_param
        params['gimlimit']  = 'max'


    # Call API
    site = wiki.Wiki(API_URL)
    request = api.APIRequest(site, params)

    print_verbose("Site: %s" % str(site), 2)
    print_verbose("Query: ", 2)
    pprint_verbose(params, 2)

    result = request.query(querycontinue=True)
    print_verbose("Result: ", 4)
    pprint_verbose(result, 4)

    # Check result
    if 'error' in result:
        raise Error(result['error'])

    if 'warnings' in result:
        # the API returns a dict here; stringify before writing
        sys.stderr.write(str(result['warnings']))
        return None

    if '-1' in result['query']['pages']:
        sys.stderr.write(str(result['query']['pages']['-1']))
        return None

    return result['query']['pages']
Example #25
def import_wiki_authors(authors, article):
    found_authors = []
    anonymous_exist = False
    for author in authors:
        if author:
            found_authors.append(author)
        else:
            anonymous_exist = True
    authors_list = '|'.join(found_authors)

    from wikitools import wiki, api
    domain = article.url.split('/wiki/')[0]
    site = wiki.Wiki(domain + '/w/api.php')
    params = {
        'action': 'query',
        'list': 'users',
        'ususers': authors_list,
        'usprop': 'blockinfo|groups|editcount|registration|emailable|gender',
        'format': 'json'
    }

    request = api.APIRequest(site, params)
    result = request.query()
    comment_authors = []
    for user in result['query']['users']:
        try:
            author_id = user['userid']
            comment_author = CommentAuthor.objects.filter(disqus_id=author_id)
            if comment_author.count() > 0:
                comment_author = comment_author[0]
            else:
                joined_at = datetime.datetime.strptime(user['registration'],
                                                       '%Y-%m-%dT%H:%M:%SZ')
                comment_author = CommentAuthor.objects.create(
                    username=user['name'],
                    disqus_id=author_id,
                    joined_at=joined_at,  # use the parsed datetime, not the raw string
                    edit_count=user['editcount'],
                    gender=user['gender'],
                    groups=','.join(user['groups']),
                    is_wikipedia=True)
        except Exception:
            comment_author = CommentAuthor.objects.create(
                username=user['name'], is_wikipedia=True)
        comment_authors.append(comment_author)

    if anonymous_exist:
        comment_authors.append(
            CommentAuthor.objects.get(disqus_id='anonymous',
                                      is_wikipedia=True))

    return comment_authors
Example #26
 def toolbar_icon_clicked(self, widget, movie):
     import pprint # Used for formatting the output for viewing, not necessary for most code
     from wikitools import wiki, api
     site = wiki.Wiki("http://de.wikipedia.org/w/api.php")
     params = {'action':'query',
         'list':'search',
         'srsearch':'rocky',
         'srprop':'',
         'srlimit':'50'
     }
     req = api.APIRequest(site, params)
     res = req.query(querycontinue=False)
     pprint.pprint(res)
Example #27
 def test_parseJSON_maxlag(self):
     site = wiki.Wiki("https://en.wikipedia.org/w/api.php")
     params = {"action": "query"}
     req = api.APIRequest(site, params)
     req.changeParam("maxlag", "-1")
     warnings.filterwarnings("error",
                             category=UserWarning,
                             module="wikitools.api")
     with self.assertRaises(UserWarning):
         req.query(False)
     warnings.filterwarnings("default",
                             category=UserWarning,
                             module="wikitools.api")
Example #28
def snpedia_getter():
    site = wiki.Wiki("http://snpedia.com/api.php")  # open snpedia
    snps = category.Category(site, "Is_a_snp")
    snpedia = {}

    for article in snps.getAllMembersGen(namespaces=[0]):  # get all snp-names
        snpedia[article.title.lower()] = "in snpedia"
        print(article.title)

    with open("snpedia.data", "wb") as snpedia_outfile:  # save all snps to cache
        pickle.dump(snpedia, snpedia_outfile)
    return snpedia
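Both genotype_getter and snpedia_getter persist their results with pickle, so reloading the cache later is symmetric:

# Reload the cache written above.
import pickle
with open("snpedia.data", "rb") as f:
    snpedia = pickle.load(f)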
Example #29
def listFromCategory(project,
                     dumpspath,
                     categorytitle,
                     namespaces=None,
                     username=None,
                     password=None):
    mdlfixes, paths = loadPathsAndLibs(project, dumpspath)
    site = wiki.Wiki(paths['siteurl'], username, password)
    c = category.Category(site, categorytitle)
    titles = c.getAllMembers(namespaces)
    with open(paths['list'], 'wt', encoding='utf_8') as ftitles:
        ftitles.write('\n'.join(titles))
        print('titles written')
Example #30
    def __init__(self, config, config_section, wiki_name, callback):
        self.config = config
        self.config_section = config_section
        self.wiki_name = wiki_name
        self.wiki = _wiki.Wiki('https://%s/w/api.php' % self.wiki_name)
        self.username = config.get(self.config_section, 'wiki_user')
        self.password = config.get(self.config_section, 'wiki_password')
        self.callback = callback

        if self.callback.__self__.log:
            # We could use .getChild(), but then %(name)s would be 'bot.wiki', we want instead 'bot:wiki'
            self.log = logging.getLogger('%s:%s' % (self.callback.__self__.nickname, self.wiki_name))

        self.load_wiki_configuration()
        self.loop = LoopingCall(self.fetch_log)