Пример #1
0
def get_q_list(url=None):
    """Download a Wikidata Terminator listing and extract its rows.

    Args:
        url: Address of the listing page to fetch. When omitted, the
            built-in query is used: items that are an instance of (claim 31)
            item 202444, or of any item that is a subclass of (claim 279)
            item 202444.

    Returns:
        A list of ``(q_id, text)`` tuples, one per matched table row.
        ``q_id`` is the ``Q<digits>`` identifier taken from the row's
        wikidata.org link; ``text`` is whatever stands in parentheses inside
        the row's ``<small>`` element.
    """
    # Wikidata item ids relevant to the query:
    #   202444   - given name
    #   12308941 - male given name
    #   11879590 - female given name
    #   3409032  - unisex given name
    #
    # An earlier form of the query enumerated the four classes explicitly
    # (q=claim[31:202444,12308941,11879590,3409032]); it was superseded by
    # the subclass-based query below.
    if url is None:
        url = 'http://tools.wmflabs.org/wikidata-terminator/?list&lang=ru&mode=t1000&q=claim[31:(claim[279:202444])]%20OR%20claim[31:202444]'

    # read() instead of readall(): HTTPResponse lost readall() in Python 3.5.
    # The with-block closes the connection even if decoding fails.
    with urllib.request.urlopen(url) as response:
        str_response = response.read().decode('utf-8')

    # Raw string so that \d and \( reach the regex engine unchanged.
    return re.findall(
        r"<tr><td><a href='//www.wikidata.org/wiki/(Q\d+?)'.*?<small>\((.+?)\)</small>",
        str_response,
    )
Пример #2
0
def get_ru_labels(ids, q):
    """Fetch the Russian labels of several Wikidata entities.

    Args:
        ids: Entity ids joined with ``|`` (for example ``'Q42|Q1'``); the
            string is inserted verbatim into the ``ids`` query parameter.
        q: Id of the entity whose own label is reported separately.

    Returns:
        A tuple ``(existing_title, ru_labels)``. ``existing_title`` is the
        Russian label of ``q``, or ``''`` when ``q`` is absent from the
        response or has no Russian label. ``ru_labels`` lists the Russian
        labels of the returned entities, with one occurrence of
        ``existing_title`` removed when it was found.

    Raises:
        KeyError: If the response has no ``"entities"`` key.
    """
    # Example request:
    # https://www.wikidata.org/w/api.php?action=wbgetentities&ids=Q42|Q1&props=labels&languages=ru
    url_ru_labels = 'https://www.wikidata.org/w/api.php?format=json&action=wbgetentities&ids=' + ids + '&props=labels&languages=ru'

    # read() instead of readall(): HTTPResponse lost readall() in Python 3.5.
    # The with-block closes the connection even if decoding fails.
    with urllib.request.urlopen(url_ru_labels) as response:
        str_response = response.read().decode('utf-8')
    item_json = json.loads(str_response)

    entities = item_json["entities"]

    ru_labels = []
    for ent in entities:
        try:
            ru_labels.append(entities[ent]["labels"]["ru"]["value"])
        except (KeyError, TypeError):
            # Entity without a Russian label. TypeError covers a "labels"
            # value that is not a mapping (e.g. an empty list).
            continue

    try:
        existing_title = entities[q]["labels"]["ru"]["value"]
    except (KeyError, TypeError):
        existing_title = ''
    else:
        # The loop above appended this label, so drop one copy of it to
        # leave only the labels of the other entities.
        ru_labels.remove(existing_title)

    return existing_title, ru_labels
Пример #3
0
    print('--------')

    latin_title = qel[1]
    q = qel[0]

    print(q, latin_title.encode('cp1251', 'replace'))

    if " " in latin_title:
        continue

    # url = "http://www.wikidata.org/w/api.php?format=json&action=wbgetentities&ids="+q+"&props=labels&languages=ru|en"
    # http://wdq.wmflabs.org/api?q=claim[735:7451984]%20AND%20link[ruwiki]
    url_what_have_this_name = 'http://wdq.wmflabs.org/api?q=claim[735:' + q.replace('Q', '') + ']%20AND%20link[ruwiki]'

    response = urllib.request.urlopen(url_what_have_this_name)
    str_response = response.readall().decode('utf-8')
    item_json = json.loads(str_response)

    ids = q
    for i in item_json["items"]:
        ids += '|Q' + str(i)

    print(ids)

    existing_title, ru_labels = get_ru_labels(ids, q)

    print(existing_title.encode('cp1251', 'replace'))
    try:
        print(ru_labels)
    except:
        ''
Пример #4
0
def get_text_by_url(url):
    """Return the body of ``url`` decoded as UTF-8 text.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Returns:
        The full response body as a ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # read() instead of readall(): HTTPResponse lost readall() in Python 3.5.
    # The with-block closes the connection even if decoding fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')