Example #1
    def test_basedatatype_if_exists(self):
        instance_of_append = wbi_datatype.ItemID(prop_nr='P31',
                                                 value='Q1234',
                                                 if_exists='APPEND')
        instance_of_forceappend = wbi_datatype.ItemID(prop_nr='P31',
                                                      value='Q1234',
                                                      if_exists='FORCE_APPEND')
        instance_of_replace = wbi_datatype.ItemID(prop_nr='P31',
                                                  value='Q1234',
                                                  if_exists='REPLACE')
        instance_of_keep = wbi_datatype.ItemID(prop_nr='P31',
                                               value='Q1234',
                                               if_exists='KEEP')

        item = wbi_core.ItemEngine(
            item_id="Q2", data=[instance_of_append, instance_of_append])
        claims = [
            x['mainsnak']['datavalue']['value']['id']
            for x in item.get_json_representation()['claims']['P31']
        ]
        assert len(claims) > 1 and 'Q1234' in claims and claims.count(
            'Q1234') == 1

        item = wbi_core.ItemEngine(
            item_id="Q2",
            data=[instance_of_forceappend, instance_of_forceappend])
        claims = [
            x['mainsnak']['datavalue']['value']['id']
            for x in item.get_json_representation()['claims']['P31']
        ]
        assert len(claims) > 1 and 'Q1234' in claims and claims.count(
            'Q1234') == 2

        item = wbi_core.ItemEngine(item_id="Q2",
                                   data=[instance_of_replace],
                                   debug=True)
        claims = [
            x['mainsnak']['datavalue']['value']['id']
            for x in item.get_json_representation()['claims']['P31']
            if 'remove' not in x
        ]
        removed_claims = [
            True for x in item.get_json_representation()['claims']['P31']
            if 'remove' in x
        ]
        assert len(claims) == 1 and 'Q1234' in claims and len(
            removed_claims) == 3 and True in removed_claims

        item = wbi_core.ItemEngine(item_id="Q2",
                                   data=[instance_of_keep],
                                   debug=True)
        claims = [
            x['mainsnak']['datavalue']['value']['id']
            for x in item.get_json_representation()['claims']['P31']
        ]
        assert len(claims) == 3 and 'Q1234' not in claims
Example #2
    def test_label(self):
        item = wbi_core.ItemEngine(item_id="Q2")

        assert item.get_label('en') == "Earth"

        assert "globe" in item.get_aliases()

        assert item.get_label("es") == "Tierra"

        item.set_label("Earth")
        item.set_label("lorem")
        item.set_label("lorem ipsum", lang='en', if_exists='KEEP')
        assert item.json_representation['labels']['en'] == {'language': 'en', 'value': 'lorem'}
        assert item.json_representation['labels']['fr'] == {'language': 'fr', 'value': 'Terre'}
        item.set_aliases(["fake alias"], if_exists='APPEND')
        assert {'language': 'en', 'value': 'fake alias'} in item.json_representation['aliases']['en']

        item.set_label(label=None, lang='fr')
        item.set_label(label=None, lang='non-exist-key')
        assert 'remove' in item.json_representation['labels']['fr']

        item.get_label("ak")
        item.get_description("ak")
        item.get_aliases("ak")
        item.set_label("label", lang='ak')
        item.set_description("d", lang='ak')
        item.set_aliases(["a"], lang='ak', if_exists='APPEND')
        assert item.get_aliases('ak') == ['a']
        item.set_aliases("b", lang='ak')
        assert item.get_aliases('ak') == ['a', 'b']
        item.set_aliases("b", lang='ak', if_exists='REPLACE')
        assert item.get_aliases('ak') == ['b']
        item.set_aliases(["c"], lang='ak', if_exists='REPLACE')
        assert item.get_aliases('ak') == ['c']
Example #3
    def test_description(self):
        item = wbi_core.ItemEngine(item_id="Q2")
        descr = item.get_description('en')
        assert len(descr) > 3

        assert "planet" in item.get_description()

        # set_description on already existing description
        item.set_description(descr)
        item.set_description("lorem")
        item.set_description("lorem ipsum", lang='en', if_exists='KEEP')
        assert item.json_representation['descriptions']['en'] == {
            'language': 'en',
            'value': 'lorem'
        }
        # set_description on an empty description
        item.set_description("")
        item.set_description("lorem ipsum", lang='en', if_exists='KEEP')
        assert item.json_representation['descriptions']['en'] == {
            'language': 'en',
            'value': 'lorem ipsum'
        }

        item.set_description("lorem", lang='fr', if_exists='KEEP')
        item.set_description("lorem ipsum", lang='fr', if_exists='REPLACE')
        item.set_description("lorem", lang='en', if_exists='KEEP')
        assert item.json_representation['descriptions']['en'] == {
            'language': 'en',
            'value': 'lorem ipsum'
        }
        assert item.json_representation['descriptions']['fr'] == {
            'language': 'fr',
            'value': 'lorem ipsum'
        }
Example #4
    def test_fastrun_label(self):
        # tests fastrun label, description and aliases, and label in another language
        data = [wbi_core.ExternalID('/m/02j71', 'P646')]
        fast_run_base_filter = {'P361': 'Q18589965'}
        item = wbi_core.ItemEngine(item_id="Q2",
                                   data=data,
                                   fast_run=True,
                                   fast_run_base_filter=fast_run_base_filter)

        frc = wbi_core.ItemEngine.fast_run_store[0]
        frc.debug = True

        assert item.get_label('en') == "Earth"
        descr = item.get_description('en')
        assert len(descr) > 3
        aliases = item.get_aliases()
        assert "Terra" in aliases

        assert list(
            item.fast_run_container.get_language_data("Q2", 'en',
                                                      'label'))[0] == "Earth"
        assert item.fast_run_container.check_language_data(
            "Q2", ['not the Earth'], 'en', 'label')
        assert "Terra" in item.get_aliases()
        assert "planet" in item.get_description()

        assert item.get_label("es") == "Tierra"

        item.set_description(descr)
        item.set_description("fghjkl")
        assert item.json_representation['descriptions']['en'] == {
            'language': 'en',
            'value': 'fghjkl'
        }
        item.set_label("Earth")
        item.set_label("xfgfdsg")
        assert item.json_representation['labels']['en'] == {
            'language': 'en',
            'value': 'xfgfdsg'
        }
        item.set_aliases(["fake alias"], if_exists='APPEND')
        assert {
            'language': 'en',
            'value': 'fake alias'
        } in item.json_representation['aliases']['en']

        # something that's empty (for now; this can change, so this just makes sure no exception is thrown)
        frc.check_language_data("Q2", ['Ewiase'], 'ak', 'label')
        frc.check_language_data("Q2", ['not Ewiase'], 'ak', 'label')
        frc.check_language_data("Q2", [''], 'ak', 'description')
        frc.check_language_data("Q2", [], 'ak', 'aliases')
        frc.check_language_data("Q2", ['sdf', 'sdd'], 'ak', 'aliases')

        item.get_label("ak")
        item.get_description("ak")
        item.get_aliases("ak")
        item.set_label("label", lang="ak")
        item.set_description("d", lang="ak")
        item.set_aliases(["a"], lang="ak", if_exists='APPEND')
Example #5
def test_sitelinks():
    data = [wbi_core.ItemID(value='Q12136', prop_nr='P31')]
    item = wbi_core.ItemEngine(item_id='Q622901', data=data)
    item.get_sitelink("enwiki")
    assert "enwiki" not in item.json_representation['sitelinks']
    item.set_sitelink("enwiki", "something")
    assert item.get_sitelink("enwiki")['title'] == "something"
    assert "enwiki" in item.json_representation['sitelinks']
Example #6
    def test_item_engine(self):
        wbi_core.ItemEngine(debug=True)
        wbi_core.ItemEngine(data=None, debug=True)
        wbi_core.ItemEngine(data=wbi_datatype.String(value='test', prop_nr='P1'), debug=True)
        wbi_core.ItemEngine(data=[wbi_datatype.String(value='test', prop_nr='P1')], debug=True)
        with self.assertRaises(TypeError):
            wbi_core.ItemEngine(data='test', debug=True)
        with self.assertRaises(ValueError):
            wbi_core.ItemEngine(fast_run_case_insensitive=True, debug=True)
        with self.assertRaises(TypeError):
            wbi_core.ItemEngine(ref_handler='test', debug=True)
        with self.assertRaises(ValueError):
            wbi_core.ItemEngine(global_ref_mode='CUSTOM', debug=True)
        wbi_core.ItemEngine(item_id='Q2', fast_run=True, debug=True)
Example #7
def test_nositelinks():
    # this item doesn't and probably won't ever have any sitelinks (but who knows? maybe one day...)
    data = [wbi_core.ItemID(value='Q5', prop_nr='P31')]
    item = wbi_core.ItemEngine(item_id='Q27869338', data=data)
    item.get_sitelink("enwiki")
    assert "enwiki" not in item.json_representation['sitelinks']
    item.set_sitelink("enwiki", "something")
    assert item.get_sitelink("enwiki")['title'] == "something"
    assert "enwiki" in item.json_representation['sitelinks']
Example #8
def import_entity(username, password, data, label="", item_id=""):
    login_instance = wbi_login.Login(user=username, pwd=password)

    entity = wbi_core.ItemEngine(data=data, item_id=item_id)
    if label:
        entity.set_label(label, ENGLISH)

    entity_id = entity.write(login_instance)
    return entity_id
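
A minimal, hypothetical calling sketch for import_entity; the credentials and the P31 claim are placeholders, ENGLISH is assumed to be a module-level language constant (e.g. "en"), and whether the datatype classes live in wbi_core or wbi_datatype depends on the library version:

# Hypothetical usage sketch for the import_entity helper above; values are placeholders.
from wikibaseintegrator import wbi_datatype

# One hypothetical claim: instance of (P31) -> human (Q5)
data = [wbi_datatype.ItemID(value="Q5", prop_nr="P31")]

# With an empty item_id, import_entity writes a brand-new entity.
# new_id = import_entity("BotUser", "bot-password", data, label="example item")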
Example #9
    def test_live_item(self):
        """
        Test an item against Wikidata
        """
        item = wbi_core.ItemEngine(item_id='Q423111')

        mass_statement = [x for x in item.statements if x.get_prop_nr() == 'P2067'].pop()
        pprint.pprint(mass_statement.get_json_representation())

        if not mass_statement:
            raise ValueError("No mass statement (P2067) found on Q423111")
Example #10
    def test_search_only(self):
        item = wbi_core.ItemEngine(item_id="Q2", search_only=True)

        assert item.get_label('en') == "Earth"
        descr = item.get_description('en')
        assert len(descr) > 3

        assert "globe" in item.get_aliases()
        assert "planet" in item.get_description()

        assert item.get_label("es") == "Tierra"
Example #11
def add_usage_example(
    document_id=None,
    sentence=None,
    lid=None,
    form_id=None,
    sense_id=None,
    word=None,
):
    # Use WikibaseIntegrator aka wbi to upload the changes
    link_to_form = wbi_core.Form(prop_nr="P5830",
                                 value=form_id,
                                 is_qualifier=True)
    link_to_sense = wbi_core.Sense(prop_nr="P6072",
                                   value=sense_id,
                                   is_qualifier=True)
    reference = [
        wbi_core.ItemID(
            prop_nr="P248",  # Stated in Riksdagen open data portal
            value="Q21592569",
            is_reference=True),
        wbi_core.ExternalID(
            prop_nr="P8433",  # Riksdagen Document ID
            value=document_id,
            is_reference=True),
        wbi_core.Time(
            prop_nr="P813",  # Fetched today
            time=datetime.datetime.utcnow().replace(
                tzinfo=datetime.timezone.utc).replace(
                    hour=0,
                    minute=0,
                    second=0,
                ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
            is_reference=True,
        )
    ]
    claim = wbi_core.MonolingualText(
        sentence,
        "P5831",
        language="sv",
        qualifiers=[link_to_form, link_to_sense],
        references=[reference],
    )
    # print(claim)
    if debug:
        print(claim.get_json_representation())
    item = wbi_core.ItemEngine(data=[claim], item_id=lid)
    if debug:
        print(item.get_json_representation())
    result = item.write(
        login_instance,
        edit_summary="Added usage example with [[Wikidata:rikslex]]")
    return result
Example #12
    def test_label(self):
        item = wbi_core.ItemEngine(item_id="Q2")

        assert item.get_label('en') == "Earth"
        descr = item.get_description('en')
        assert len(descr) > 3

        assert "Terra" in item.get_aliases()
        assert "planet" in item.get_description()

        assert item.get_label("es") == "Tierra"

        # set_description on already existing description
        item.set_description(descr)
        item.set_description("fghjkl")
        item.set_description("fghjkltest", lang='en', if_exists='KEEP')
        assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'fghjkl'}
        # set_description on an empty description
        item.set_description("")
        item.set_description("zaehjgreytret", lang='en', if_exists='KEEP')
        assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'zaehjgreytret'}

        item.set_label("Earth")
        item.set_label("xfgfdsg")
        item.set_label("xfgfdsgtest", lang='en', if_exists='KEEP')
        assert item.json_representation['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'}
        assert item.json_representation['labels']['fr'] == {'language': 'fr', 'value': 'Terre'}
        item.set_aliases(["fake alias"], if_exists='APPEND')
        assert {'language': 'en', 'value': 'fake alias'} in item.json_representation['aliases']['en']

        item.set_label(label=None, lang='fr')
        item.set_label(label=None, lang='non-exist-key')
        assert 'remove' in item.json_representation['labels']['fr']

        item.get_label("ak")
        item.get_description("ak")
        item.get_aliases("ak")
        item.set_label("label", lang='ak')
        item.set_description("d", lang='ak')
        item.set_aliases(["a"], lang='ak', if_exists='APPEND')
        assert item.get_aliases('ak') == ['a']
        item.set_aliases("b", lang='ak')
        assert item.get_aliases('ak') == ['a', 'b']
        item.set_aliases("b", lang='ak', if_exists='REPLACE')
        assert item.get_aliases('ak') == ['b']
        item.set_aliases(["c"], lang='ak', if_exists='REPLACE')
        assert item.get_aliases('ak') == ['c']
Example #13
    def test_new_item_creation(self):
        data = [
            wbi_datatype.String(value='test1', prop_nr='P1'),
            wbi_datatype.String(value='test2', prop_nr='1'),
            wbi_datatype.String(value='test3', prop_nr=1),
            wbi_datatype.Math("xxx", prop_nr="P2"),
            wbi_datatype.ExternalID("xxx", prop_nr="P3"),
            wbi_datatype.ItemID("Q123", prop_nr="P4"),
            wbi_datatype.ItemID("123", prop_nr="P4"),
            wbi_datatype.ItemID(123, prop_nr="P4"),
            wbi_datatype.Time(time='-0458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
            wbi_datatype.Time(time='458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
            wbi_datatype.Time(time='+2021-01-01T15:15:15Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
            wbi_datatype.Url("http://www.wikidata.org", prop_nr="P6"),
            wbi_datatype.Url("https://www.wikidata.org", prop_nr="P6"),
            wbi_datatype.Url("ftp://example.com", prop_nr="P6"),
            wbi_datatype.Url("ssh://user@server/project.git", prop_nr="P6"),
            wbi_datatype.Url("svn+ssh://user@server:8888/path", prop_nr="P6"),
            wbi_datatype.MonolingualText(text="xxx", language="fr", prop_nr="P7"),
            wbi_datatype.Quantity(quantity=-5.04, prop_nr="P8"),
            wbi_datatype.Quantity(quantity=5.06, upper_bound=9.99, lower_bound=-2.22, unit="Q11573", prop_nr="P8"),
            wbi_datatype.CommonsMedia("xxx", prop_nr="P9"),
            wbi_datatype.GlobeCoordinate(latitude=1.2345, longitude=-1.2345, precision=12, prop_nr="P10"),
            wbi_datatype.GeoShape("Data:xxx.map", prop_nr="P11"),
            wbi_datatype.Property("P123", prop_nr="P12"),
            wbi_datatype.Property("123", prop_nr="P12"),
            wbi_datatype.Property(123, prop_nr="P12"),
            wbi_datatype.TabularData("Data:Taipei+Population.tab", prop_nr="P13"),
            wbi_datatype.MusicalNotation("\relative c' { c d e f | g2 g | a4 a a a | g1 |}", prop_nr="P14"),
            wbi_datatype.Lexeme("L123", prop_nr="P15"),
            wbi_datatype.Lexeme("123", prop_nr="P15"),
            wbi_datatype.Lexeme(123, prop_nr="P15"),
            wbi_datatype.Form("L123-F123", prop_nr="P16"),
            wbi_datatype.Sense("L123-S123", prop_nr="P17"),
            wbi_datatype.EDTF("2004-06-~01/2004-06-~20", prop_nr="P18")
        ]
        core_props = {f"P{x}" for x in range(20)}

        for d in data:
            item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=core_props)
            assert item.get_json_representation()
            item = wbi_core.ItemEngine(new_item=True, data=d, core_props=core_props)
            assert item.get_json_representation()
            item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=set())
            assert item.get_json_representation()
            item = wbi_core.ItemEngine(new_item=True, data=d, core_props=set())
            assert item.get_json_representation()

        item = wbi_core.ItemEngine(new_item=True, data=data, core_props=core_props)
        assert item.get_json_representation()
        item = wbi_core.ItemEngine(new_item=True, data=data, core_props=set())
        assert item.get_json_representation()
Example #14
def add_source_in_db(source_name, stable_url, username=None, password=None):
    """"""
    # Load Wikibase Integrator config with the environment
    wbi_config["MEDIAWIKI_API_URL"] = os.environ[API_URL]
    wbi_config["SPARQL_ENDPOINT_URL"] = os.environ[SPARQL_BIGDATA_URL]
    wbi_config["WIKIBASE_URL"] = SVC_URL

    if not username:
        username = os.environ[USERNAME]
    if not password:
        password = os.environ[PASSWORD]
    stable_url_prop = os.environ[STABLE_URL_PROP]
    catalog_prop = os.environ[CATALOG_PROP]

    login_instance = wbi_login.Login(
        user=username,
        pwd=password,
    )

    source_instance_of = wbi_core.ItemID(
        prop_nr=os.environ[INSTANCE_PROP], value=os.environ[GTFS_SCHEDULE_SOURCE_CODE]
    )
    try:
        source_stable_url = wbi_core.Url(value=stable_url, prop_nr=stable_url_prop)
    except ValueError as ve:
        print(f"url {stable_url} for source name {source_name} raised {ve}")
        raise ve
    source_catalog_ref = wbi_core.ItemID(
        prop_nr=catalog_prop,
        value=os.environ[GTFS_CATALOG_OF_SOURCES_CODE],  # fix this
    )
    source_catalog_entity = wbi_core.ItemEngine(
        item_id=os.environ[GTFS_CATALOG_OF_SOURCES_CODE]
    )

    # Create the source entity
    source_data = [source_instance_of, source_stable_url, source_catalog_ref]
    source_entity = wbi_core.ItemEngine(data=source_data, core_props={stable_url_prop})

    source_entity.set_label(f"{source_name}")
    source_entity_id = source_entity.write(login=login_instance)

    # Create the Archives ID using the name and the entity code of the source
    archives_id_prefix = source_name.replace("'s GTFS Schedule source", "")
    archives_id_prefix = archives_id_prefix[:15]
    archives_id_prefix = re.sub(
        NON_ALPHABETICAL_CHAR_REGEX, "-", archives_id_prefix
    ).lower()
    archives_id_suffix = source_entity_id.lower()
    source_archives_id = f"{archives_id_prefix}-gtfs-{archives_id_suffix}"

    source_archives_id_data = wbi_core.String(
        prop_nr=os.environ[ARCHIVES_ID_PROP],
        value=source_archives_id,
    )
    source_data_updated = [source_archives_id_data]

    source_entity_updated = wbi_core.ItemEngine(item_id=source_entity_id)
    source_entity_updated.update(source_data_updated)
    source_entity_updated.write(login=login_instance)

    # Update the catalog of sources
    source_entity_prop = wbi_core.ItemID(
        value=source_entity_id, prop_nr=os.environ[SOURCE_ENTITY_PROP], if_exists=APPEND
    )
    catalog_data = [source_entity_prop]
    source_catalog_entity.update(catalog_data)
    source_catalog_entity.write(login_instance)

    return source_entity_id, source_archives_id
Example #15
def add_usage_example(
    document_id=None,
    sentence=None,
    lid=None,
    form_id=None,
    sense_id=None,
    word=None,
    publication_date=None,
    language_style=None,
    type_of_reference=None,
    source=None,
    line=None,
):
    # Use WikibaseIntegrator aka wbi to upload the changes in one edit
    link_to_form = wbi_core.Form(prop_nr="P5830",
                                 value=form_id,
                                 is_qualifier=True)
    link_to_sense = wbi_core.Sense(prop_nr="P6072",
                                   value=sense_id,
                                   is_qualifier=True)
    if language_style == "formal":
        style = "Q104597585"
    else:
        if language_style == "informal":
            style = "Q901711"
        else:
            print(f"Error. Language style {language_style} " +
                  "not one of (formal,informal)")
            exit(1)
    logging.debug("Generating qualifier language_style " + f"with {style}")
    language_style_qualifier = wbi_core.ItemID(prop_nr="P6191",
                                               value=style,
                                               is_qualifier=True)
    # oral or written
    if type_of_reference == "written":
        medium = "Q47461344"
    else:
        if type_of_reference == "oral":
            medium = "Q52946"
        else:
            print(f"Error. Type of reference {type_of_reference} " +
                  "not one of (written,oral)")
            exit(1)
    logging.debug("Generating qualifier type of reference " + f"with {medium}")
    type_of_reference_qualifier = wbi_core.ItemID(prop_nr="P3865",
                                                  value=medium,
                                                  is_qualifier=True)
    if source == "riksdagen":
        if publication_date is not None:
            publication_date = datetime.fromisoformat(publication_date)
        else:
            print("Publication date of document {document_id} " +
                  "is missing. We have no fallback for that at the moment. " +
                  "Abort adding usage example.")
            return False
        stated_in = wbi_core.ItemID(prop_nr="P248",
                                    value="Q21592569",
                                    is_reference=True)
        document_id = wbi_core.ExternalID(
            prop_nr="P8433",  # Riksdagen Document ID
            value=document_id,
            is_reference=True)
        reference = [
            stated_in,
            document_id,
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=datetime.utcnow().replace(tzinfo=timezone.utc).replace(
                    hour=0,
                    minute=0,
                    second=0,
                ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
                is_reference=True,
            ),
            wbi_core.Time(
                prop_nr="P577",  # Publication date
                time=publication_date.strftime("+%Y-%m-%dT00:00:00Z"),
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    if source == "europarl":
        stated_in = wbi_core.ItemID(prop_nr="P248",
                                    value="Q5412081",
                                    is_reference=True)
        reference = [
            stated_in,
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=datetime.utcnow().replace(tzinfo=timezone.utc).replace(
                    hour=0,
                    minute=0,
                    second=0,
                ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
                is_reference=True,
            ),
            wbi_core.Time(
                prop_nr="P577",  # Publication date
                time="+2012-05-12T00:00:00Z",
                is_reference=True,
            ),
            wbi_core.Url(
                prop_nr="P854",  # reference url
                value="http://www.statmt.org/europarl/v7/sv-en.tgz",
                is_reference=True,
            ),
            # filename in archive
            wbi_core.String(
                (f"europarl-v7.{config.language_code}" +
                 f"-en.{config.language_code}"),
                "P7793",
                is_reference=True,
            ),
            # line number
            wbi_core.String(
                str(line),
                "P7421",
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    # This is the usage example statement
    claim = wbi_core.MonolingualText(
        sentence,
        "P5831",
        language=config.language_code,
        # Add qualifiers
        qualifiers=[
            link_to_form,
            link_to_sense,
            language_style_qualifier,
        ],
        # Add reference
        references=[reference],
    )
    if config.debug_json:
        logging.debug(f"claim:{claim.get_json_representation()}")
    item = wbi_core.ItemEngine(
        data=[claim],
        append_value=["P5831"],
        item_id=lid,
    )
    # if config.debug_json:
    #     print(item.get_json_representation())
    if config.login_instance is None:
        # Authenticate with WikibaseIntegrator
        print("Logging in with Wikibase Integrator")
        config.login_instance = wbi_login.Login(user=config.username,
                                                pwd=config.password)
    result = item.write(
        config.login_instance,
        edit_summary="Added usage example with [[Wikidata:LexUse]]")
    if config.debug_json:
        logging.debug(f"result from WBI:{result}")
    return result
Example #16
from wikibaseintegrator import wbi_login, wbi_core
import logging
logging.basicConfig(level=logging.INFO)

login_instance = wbi_login.Login(user='******', pwd='VP4ptJbLhNM9vB4')

my_first_wikidata_item = wbi_core.ItemEngine(item_id='Q1')

# to check successful installation and retrieval of the data, you can print the json representation of the item
print(my_first_wikidata_item.get_json_representation())

result = wbi_core.ItemEngine(
    item_id='Q1', data={'P3': 'http://www.wikidata.org/entity/Q65216433'})
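
The final call above passes a plain dict as data; in the wbi_core API used throughout these examples, data is a list of datatype objects, so a dict is likely to be rejected. A hedged rewrite of that call, reusing the P3 property and URL from the snippet (where the datatype classes live depends on the library version):

# Hedged variant of the last call, assuming data must be a list of datatype objects.
from wikibaseintegrator import wbi_core, wbi_datatype

url_claim = wbi_datatype.Url(value='http://www.wikidata.org/entity/Q65216433', prop_nr='P3')
result = wbi_core.ItemEngine(item_id='Q1', data=[url_claim])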
Example #17
class TestWbiCore(unittest.TestCase):
    common_item = wbi_core.ItemEngine(item_id="Q2")

    def test_item_engine(self):
        wbi_core.ItemEngine(debug=True)
        wbi_core.ItemEngine(data=None, debug=True)
        wbi_core.ItemEngine(data=wbi_datatype.String(value='test', prop_nr='P1'), debug=True)
        wbi_core.ItemEngine(data=[wbi_datatype.String(value='test', prop_nr='P1')], debug=True)
        with self.assertRaises(TypeError):
            wbi_core.ItemEngine(data='test', debug=True)
        with self.assertRaises(ValueError):
            wbi_core.ItemEngine(fast_run_case_insensitive=True, debug=True)
        with self.assertRaises(TypeError):
            wbi_core.ItemEngine(ref_handler='test', debug=True)
        with self.assertRaises(ValueError):
            wbi_core.ItemEngine(global_ref_mode='CUSTOM', debug=True)
        wbi_core.ItemEngine(item_id='Q2', fast_run=True, debug=True)

    def test_search_only(self):
        item = wbi_core.ItemEngine(item_id="Q2", search_only=True)

        assert item.get_label('en') == "Earth"
        descr = item.get_description('en')
        assert len(descr) > 3

        assert "globe" in item.get_aliases()
        assert "planet" in item.get_description()

        assert item.get_label("es") == "Tierra"

    def test_basedatatype_if_exists(self):
        instance_of_append = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='APPEND')
        instance_of_forceappend = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='FORCE_APPEND')
        instance_of_replace = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='REPLACE')
        instance_of_keep = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='KEEP')

        item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_append, instance_of_append])
        claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31']]
        assert len(claims) > 1 and 'Q1234' in claims and claims.count('Q1234') == 1

        item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_forceappend, instance_of_forceappend])
        claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31']]
        assert len(claims) > 1 and 'Q1234' in claims and claims.count('Q1234') == 2

        item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_replace], debug=True)
        claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31'] if 'remove' not in x]
        removed_claims = [True for x in item.get_json_representation()['claims']['P31'] if 'remove' in x]
        assert len(claims) == 1 and 'Q1234' in claims and len(removed_claims) == 3 and True in removed_claims

        item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_keep], debug=True)
        claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31']]
        assert len(claims) == 3 and 'Q1234' not in claims

    def test_description(self):
        item = wbi_core.ItemEngine(item_id="Q2")
        descr = item.get_description('en')
        assert len(descr) > 3

        assert "planet" in item.get_description()

        # set_description on already existing description
        item.set_description(descr)
        item.set_description("lorem")
        item.set_description("lorem ipsum", lang='en', if_exists='KEEP')
        assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'lorem'}
        # set_description on an empty description
        item.set_description("")
        item.set_description("lorem ipsum", lang='en', if_exists='KEEP')
        assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'lorem ipsum'}

        item.set_description("lorem", lang='fr', if_exists='KEEP')
        item.set_description("lorem ipsum", lang='fr', if_exists='REPLACE')
        item.set_description("lorem", lang='en', if_exists='KEEP')
        assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'lorem ipsum'}
        assert item.json_representation['descriptions']['fr'] == {'language': 'fr', 'value': 'lorem ipsum'}

    def test_label(self):
        item = wbi_core.ItemEngine(item_id="Q2")

        assert item.get_label('en') == "Earth"

        assert "globe" in item.get_aliases()

        assert item.get_label("es") == "Tierra"

        item.set_label("Earth")
        item.set_label("lorem")
        item.set_label("lorem ipsum", lang='en', if_exists='KEEP')
        assert item.json_representation['labels']['en'] == {'language': 'en', 'value': 'lorem'}
        assert item.json_representation['labels']['fr'] == {'language': 'fr', 'value': 'Terre'}
        item.set_aliases(["fake alias"], if_exists='APPEND')
        assert {'language': 'en', 'value': 'fake alias'} in item.json_representation['aliases']['en']

        item.set_label(label=None, lang='fr')
        item.set_label(label=None, lang='non-exist-key')
        assert 'remove' in item.json_representation['labels']['fr']

        item.get_label("ak")
        item.get_description("ak")
        item.get_aliases("ak")
        item.set_label("label", lang='ak')
        item.set_description("d", lang='ak')
        item.set_aliases(["a"], lang='ak', if_exists='APPEND')
        assert item.get_aliases('ak') == ['a']
        item.set_aliases("b", lang='ak')
        assert item.get_aliases('ak') == ['a', 'b']
        item.set_aliases("b", lang='ak', if_exists='REPLACE')
        assert item.get_aliases('ak') == ['b']
        item.set_aliases(["c"], lang='ak', if_exists='REPLACE')
        assert item.get_aliases('ak') == ['c']

    def test_wd_search(self):
        t = wbi_functions.search_entities('rivaroxaban')
        print('Number of results: ', len(t))
        self.assertIsNot(len(t), 0)

    def test_item_generator(self):
        items = ['Q408883', 'P715', 'Q18046452']

        item_instances = wbi_functions.generate_item_instances(items=items)

        for qid, item in item_instances:
            self.assertIn(qid, items)

    def test_new_item_creation(self):
        data = [
            wbi_datatype.String(value='test1', prop_nr='P1'),
            wbi_datatype.String(value='test2', prop_nr='1'),
            wbi_datatype.String(value='test3', prop_nr=1),
            wbi_datatype.Math("xxx", prop_nr="P2"),
            wbi_datatype.ExternalID("xxx", prop_nr="P3"),
            wbi_datatype.ItemID("Q123", prop_nr="P4"),
            wbi_datatype.ItemID("123", prop_nr="P4"),
            wbi_datatype.ItemID(123, prop_nr="P4"),
            wbi_datatype.Time(time='-0458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
            wbi_datatype.Time(time='458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
            wbi_datatype.Time(time='+2021-01-01T15:15:15Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
            wbi_datatype.Url("http://www.wikidata.org", prop_nr="P6"),
            wbi_datatype.Url("https://www.wikidata.org", prop_nr="P6"),
            wbi_datatype.Url("ftp://example.com", prop_nr="P6"),
            wbi_datatype.Url("ssh://user@server/project.git", prop_nr="P6"),
            wbi_datatype.Url("svn+ssh://user@server:8888/path", prop_nr="P6"),
            wbi_datatype.MonolingualText(text="xxx", language="fr", prop_nr="P7"),
            wbi_datatype.Quantity(quantity=-5.04, prop_nr="P8"),
            wbi_datatype.Quantity(quantity=5.06, upper_bound=9.99, lower_bound=-2.22, unit="Q11573", prop_nr="P8"),
            wbi_datatype.CommonsMedia("xxx", prop_nr="P9"),
            wbi_datatype.GlobeCoordinate(latitude=1.2345, longitude=-1.2345, precision=12, prop_nr="P10"),
            wbi_datatype.GeoShape("Data:xxx.map", prop_nr="P11"),
            wbi_datatype.Property("P123", prop_nr="P12"),
            wbi_datatype.Property("123", prop_nr="P12"),
            wbi_datatype.Property(123, prop_nr="P12"),
            wbi_datatype.TabularData("Data:Taipei+Population.tab", prop_nr="P13"),
            wbi_datatype.MusicalNotation("\relative c' { c d e f | g2 g | a4 a a a | g1 |}", prop_nr="P14"),
            wbi_datatype.Lexeme("L123", prop_nr="P15"),
            wbi_datatype.Lexeme("123", prop_nr="P15"),
            wbi_datatype.Lexeme(123, prop_nr="P15"),
            wbi_datatype.Form("L123-F123", prop_nr="P16"),
            wbi_datatype.Sense("L123-S123", prop_nr="P17"),
            wbi_datatype.EDTF("2004-06-~01/2004-06-~20", prop_nr="P18")
        ]
        core_props = {f"P{x}" for x in range(20)}

        for d in data:
            item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=core_props)
            assert item.get_json_representation()
            item = wbi_core.ItemEngine(new_item=True, data=d, core_props=core_props)
            assert item.get_json_representation()
            item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=set())
            assert item.get_json_representation()
            item = wbi_core.ItemEngine(new_item=True, data=d, core_props=set())
            assert item.get_json_representation()

        item = wbi_core.ItemEngine(new_item=True, data=data, core_props=core_props)
        assert item.get_json_representation()
        item = wbi_core.ItemEngine(new_item=True, data=data, core_props=set())
        assert item.get_json_representation()

    def test_get_property_list(self):
        self.assertTrue(len(self.common_item.get_property_list()))

    def test_count_references(self):
        self.assertTrue(len(self.common_item.count_references(prop_id='P2067')))

    def test_get_reference_properties(self):
        self.assertTrue(len(self.common_item.get_reference_properties(prop_id='P2067')))

    def test_get_qualifier_properties(self):
        print(self.common_item.get_qualifier_properties(prop_id='P170'))
        self.assertTrue(len(self.common_item.get_qualifier_properties(prop_id='P2067')))
Example #18
                        lemma = lexeme
                        lid = lexeme_data[0]
                        print(f"Uploading id to {lid}: {lemma}")
                        # TODO if numbered
                        # - fetch lexeme using wbi
                        # - present to user
                        # - ask user which if one matches
                        print(f"Adding {saob_id} to {lid}")
                        saob_statement = wbi_core.ExternalID(
                            prop_nr="P8478",
                            value=saob_id,
                        )
                        described_by_source = wbi_core.ItemID(prop_nr="P1343",
                                                              value="Q1935308")
                        item = wbi_core.ItemEngine(
                            data=[saob_statement, described_by_source],
                            #append_value="P8478",
                            item_id=lid)
                        result = item.write(
                            login_instance,
                            edit_summary=
                            "Added SAOB identifier with [[Wikidata:Tools/LexSAOB]]"
                        )
                        #if config.debug_json:
                        #logging.debug(f"result from WBI:{result}")
                        print(f"{wd_prefix}{lid}")
                        exit(0)
                    else:
                        print("Categories did not match :/ - skipping")
    else:
        print(f"{lexeme} not found in SAOB wordlist")
Example #19
def add_usage_example(
        document_id=None,
        sentence=None,
        lid=None,
        form_id=None,
        sense_id=None,
        word=None,
        publication_date=None,
        language_style=None,
        type_of_reference=None,
        source=None,
        line=None,
):
    # Use WikibaseIntegrator aka wbi to upload the changes in one edit
    link_to_form = wbi_core.Form(
        prop_nr="P5830",
        value=form_id,
        is_qualifier=True
    )
    link_to_sense = wbi_core.Sense(
        prop_nr="P6072",
        value=sense_id,
        is_qualifier=True
    )
    if language_style == "formal":
        style = "Q104597585"
    else:
        if language_style == "informal":
            style = "Q901711"
        else:
            print(_( "Error. Language style {} ".format(language_style) +
                     "not one of (formal,informal). Please report a bug at "+
                     "https://github.com/egils-consulting/LexUtils/issues" ))
            return
    logging.debug("Generating qualifier language_style " +
                  f"with {style}")
    language_style_qualifier = wbi_core.ItemID(
        prop_nr="P6191",
        value=style,
        is_qualifier=True
    )
    # oral or written
    if type_of_reference == "written":
        medium = "Q47461344"
    else:
        if type_of_reference == "oral":
            medium = "Q52946"
        else:
            print(_( "Error. Type of reference {} ".format(type_of_reference) +
                     "not one of (written,oral). Please report a bug at "+
                     "https://github.com/egils-consulting/LexUtils/issues" ))
            return
    logging.debug(_( "Generating qualifier type of reference " +
                  "with {}".format(medium) ))
    type_of_reference_qualifier = wbi_core.ItemID(
        prop_nr="P3865",
        value=medium,
        is_qualifier=True
    )
    if source == "riksdagen":
        if publication_date is not None:
            publication_date = datetime.fromisoformat(publication_date)
        else:
            print(_( "Publication date of document {} " +
                  "is missing. We have no fallback for that at the moment. " +
                  "Abort adding usage example.".format(document_id) ))
            return False
        stated_in = wbi_core.ItemID(
            prop_nr="P248",
            value="Q21592569",
            is_reference=True
        )
        document_id = wbi_core.ExternalID(
            prop_nr="P8433",  # Riksdagen Document ID
            value=document_id,
            is_reference=True
        )
        reference = [
            stated_in,
            document_id,
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=datetime.utcnow().replace(
                    tzinfo=timezone.utc
                ).replace(
                    hour=0,
                    minute=0,
                    second=0,
                ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
                is_reference=True,
            ),
            wbi_core.Time(
                prop_nr="P577",  # Publication date
                time=publication_date.strftime("+%Y-%m-%dT00:00:00Z"),
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    if source == "europarl":
        stated_in = wbi_core.ItemID(
            prop_nr="P248",
            value="Q5412081",
            is_reference=True
        )
        reference = [
            stated_in,
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=datetime.utcnow().replace(
                    tzinfo=timezone.utc
                ).replace(
                    hour=0,
                    minute=0,
                    second=0,
                ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
                is_reference=True,
            ),
            wbi_core.Time(
                prop_nr="P577",  # Publication date
                time="+2012-05-12T00:00:00Z",
                is_reference=True,
            ),
            wbi_core.Url(
                prop_nr="P854",  # reference url
                value="http://www.statmt.org/europarl/v7/sv-en.tgz",
                is_reference=True,
            ),
            # filename in archive
            wbi_core.String(
                (f"europarl-v7.{config.language_code}" +
                 f"-en.{config.language_code}"),
                "P7793",
                is_reference=True,
            ),
            # line number
            wbi_core.String(
                str(line),
                "P7421",
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    if source == "ksamsok":
        # No date is provided unfortunately, so we set it to unknown value
        stated_in = wbi_core.ItemID(
            prop_nr="P248",
            value="Q7654799",
            is_reference=True
        )
        document_id = wbi_core.ExternalID(
            # K-Samsök URI
            prop_nr="P1260",  
            value=document_id,
            is_reference=True
        )
        reference = [
            stated_in,
            document_id,
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=datetime.utcnow().replace(
                    tzinfo=timezone.utc
                ).replace(
                    hour=0,
                    minute=0,
                    second=0,
                ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
                is_reference=True,
            ),
            wbi_core.Time(
                # We don't know the value of the publication dates unfortunately
                prop_nr="P577",  # Publication date
                time="",
                snak_type="somevalue",
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    if reference is None:
        logger.error(_( "No reference defined, cannot add usage example" ))
        exit(1)
    # This is the usage example statement
    claim = wbi_core.MonolingualText(
        sentence,
        "P5831",
        language=config.language_code,
        # Add qualifiers
        qualifiers=[
            link_to_form,
            link_to_sense,
            language_style_qualifier,
        ],
        # Add reference
        references=[reference],
    )
    if config.debug_json:
        logging.debug(f"claim:{claim.get_json_representation()}")
    item = wbi_core.ItemEngine(
        data=[claim], append_value=["P5831"], item_id=lid,
    )
    # if config.debug_json:
    #     print(item.get_json_representation())
    if config.login_instance is None:
        # Authenticate with WikibaseIntegrator
        print("Logging in with Wikibase Integrator")
        config.login_instance = wbi_login.Login(
            user=config.username, pwd=config.password
        )
    result = item.write(
        config.login_instance,
        edit_summary=(
            _( "Added usage example "+
               "with [[Wikidata:LexUtils]] v{}".format(config.version) )
        )
    )
    if config.debug_json:
        logging.debug(f"result from WBI:{result}")
    # TODO add handling of result from WBI and return True == Success or False 
    return result
Example #20
def create_dataset_entity_for_gtfs_metadata(gtfs_representation,
                                            api_url,
                                            username=None,
                                            password=None):
    """Create a dataset entity for a new dataset version on the Database.
    :param gtfs_representation: The representation of the GTFS dataset to process.
    :param api_url: API url, either PRODUCTION_API_URL or STAGING_API_URL.
    :return: The representation of the GTFS dataset post-execution.
    """
    validate_api_url(api_url)
    validate_gtfs_representation(gtfs_representation)
    metadata = gtfs_representation.metadata

    ###########################
    # 1. Process the core props
    ###########################

    # Begin with the core properties data
    # To verify if the dataset entity already exist
    core_props_data = []

    # SHA-1 hash property
    if is_valid_instance(metadata.sha1_hash, str):
        core_props_data.append(
            wbi_core.String(value=metadata.sha1_hash,
                            prop_nr=os.environ[SHA1_HASH_PROP]))

    # Archives URL, from the stable URL property
    if is_valid_instance(metadata.stable_urls, dict):
        archives_url = metadata.stable_urls.get(ARCHIVES_URL)
        try:
            core_props_data.append(
                wbi_core.Url(
                    value=archives_url,
                    prop_nr=os.environ[STABLE_URL_PROP],
                    rank=PREFERRED,
                ))
        except ValueError as ve:
            print(
                f"url {archives_url} for source {metadata.source_entity_code} caused {ve}"
            )
            raise ve

    # If the 2 core props values were NOT added to core_props_data,
    # then it is not possible to verify whether the dataset entity already exists
    if len(core_props_data) != 2:
        raise MissingCorePropsException(core_props_data)

    # An existing dataset entity is considered the same as the one processed
    # if and only if 2 core props values are matching: the SHA-1 hash and the Archives URL
    # so the core properties threshold is 100%
    core_props_threshold = 1.0

    try:
        dataset_entity = wbi_core.ItemEngine(
            data=core_props_data,
            core_props={
                os.environ[STABLE_URL_PROP],
                os.environ[SHA1_HASH_PROP],
            },
            core_prop_match_thresh=core_props_threshold,
        )
    except ManualInterventionReqException as mi:
        print(
            f"ManualInterventionReqException : a core property value exists for multiple dataset entities."
        )
        raise mi
    except CorePropIntegrityException as cp:
        print(
            f"CorePropIntegrityException: a dataset entity exists with 1 of the 2 core props values."
        )
        raise cp
    except Exception as e:
        print(f"metadata : {metadata} raised {e}")
        raise e

    # If the retrieved dataset entity already has an item_id (entity id) value,
    # then we do nothing because the dataset already exists
    if dataset_entity.item_id != "":
        raise EntityAlreadyExistsException(dataset_entity.item_id)

    #################################################
    # 2. Add the other properties to the dataset data
    #################################################
    dataset_data = []

    # Add the core_props_data to the dataset_data
    dataset_data += core_props_data

    # Delete the archives_url from the metadata.stable_urls
    # Since it was part of the core_props_data
    del metadata.stable_urls[ARCHIVES_URL]

    # Stable urls property
    if is_valid_instance(metadata.stable_urls, dict):
        for url in metadata.stable_urls.values():
            try:
                dataset_data.append(
                    wbi_core.Url(value=url,
                                 prop_nr=os.environ[STABLE_URL_PROP],
                                 rank=NORMAL))
            except ValueError as ve:
                print(
                    f"url {url} for source {metadata.source_entity_code} caused {ve}"
                )
                raise ve

    # Instance property
    dataset_data.append(
        wbi_core.ItemID(
            value=os.environ[GTFS_SCHEDULE_DATA_FORMAT],
            prop_nr=os.environ[INSTANCE_PROP],
        ))

    # Source entity property
    dataset_data.append(
        wbi_core.ItemID(value=metadata.source_entity_code,
                        prop_nr=os.environ[SOURCE_ENTITY_PROP]))

    # Main timezone property
    if is_valid_instance(metadata.main_timezone, str):
        dataset_data.append(
            wbi_core.String(
                value=metadata.main_timezone,
                prop_nr=os.environ[TIMEZONE_PROP],
                rank=PREFERRED,
            ))

    # Other timezones property
    if is_valid_instance(metadata.other_timezones, list):
        for timezone in metadata.other_timezones:
            dataset_data.append(
                wbi_core.String(value=timezone,
                                prop_nr=os.environ[TIMEZONE_PROP],
                                rank=NORMAL))

    # Country code property
    if is_valid_instance(metadata.country_codes, list):
        for country_code in metadata.country_codes:
            dataset_data.append(
                wbi_core.String(
                    value=country_code,
                    prop_nr=os.environ[COUNTRY_CODE_PROP],
                    rank=NORMAL,
                ))

    # Main language code property
    if is_valid_instance(metadata.main_language_code, str):
        dataset_data.append(
            wbi_core.String(
                value=metadata.main_language_code,
                prop_nr=os.environ[MAIN_LANGUAGE_CODE_PROP],
                rank=PREFERRED,
            ))

    # Start service date property
    if is_valid_instance(metadata.start_service_date, str):
        dataset_data.append(
            wbi_core.String(
                value=metadata.start_service_date,
                prop_nr=os.environ[START_SERVICE_DATE_PROP],
            ))

    # End service date property
    if is_valid_instance(metadata.end_service_date, str):
        dataset_data.append(
            wbi_core.String(
                value=metadata.end_service_date,
                prop_nr=os.environ[END_SERVICE_DATE_PROP],
            ))

    # Start timestamp property
    if is_valid_instance(metadata.start_timestamp, str):
        dataset_data.append(
            wbi_core.String(value=metadata.start_timestamp,
                            prop_nr=os.environ[START_TIMESTAMP_PROP]))

    # End timestamp property
    if is_valid_instance(metadata.end_timestamp, str):
        dataset_data.append(
            wbi_core.String(value=metadata.end_timestamp,
                            prop_nr=os.environ[END_TIMESTAMP_PROP]))

    # Bounding box property
    if is_valid_instance(metadata.bounding_box, dict):
        for order_key, corner_value in metadata.bounding_box.items():
            dataset_data.append(
                create_geographical_property(order_key, corner_value,
                                             os.environ[BOUNDING_BOX_PROP]))

    # Bounding octagon property
    if is_valid_instance(metadata.bounding_octagon, dict):
        for order_key, corner_value in metadata.bounding_octagon.items():
            dataset_data.append(
                create_geographical_property(
                    order_key, corner_value,
                    os.environ[BOUNDING_OCTAGON_PROP]))

    # Stop counts
    if is_valid_instance(metadata.stops_count_by_type, dict):
        # Number of stops property
        stops_count = metadata.stops_count_by_type.get(STOP_KEY, None)
        if stops_count is not None:
            dataset_data.append(
                wbi_core.Quantity(
                    quantity=stops_count,
                    prop_nr=os.environ[NUM_OF_STOPS_PROP],
                ))

        # Number of stations property
        stations_count = metadata.stops_count_by_type.get(STATION_KEY, None)
        if stations_count is not None:
            dataset_data.append(
                wbi_core.Quantity(
                    quantity=stations_count,
                    prop_nr=os.environ[NUM_OF_STATIONS_PROP],
                ))

        # Number of entrances property
        entrances_count = metadata.stops_count_by_type.get(ENTRANCE_KEY, None)
        if entrances_count is not None:
            dataset_data.append(
                wbi_core.Quantity(
                    quantity=entrances_count,
                    prop_nr=os.environ[NUM_OF_ENTRANCES_PROP],
                ))

    if is_valid_instance(metadata.agencies_count, int):
        # Number of agencies property
        dataset_data.append(
            wbi_core.Quantity(
                quantity=metadata.agencies_count,
                prop_nr=os.environ[NUM_OF_AGENCIES_PROP],
            ))

    # Number of routes property
    if is_valid_instance(metadata.routes_count_by_type, dict):
        for route_key, route_value in metadata.routes_count_by_type.items():
            route_qualifier = [
                wbi_core.ItemID(
                    value=route_key,
                    prop_nr=os.environ[ROUTE_TYPE_PROP],
                    is_qualifier=True,
                )
            ]
            dataset_data.append(
                wbi_core.Quantity(
                    quantity=route_value,
                    prop_nr=os.environ[NUM_OF_ROUTES_PROP],
                    qualifiers=route_qualifier,
                ))

    # Download date
    if is_valid_instance(metadata.download_date, str):
        dataset_data.append(
            wbi_core.String(
                value=metadata.download_date,
                prop_nr=os.environ[DOWNLOAD_DATE_PROP],
            ))

    # Dataset version entity label
    version_name_label = metadata.dataset_version_name
    if not username:
        username = os.environ[USERNAME]
    if not password:
        password = os.environ[PASSWORD]
    login_instance = wbi_login.Login(user=username, pwd=password)

    #################################################
    # 3. Create the dataset entity on the database
    #################################################

    # Create the dataset WITHOUT using the core_props.
    # For some reason, when the core_props are used with all the data,
    # the WikibaseIntegrator library retrieves entities
    # that do not share data with the actual dataset entity,
    # which makes the process crash
    dataset_entity = wbi_core.ItemEngine(data=dataset_data)

    # Set the label (name)
    dataset_entity.set_label(version_name_label, ENGLISH)

    # Create the dataset entity on the database
    dataset_entity_id = dataset_entity.write(login_instance)
    metadata.dataset_version_entity_code = dataset_entity_id

    # Create the source data with the dataset entity code and property
    version_prop = wbi_core.ItemID(
        value=metadata.dataset_version_entity_code,
        prop_nr=os.environ[DATASET_PROP],
        if_exists=APPEND,
    )
    source_data = [version_prop]

    # Update the source entity
    # Try a maximum of 20 times in case there are edit conflicts
    try_count = 20
    has_succeeded = False

    while not has_succeeded and try_count > 0:
        source_entity = wbi_core.ItemEngine(
            item_id=metadata.source_entity_code)
        source_entity.update(source_data)
        try:
            source_entity.write(login_instance)
        except MWApiError as mwae:
            print(
                f"Failed to update: {source_entity.item_id} with data: {source_data} raised MWApiError. "
                f"{try_count} attempts left.")
            try_count -= 1
            # If the attempts have not succeeded, fail loudly
            if try_count == 0:
                print(
                    f"source_entity: {source_entity.get_json_representation()} with data: "
                    f"{source_data} raised MWApiError.")
                raise mwae
            # Wait 20 seconds before the next attempt so the database updates
            # preventing other edit conflicts
            # and not overloading the database with requests
            time.sleep(20)
        else:
            has_succeeded = True
            metadata.source_entity_code = source_entity.item_id

    return gtfs_representation
Example #21
def add_usage_example(
    document_id=None,
    sentence=None,
    lid=None,
    form_id=None,
    sense_id=None,
    word=None,
    publication_date=None,
):
    # Use WikibaseIntegrator aka wbi to upload the changes in one edit
    if publication_date is not None:
        publication_date = datetime.fromisoformat(publication_date)
    else:
        print("Publication date of document {document_id} " +
              "is missing. We have no fallback for that. " +
              "Abort adding usage example.")
        return False
    link_to_form = wbi_core.Form(prop_nr="P5830",
                                 value=form_id,
                                 is_qualifier=True)
    link_to_sense = wbi_core.Sense(prop_nr="P6072",
                                   value=sense_id,
                                   is_qualifier=True)
    reference = [
        wbi_core.ItemID(
            prop_nr="P248",  # Stated in Riksdagen open data portal
            value="Q21592569",
            is_reference=True),
        wbi_core.ExternalID(
            prop_nr="P8433",  # Riksdagen Document ID
            value=document_id,
            is_reference=True),
        wbi_core.Time(
            prop_nr="P813",  # Fetched today
            time=datetime.utcnow().replace(tzinfo=timezone.utc).replace(
                hour=0,
                minute=0,
                second=0,
            ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
            is_reference=True,
        ),
        wbi_core.Time(
            prop_nr="P577",  # Publication date
            time=publication_date.strftime("+%Y-%m-%dT00:00:00Z"),
            is_reference=True,
        )
    ]
    claim = wbi_core.MonolingualText(
        sentence,
        "P5831",
        language="sv",
        qualifiers=[link_to_form, link_to_sense],
        references=[reference],
    )
    # print(claim)
    if debug_json:
        print(claim.get_json_representation())
    item = wbi_core.ItemEngine(data=[claim], item_id=lid)
    if debug_json:
        print(item.get_json_representation())
    result = item.write(
        login_instance,
        edit_summary="Added usage example with [[Wikidata:LexUse]]")
    if debug_json:
        print(f"Result from WBI: {result}")
    return result