def test_ref_equals():
    """Verify that ``equals`` only compares references when ``include_ref`` is set."""

    def extra_reference():
        # Built anew on every call so the two statements never share objects.
        return [wbi_core.ExternalID(value='99999', prop_nr='P352')]

    # statements are identical
    reference_block = [
        wbi_core.ExternalID(value='P58742', prop_nr='P352', is_reference=True),
        wbi_core.ItemID(value='Q24784025', prop_nr='P527', is_reference=True),
        wbi_core.Time(time='+2001-12-31T12:01:13Z', prop_nr='P813', is_reference=True),
    ]
    original = wbi_core.ItemID("Q123", "P123", references=[reference_block])
    candidate = copy.deepcopy(original)
    assert original.equals(candidate, include_ref=False)
    assert original.equals(candidate, include_ref=True)

    # dates are a month apart
    candidate = copy.deepcopy(original)
    later_date = wbi_core.Time(time='+2002-01-31T12:01:13Z', prop_nr='P813')
    candidate.references[0][2] = later_date
    assert original.equals(candidate, include_ref=False)
    assert not original.equals(candidate, include_ref=True)

    # multiple refs: only the copy carries the second reference block at first
    candidate = copy.deepcopy(original)
    candidate.references.append(extra_reference())
    assert original.equals(candidate, include_ref=False)
    assert not original.equals(candidate, include_ref=True)

    # once both sides carry the same second block they compare equal again
    original.references.append(extra_reference())
    assert original.equals(candidate, include_ref=True)
def test_fastrun_label(self):
    """Exercise fastrun label, description and alias handling on item Q2."""
    # tests fastrun label, description and aliases, and label in another language
    claims = [wbi_core.ExternalID('/m/02j71', 'P646')]
    base_filter = {'P361': 'Q18589965'}
    item = wbi_core.ItemEngine(
        item_id="Q2",
        data=claims,
        fast_run=True,
        fast_run_base_filter=base_filter,
    )

    container = wbi_core.ItemEngine.fast_run_store[0]
    container.debug = True

    # Reading the English terms.
    assert item.get_label('en') == "Earth"
    description = item.get_description('en')
    assert len(description) > 3
    alias_list = item.get_aliases()
    assert "Terra" in alias_list
    english_labels = list(
        item.fast_run_container.get_language_data("Q2", 'en', 'label'))
    assert english_labels[0] == "Earth"
    assert item.fast_run_container.check_language_data(
        "Q2", ['not the Earth'], 'en', 'label')
    assert "Terra" in item.get_aliases()
    assert "planet" in item.get_description()

    assert item.get_label("es") == "Tierra"

    # Writing the English terms and checking the JSON representation.
    item.set_description(description)
    item.set_description("fghjkl")
    expected_description = {'language': 'en', 'value': 'fghjkl'}
    assert item.json_representation['descriptions']['en'] == expected_description

    item.set_label("Earth")
    item.set_label("xfgfdsg")
    expected_label = {'language': 'en', 'value': 'xfgfdsg'}
    assert item.json_representation['labels']['en'] == expected_label

    item.set_aliases(["fake alias"], if_exists='APPEND')
    expected_alias = {'language': 'en', 'value': 'fake alias'}
    assert expected_alias in item.json_representation['aliases']['en']

    # something thats empty (for now.., can change, so this just makes sure no exception is thrown)
    smoke_calls = (
        (['Ewiase'], 'label'),
        (['not Ewiase'], 'label'),
        ([''], 'description'),
        ([], 'aliases'),
        (['sdf', 'sdd'], 'aliases'),
    )
    for values, field in smoke_calls:
        container.check_language_data("Q2", values, 'ak', field)

    item.get_label("ak")
    item.get_description("ak")
    item.get_aliases("ak")
    item.set_label("label", lang="ak")
    item.set_description("d", lang="ak")
    item.set_aliases(["a"], lang="ak", if_exists='APPEND')
def test_fast_run(self):
    """Ask a FastRunContainer whether a write is required and print the outcome."""
    claims = [
        wbi_core.ExternalID(value='P40095', prop_nr='P352'),
        wbi_core.ExternalID(value='YER158C', prop_nr='P705'),
    ]
    container = wbi_fastrun.FastRunContainer(
        base_filter={'P352': '', 'P703': 'Q27510868'},
        base_data_type=wbi_core.BaseDataType,
        engine=wbi_core.ItemEngine,
    )

    write_needed = container.write_required(data=claims)

    # A truthy result is reported as a failure; nothing is asserted here.
    message = 'fastrun failed' if write_needed else 'successful fastrun'
    print(write_needed, message)
def add_usage_example(
        document_id=None,
        sentence=None,
        lid=None,
        form_id=None,
        sense_id=None,
        word=None,
):
    """Add a Swedish usage example (P5831) to a lexeme and upload it.

    The statement is qualified with the form (P5830) and sense (P6072) and
    referenced to the Riksdagen open data portal together with the document
    ID and today's date.

    Relies on the module-level names ``debug`` and ``login_instance``.

    Args:
        document_id: Riksdagen document ID stored in the reference (P8433).
        sentence: Text of the usage example.
        lid: ID of the lexeme that receives the statement.
        form_id: Value of the P5830 qualifier.
        sense_id: Value of the P6072 qualifier.
        word: Unused; kept so existing callers keep working.

    Returns:
        Whatever ``ItemEngine.write`` returns.
    """
    # Use WikibaseIntegrator aka wbi to upload the changes
    link_to_form = wbi_core.Form(prop_nr="P5830", value=form_id, is_qualifier=True)
    link_to_sense = wbi_core.Sense(prop_nr="P6072", value=sense_id, is_qualifier=True)

    # Today's UTC date at midnight. ``datetime.utcnow()`` is deprecated, so
    # an aware "now" is used instead; the rendered string is the same.
    fetched_today = datetime.datetime.now(datetime.timezone.utc).strftime(
        "+%Y-%m-%dT00:00:00Z")

    reference = [
        wbi_core.ItemID(
            prop_nr="P248",  # Stated in Riksdagen open data portal
            value="Q21592569",
            is_reference=True),
        wbi_core.ExternalID(
            prop_nr="P8433",  # Riksdagen Document ID
            value=document_id,
            is_reference=True),
        wbi_core.Time(
            prop_nr="P813",  # Fetched today
            time=fetched_today,
            is_reference=True,
        ),
    ]
    claim = wbi_core.MonolingualText(
        sentence,
        "P5831",
        language="sv",
        qualifiers=[link_to_form, link_to_sense],
        references=[reference],
    )
    if debug:
        print(claim.get_json_representation())
    item = wbi_core.ItemEngine(data=[claim], item_id=lid)
    if debug:
        print(item.get_json_representation())
    result = item.write(
        login_instance,
        edit_summary="Added usage example with [[Wikidata:rikslex]]")
    return result
def test_new_item_creation(self):
    """Build new items from every supported datatype and check they serialise.

    Each statement is passed both wrapped in a list and as a bare object,
    with and without core properties, and finally all statements together.
    """
    data = [
        wbi_core.String(value='test1', prop_nr='P1'),
        wbi_core.String(value='test2', prop_nr='1'),
        wbi_core.String(value='test3', prop_nr=1),
        wbi_core.Math("xxx", prop_nr="P2"),
        wbi_core.ExternalID("xxx", prop_nr="P3"),
        wbi_core.ItemID("Q123", prop_nr="P4"),
        wbi_core.ItemID("123", prop_nr="P4"),
        wbi_core.ItemID(123, prop_nr="P4"),
        wbi_core.Time(time='-0458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
        wbi_core.Time(time='458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
        wbi_core.Time(time='+2021-01-01T15:15:15Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
        wbi_core.Url("http://www.wikidata.org", prop_nr="P6"),
        wbi_core.Url("https://www.wikidata.org", prop_nr="P6"),
        wbi_core.Url("ftp://example.com", prop_nr="P6"),
        wbi_core.Url("ssh://user@server/project.git", prop_nr="P6"),
        wbi_core.Url("svn+ssh://user@server:8888/path", prop_nr="P6"),
        wbi_core.MonolingualText(text="xxx", language="fr", prop_nr="P7"),
        wbi_core.Quantity(quantity=-5.04, prop_nr="P8"),
        wbi_core.Quantity(quantity=5.06, upper_bound=9.99, lower_bound=-2.22, unit="Q11573", prop_nr="P8"),
        wbi_core.CommonsMedia("xxx", prop_nr="P9"),
        wbi_core.GlobeCoordinate(latitude=1.2345, longitude=-1.2345, precision=12, prop_nr="P10"),
        wbi_core.GeoShape("Data:xxx.map", prop_nr="P11"),
        wbi_core.Property("P123", prop_nr="P12"),
        wbi_core.Property("123", prop_nr="P12"),
        wbi_core.Property(123, prop_nr="P12"),
        wbi_core.TabularData("Data:Taipei+Population.tab", prop_nr="P13"),
        # Raw string: in a normal literal "\r" is a carriage return, which
        # would silently turn "\relative" into CR + "elative".
        wbi_core.MusicalNotation(r"\relative c' { c d e f | g2 g | a4 a a a | g1 |}", prop_nr="P14"),
        wbi_core.Lexeme("L123", prop_nr="P15"),
        wbi_core.Lexeme("123", prop_nr="P15"),
        wbi_core.Lexeme(123, prop_nr="P15"),
        wbi_core.Form("L123-F123", prop_nr="P16"),
        wbi_core.Sense("L123-S123", prop_nr="P17"),
    ]
    core_props = {f"P{x}" for x in range(20)}

    for d in data:
        # Single statement, as a list and as a bare object, with core props.
        item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=core_props)
        assert item.get_json_representation()
        item = wbi_core.ItemEngine(new_item=True, data=d, core_props=core_props)
        assert item.get_json_representation()
        # Same two shapes without any core props.
        item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=set())
        assert item.get_json_representation()
        item = wbi_core.ItemEngine(new_item=True, data=d, core_props=set())
        assert item.get_json_representation()

    # All statements on one item.
    item = wbi_core.ItemEngine(new_item=True, data=data, core_props=core_props)
    assert item.get_json_representation()
    item = wbi_core.ItemEngine(new_item=True, data=data, core_props=set())
    assert item.get_json_representation()
def test_fastrun_ref_ensembl():
    """Check ``write_required`` against fake Ensembl data, with and without reference checking."""

    def new_base_filter():
        # Every container gets its own dict instance.
        return {'P594': '', 'P703': 'Q15978631'}

    def ensembl_claim(references=None):
        # The statement under test; references are attached only when given.
        if references is None:
            return wbi_core.ExternalID(value='ENSG00000123374', prop_nr='P594')
        return wbi_core.ExternalID(value='ENSG00000123374', prop_nr='P594',
                                   references=references)

    # fastrun checks refs
    ref_aware = FastRunContainerFakeQueryDataEnsembl(
        base_filter=new_base_filter(),
        base_data_type=wbi_core.BaseDataType,
        engine=wbi_core.ItemEngine,
        use_refs=True,
    )

    # statement has no ref
    ref_aware.debug = True
    claims = [ensembl_claim()]
    assert ref_aware.write_required(data=claims)

    # statement has the same ref
    same_ref = [
        wbi_core.ItemID("Q29458763", "P248", is_reference=True),
        wbi_core.ExternalID("ENSG00000123374", "P594", is_reference=True),
    ]
    claims = [ensembl_claim(references=[same_ref])]
    assert not ref_aware.write_required(data=claims)

    # new statement has an different stated in
    other_stated_in = [
        wbi_core.ItemID("Q99999999999", "P248", is_reference=True),
        wbi_core.ExternalID("ENSG00000123374", "P594", is_reference=True),
    ]
    claims = [ensembl_claim(references=[other_stated_in])]
    assert ref_aware.write_required(data=claims)

    # fastrun don't check references, statement has no reference,
    ref_blind = FastRunContainerFakeQueryDataEnsemblNoRef(
        base_filter=new_base_filter(),
        base_data_type=wbi_core.BaseDataType,
        engine=wbi_core.ItemEngine,
        use_refs=False,
    )
    claims = [ensembl_claim()]
    assert not ref_blind.write_required(data=claims)

    # fastrun don't check references, statement has reference,
    ref_blind = FastRunContainerFakeQueryDataEnsemblNoRef(
        base_filter=new_base_filter(),
        base_data_type=wbi_core.BaseDataType,
        engine=wbi_core.ItemEngine,
        use_refs=False,
    )
    unrelated_ref = [wbi_core.ItemID("Q123", "P31", is_reference=True)]
    claims = [ensembl_claim(references=[unrelated_ref])]
    assert not ref_blind.write_required(data=claims)
def add_usage_example(
        document_id=None,
        sentence=None,
        lid=None,
        form_id=None,
        sense_id=None,
        word=None,
        publication_date=None,
        language_style=None,
        type_of_reference=None,
        source=None,
        line=None,
):
    """Upload one usage example (P5831) for a lexeme in a single edit.

    All arguments are validated before any statement object is built, so an
    invalid call fails before touching WikibaseIntegrator.

    Args:
        document_id: Riksdagen document ID (P8433); only used when
            ``source`` is ``"riksdagen"``.
        sentence: Text of the usage example.
        lid: ID of the lexeme that receives the statement.
        form_id: Value of the P5830 qualifier.
        sense_id: Value of the P6072 qualifier.
        word: Unused; kept so existing callers keep working.
        publication_date: ISO-format date string; required when ``source``
            is ``"riksdagen"``.
        language_style: ``"formal"`` or ``"informal"``.
        type_of_reference: ``"written"`` or ``"oral"``.
        source: ``"riksdagen"`` or ``"europarl"``.
        line: Line number stored in the reference (P7421); only used when
            ``source`` is ``"europarl"``.

    Returns:
        ``False`` when a riksdagen example has no publication date,
        otherwise whatever ``ItemEngine.write`` returns.

    Raises:
        SystemExit: With code 1 when ``language_style``,
            ``type_of_reference`` or ``source`` is not a supported value.
    """
    # --- Validate arguments -------------------------------------------
    if language_style == "formal":
        style = "Q104597585"
    elif language_style == "informal":
        style = "Q901711"
    else:
        print(f"Error. Language style {language_style} " +
              "not one of (formal,informal)")
        # Same effect as exit(1), without relying on the ``site`` helper.
        raise SystemExit(1)
    logging.debug(f"Generating qualifier language_style with {style}")

    # oral or written
    if type_of_reference == "written":
        medium = "Q47461344"
    elif type_of_reference == "oral":
        medium = "Q52946"
    else:
        print(f"Error. Type of reference {type_of_reference} " +
              "not one of (written,oral)")
        raise SystemExit(1)
    logging.debug(f"Generating qualifier type of reference with {medium}")

    published = None
    if source == "riksdagen":
        if publication_date is None:
            # The f-prefix is required for the document ID to be
            # interpolated into the message.
            print(f"Publication date of document {document_id} " +
                  "is missing. We have no fallback for that at the moment. " +
                  "Abort adding usage example.")
            return False
        published = datetime.fromisoformat(publication_date)
    elif source != "europarl":
        # Without this guard no reference would be defined further down.
        print(f"Error. Source {source} " +
              "not one of (riksdagen,europarl)")
        raise SystemExit(1)

    # --- Build the statement ------------------------------------------
    # Use WikibaseIntegrator aka wbi to upload the changes in one edit
    link_to_form = wbi_core.Form(prop_nr="P5830", value=form_id, is_qualifier=True)
    link_to_sense = wbi_core.Sense(prop_nr="P6072", value=sense_id, is_qualifier=True)
    language_style_qualifier = wbi_core.ItemID(prop_nr="P6191", value=style, is_qualifier=True)
    # NOTE(review): this object is flagged as a qualifier but is placed in
    # the reference below — confirm that is intended.
    type_of_reference_qualifier = wbi_core.ItemID(prop_nr="P3865", value=medium, is_qualifier=True)

    # Today's UTC date at midnight; ``datetime.utcnow()`` is deprecated.
    fetched_today = wbi_core.Time(
        prop_nr="P813",  # Fetched today
        time=datetime.now(timezone.utc).strftime("+%Y-%m-%dT00:00:00Z"),
        is_reference=True,
    )

    if source == "riksdagen":
        reference = [
            wbi_core.ItemID(prop_nr="P248", value="Q21592569", is_reference=True),
            wbi_core.ExternalID(
                prop_nr="P8433",  # Riksdagen Document ID
                value=document_id,
                is_reference=True),
            fetched_today,
            wbi_core.Time(
                prop_nr="P577",  # Publication date
                time=published.strftime("+%Y-%m-%dT00:00:00Z"),
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    else:  # source == "europarl"
        reference = [
            wbi_core.ItemID(prop_nr="P248", value="Q5412081", is_reference=True),
            fetched_today,
            wbi_core.Time(
                prop_nr="P577",  # Publication date
                time="+2012-05-12T00:00:00Z",
                is_reference=True,
            ),
            wbi_core.Url(
                prop_nr="P854",  # reference url
                value="http://www.statmt.org/europarl/v7/sv-en.tgz",
                is_reference=True,
            ),
            # filename in archive
            wbi_core.String(
                (f"europarl-v7.{config.language_code}" +
                 f"-en.{config.language_code}"),
                "P7793",
                is_reference=True,
            ),
            # line number
            wbi_core.String(
                str(line),
                "P7421",
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]

    # This is the usage example statement
    claim = wbi_core.MonolingualText(
        sentence,
        "P5831",
        language=config.language_code,
        # Add qualifiers
        qualifiers=[
            link_to_form,
            link_to_sense,
            language_style_qualifier,
        ],
        # Add reference
        references=[reference],
    )
    if config.debug_json:
        logging.debug(f"claim:{claim.get_json_representation()}")
    item = wbi_core.ItemEngine(
        data=[claim],
        append_value=["P5831"],
        item_id=lid,
    )

    # --- Upload ---------------------------------------------------------
    if config.login_instance is None:
        # Authenticate with WikibaseIntegrator
        print("Logging in with Wikibase Integrator")
        config.login_instance = wbi_login.Login(user=config.username,
                                                pwd=config.password)
    result = item.write(
        config.login_instance,
        edit_summary="Added usage example with [[Wikidata:LexUse]]")
    if config.debug_json:
        logging.debug(f"result from WBI:{result}")
    return result
# Upload the SAOB identifier only when a category was given and it equals
# lexeme_data[1].
if category is not None:
    if category == lexeme_data[1]:
        print("Hooray categories match, uploading")
        #*************************
        # upload
        #*************************
        lemma = lexeme
        lid = lexeme_data[0]
        print(f"Uploading id to {lid}: {lemma}")
        # TODO if numbered
        # - fetch lexeme using wbi
        # - present to user
        # - ask user which if one matches
        print(f"Adding {saob_id} to {lid}")
        # Two statements are written in the same edit: the identifier
        # itself (P8478) and a P1343 statement pointing at Q1935308.
        saob_statement = wbi_core.ExternalID(
            prop_nr="P8478",
            value=saob_id,
        )
        described_by_source = wbi_core.ItemID(prop_nr="P1343", value="Q1935308")
        item = wbi_core.ItemEngine(
            data=[saob_statement, described_by_source],
            #append_value="P8478",
            item_id=lid)
        result = item.write(
            login_instance,
            edit_summary=
            "Added SAOB identifier with [[Wikidata:Tools/LexSAOB]]"
        )
        #if config.debug_json:
        #logging.debug(f"result from WBI:{result}")
        # Print wd_prefix followed by the lexeme ID that was just written.
        print(f"{wd_prefix}{lid}")
def add_usage_example(
        document_id=None,
        sentence=None,
        lid=None,
        form_id=None,
        sense_id=None,
        word=None,
        publication_date=None,
        language_style=None,
        type_of_reference=None,
        source=None,
        line=None,
):
    """Upload one usage example (P5831) for a lexeme in a single edit.

    Arguments are validated before any statement object is built. User
    facing messages are translated first and formatted afterwards, so the
    constant template is what gets looked up in the message catalog.

    Args:
        document_id: Document identifier stored in the reference (P8433 for
            riksdagen, P1260 for ksamsok); not used for europarl.
        sentence: Text of the usage example.
        lid: ID of the lexeme that receives the statement.
        form_id: Value of the P5830 qualifier.
        sense_id: Value of the P6072 qualifier.
        word: Unused; kept so existing callers keep working.
        publication_date: ISO-format date string; required when ``source``
            is ``"riksdagen"``.
        language_style: ``"formal"`` or ``"informal"``.
        type_of_reference: ``"written"`` or ``"oral"``.
        source: ``"riksdagen"``, ``"europarl"`` or ``"ksamsok"``.
        line: Line number stored in the reference (P7421); only used when
            ``source`` is ``"europarl"``.

    Returns:
        ``None`` when ``language_style`` or ``type_of_reference`` is not
        supported, ``False`` when a riksdagen example has no publication
        date, otherwise whatever ``ItemEngine.write`` returns.

    Raises:
        SystemExit: With code 1 when ``source`` matches none of the
            supported sources, so no reference could be built.
    """
    # --- Validate arguments -------------------------------------------
    if language_style == "formal":
        style = "Q104597585"
    elif language_style == "informal":
        style = "Q901711"
    else:
        print(_(
            "Error. Language style {} "
            "not one of (formal,informal). Please report a bug at "
            "https://github.com/egils-consulting/LexUtils/issues"
        ).format(language_style))
        return
    logging.debug(f"Generating qualifier language_style with {style}")

    # oral or written
    if type_of_reference == "written":
        medium = "Q47461344"
    elif type_of_reference == "oral":
        medium = "Q52946"
    else:
        print(_(
            "Error. Type of reference {} "
            "not one of (written,oral). Please report a bug at "
            "https://github.com/egils-consulting/LexUtils/issues"
        ).format(type_of_reference))
        return
    logging.debug(
        _("Generating qualifier type of reference with {}").format(medium)
    )

    published = None
    if source == "riksdagen":
        if publication_date is None:
            # format() must apply to the whole message; attached to only the
            # last fragment it leaves the "{}" placeholder unfilled.
            print(_(
                "Publication date of document {} "
                "is missing. We have no fallback for that at the moment. "
                "Abort adding usage example."
            ).format(document_id))
            return False
        published = datetime.fromisoformat(publication_date)

    # --- Build the statement ------------------------------------------
    # Use WikibaseIntegrator aka wbi to upload the changes in one edit
    link_to_form = wbi_core.Form(
        prop_nr="P5830",
        value=form_id,
        is_qualifier=True
    )
    link_to_sense = wbi_core.Sense(
        prop_nr="P6072",
        value=sense_id,
        is_qualifier=True
    )
    language_style_qualifier = wbi_core.ItemID(
        prop_nr="P6191",
        value=style,
        is_qualifier=True
    )
    # NOTE(review): this object is flagged as a qualifier but is placed in
    # the reference below — confirm that is intended.
    type_of_reference_qualifier = wbi_core.ItemID(
        prop_nr="P3865",
        value=medium,
        is_qualifier=True
    )

    # Today's UTC date at midnight; ``datetime.utcnow()`` is deprecated.
    fetched_today = datetime.now(timezone.utc).strftime("+%Y-%m-%dT00:00:00Z")

    # Stays None when ``source`` matches none of the branches, which the
    # guard after the chain relies on.
    reference = None
    if source == "riksdagen":
        reference = [
            wbi_core.ItemID(
                prop_nr="P248",
                value="Q21592569",
                is_reference=True
            ),
            wbi_core.ExternalID(
                prop_nr="P8433",  # Riksdagen Document ID
                value=document_id,
                is_reference=True
            ),
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=fetched_today,
                is_reference=True,
            ),
            wbi_core.Time(
                prop_nr="P577",  # Publication date
                time=published.strftime("+%Y-%m-%dT00:00:00Z"),
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    elif source == "europarl":
        reference = [
            wbi_core.ItemID(
                prop_nr="P248",
                value="Q5412081",
                is_reference=True
            ),
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=fetched_today,
                is_reference=True,
            ),
            wbi_core.Time(
                prop_nr="P577",  # Publication date
                time="+2012-05-12T00:00:00Z",
                is_reference=True,
            ),
            wbi_core.Url(
                prop_nr="P854",  # reference url
                value="http://www.statmt.org/europarl/v7/sv-en.tgz",
                is_reference=True,
            ),
            # filename in archive
            wbi_core.String(
                (f"europarl-v7.{config.language_code}" +
                 f"-en.{config.language_code}"),
                "P7793",
                is_reference=True,
            ),
            # line number
            wbi_core.String(
                str(line),
                "P7421",
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    elif source == "ksamsok":
        # No date is provided unfortunately, so we set it to unknown value
        reference = [
            wbi_core.ItemID(
                prop_nr="P248",
                value="Q7654799",
                is_reference=True
            ),
            wbi_core.ExternalID(
                # K-Samsök URI
                prop_nr="P1260",
                value=document_id,
                is_reference=True
            ),
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=fetched_today,
                is_reference=True,
            ),
            wbi_core.Time(
                # We don't know the value of the publication dates unfortunately
                prop_nr="P577",  # Publication date
                time="",
                snak_type="somevalue",
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]

    if reference is None:
        logger.error(_(
            "No reference defined, cannot add usage example"
        ))
        # Same effect as exit(1), without relying on the ``site`` helper.
        raise SystemExit(1)

    # This is the usage example statement
    claim = wbi_core.MonolingualText(
        sentence,
        "P5831",
        language=config.language_code,
        # Add qualifiers
        qualifiers=[
            link_to_form,
            link_to_sense,
            language_style_qualifier,
        ],
        # Add reference
        references=[reference],
    )
    if config.debug_json:
        logging.debug(f"claim:{claim.get_json_representation()}")
    item = wbi_core.ItemEngine(
        data=[claim],
        append_value=["P5831"],
        item_id=lid,
    )

    # --- Upload ---------------------------------------------------------
    if config.login_instance is None:
        # Authenticate with WikibaseIntegrator
        print("Logging in with Wikibase Integrator")
        config.login_instance = wbi_login.Login(
            user=config.username,
            pwd=config.password
        )
    result = item.write(
        config.login_instance,
        edit_summary=_(
            "Added usage example with [[Wikidata:LexUtils]] v{}"
        ).format(config.version)
    )
    if config.debug_json:
        logging.debug(f"result from WBI:{result}")
    # TODO add handling of result from WBI and return True == Success or False
    return result
def add_usage_example(
        document_id=None,
        sentence=None,
        lid=None,
        form_id=None,
        sense_id=None,
        word=None,
        publication_date=None,
):
    """Add a Swedish usage example (P5831) to a lexeme and upload it.

    The statement is qualified with the form (P5830) and sense (P6072) and
    referenced to the Riksdagen open data portal with the document ID,
    today's date and the publication date.

    Relies on the module-level names ``debug_json`` and ``login_instance``.

    Args:
        document_id: Riksdagen document ID stored in the reference (P8433).
        sentence: Text of the usage example.
        lid: ID of the lexeme that receives the statement.
        form_id: Value of the P5830 qualifier.
        sense_id: Value of the P6072 qualifier.
        word: Unused; kept so existing callers keep working.
        publication_date: ISO-format date string; required.

    Returns:
        ``False`` when ``publication_date`` is ``None``, otherwise whatever
        ``ItemEngine.write`` returns.
    """
    # Use WikibaseIntegrator aka wbi to upload the changes in one edit
    if publication_date is None:
        # The f-prefix is required for the document ID to be interpolated
        # into the message.
        print(f"Publication date of document {document_id} " +
              "is missing. We have no fallback for that. " +
              "Abort adding usage example.")
        return False
    published = datetime.fromisoformat(publication_date)

    link_to_form = wbi_core.Form(prop_nr="P5830", value=form_id, is_qualifier=True)
    link_to_sense = wbi_core.Sense(prop_nr="P6072", value=sense_id, is_qualifier=True)

    # Today's UTC date at midnight; ``datetime.utcnow()`` is deprecated.
    fetched_today = datetime.now(timezone.utc).strftime("+%Y-%m-%dT00:00:00Z")

    reference = [
        wbi_core.ItemID(
            prop_nr="P248",  # Stated in Riksdagen open data portal
            value="Q21592569",
            is_reference=True),
        wbi_core.ExternalID(
            prop_nr="P8433",  # Riksdagen Document ID
            value=document_id,
            is_reference=True),
        wbi_core.Time(
            prop_nr="P813",  # Fetched today
            time=fetched_today,
            is_reference=True,
        ),
        wbi_core.Time(
            prop_nr="P577",  # Publication date
            time=published.strftime("+%Y-%m-%dT00:00:00Z"),
            is_reference=True,
        ),
    ]
    claim = wbi_core.MonolingualText(
        sentence,
        "P5831",
        language="sv",
        qualifiers=[link_to_form, link_to_sense],
        references=[reference],
    )
    if debug_json:
        print(claim.get_json_representation())
    item = wbi_core.ItemEngine(data=[claim], item_id=lid)
    if debug_json:
        print(item.get_json_representation())
    result = item.write(
        login_instance,
        edit_summary="Added usage example with [[Wikidata:LexUse]]")
    if debug_json:
        print(f"Result from WBI: {result}")
    return result