def create_indexer():
    """Create a sample index, a blob data source and an indexer that feeds the index.

    Uses ``service_endpoint``, ``key``, ``connection_string`` and
    ``indexers_client``, all of which are defined outside this function.
    Prints a confirmation line after the indexer has been created.
    """
    # create an index
    index_name = "indexer-hotels"
    fields = [
        SimpleField(name="hotelId", type=SearchFieldDataType.String, key=True),
        SimpleField(name="baseRate", type=SearchFieldDataType.Double),
    ]
    index = SearchIndex(name=index_name, fields=fields)
    ind_client = SearchIndexClient(service_endpoint, AzureKeyCredential(key))
    ind_client.create_index(index)

    # [START create_indexer]
    # create a datasource
    container = SearchIndexerDataContainer(name='searchcontainer')
    data_source = indexers_client.create_datasource(
        name="indexer-datasource",
        type="azureblob",
        connection_string=connection_string,
        container=container,
    )

    # create an indexer
    # The target must be the index created above ("indexer-hotels"); the
    # previous value "hotels" named an index this function never creates.
    indexer = SearchIndexer(
        name="sample-indexer",
        data_source_name="indexer-datasource",
        target_index_name="indexer-hotels",
    )
    result = indexers_client.create_indexer(indexer)
    print("Create new Indexer - sample-indexer")
def _create_index():
    """Define the "hotel-index" schema and create the index on the service.

    Uses ``service_endpoint`` and ``key`` from outside this function and
    returns the value produced by ``SearchIndexClient.create_index``.
    """
    # Field definitions for the hotel documents; hotelId is the key field.
    schema = [
        SimpleField(
            name="hotelId",
            type=SearchFieldDataType.String,
            filterable=True,
            sortable=True,
            key=True,
        ),
        SearchableField(name="hotelName", type=SearchFieldDataType.String),
        SimpleField(name="description", type=SearchFieldDataType.String),
        SimpleField(name="descriptionFr", type=SearchFieldDataType.String),
        SimpleField(name="category", type=SearchFieldDataType.String),
        SimpleField(name="parkingIncluded", type=SearchFieldDataType.Boolean, filterable=True),
        SimpleField(name="smokingAllowed", type=SearchFieldDataType.Boolean, filterable=True),
        SimpleField(name="lastRenovationDate", type=SearchFieldDataType.String),
        SimpleField(name="rating", type=SearchFieldDataType.Int64, sortable=True),
        SimpleField(name="location", type=SearchFieldDataType.GeographyPoint),
    ]
    cors = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)

    # Bundle name, fields and CORS options into the index definition.
    hotel_index = SearchIndex(name="hotel-index", fields=schema, cors_options=cors)

    index_client = SearchIndexClient(service_endpoint, AzureKeyCredential(key))
    return index_client.create_index(hotel_index)
def create_index(name, endpoint, key):
    """Create a document-metadata search index and return the service's response.

    Args:
        name: Name of the index to create.
        endpoint: URL of the search service.
        key: Admin API key, wrapped in an ``AzureKeyCredential``.

    Returns:
        The value returned by ``SearchIndexClient.create_index``. It was
        previously discarded; callers that ignore the return value are
        unaffected.
    """
    # Create a service client
    client = SearchIndexClient(endpoint, AzureKeyCredential(key))

    fields = [
        SimpleField(name='Id', type=SearchFieldDataType.String, key=True),
        SearchableField(name='FileName', type=SearchFieldDataType.String),
        SimpleField(name='FilePath', type=SearchFieldDataType.String),
        SearchableField(
            name='KeyPhrases',
            collection=True,
            type=SearchFieldDataType.String,
            analyzer_name="en.lucene",
        ),
        SearchableField(name='People', collection=True, type=SearchFieldDataType.String),
        SearchableField(name='Organisation', collection=True, type=SearchFieldDataType.String),
        SearchableField(name='Location', collection=True, type=SearchFieldDataType.String),
    ]
    cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)

    index = SearchIndex(
        name=name,
        fields=fields,
        scoring_profiles=[],  # no custom scoring profiles
        cors_options=cors_options,
    )
    return client.create_index(index)
def test_create_or_update_index(self, api_key, endpoint, index_name, **kwargs):
    """Create-or-update the "hotels" index twice and verify what comes back.

    The first call sends no scoring profiles; the second sends one named
    "MyProfile". After each call the returned CORS options must equal the
    ones that were submitted.
    """
    hotels = "hotels"
    schema = [
        SimpleField(name="hotelId", type=SearchFieldDataType.String, key=True),
        SimpleField(name="baseRate", type=SearchFieldDataType.Double),
    ]
    cors = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)

    def assert_cors_round_trips(returned):
        # The returned CORS settings must equal the submitted ones.
        assert returned.cors_options.allowed_origins == cors.allowed_origins
        assert returned.cors_options.max_age_in_seconds == cors.max_age_in_seconds

    # Pass 1: no scoring profiles.
    definition = SearchIndex(
        name=hotels,
        fields=schema,
        scoring_profiles=[],
        cors_options=cors,
    )
    client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))
    returned = client.create_or_update_index(index=definition)
    assert len(returned.scoring_profiles) == 0
    assert_cors_round_trips(returned)

    # Pass 2: same index with a single scoring profile.
    profile = ScoringProfile(name="MyProfile")
    definition = SearchIndex(
        name=hotels,
        fields=schema,
        scoring_profiles=[profile],
        cors_options=cors,
    )
    returned = client.create_or_update_index(index=definition)
    assert returned.scoring_profiles[0].name == profile.name
    assert_cors_round_trips(returned)
def test_get_service_statistics_v2020_06_30(self, mock_get_stats):
    """Check that get_service_statistics forwards only the client's headers.

    ``mock_get_stats`` is the mock injected by the caller; it must have been
    called with no positional arguments and a single ``headers`` keyword.
    """
    client = SearchIndexClient(
        "endpoint",
        CREDENTIAL,
        api_version=ApiVersion.V2020_06_30,
    )
    client.get_service_statistics()

    assert mock_get_stats.called
    # call_args unpacks as (positional args, keyword args).
    positional, keyword = mock_get_stats.call_args
    assert positional == ()
    assert keyword == {"headers": client._headers}
def test_list_indexes(self, api_key, endpoint, index_name, **kwargs):
    """Listing indexes must yield exactly one index, named ``index_name``."""
    credential = AzureKeyCredential(api_key)
    indexes = SearchIndexClient(endpoint, credential).list_indexes()

    only_index = next(indexes)
    assert only_index.name == index_name

    # Nothing else may follow the first entry.
    with pytest.raises(StopIteration):
        next(indexes)
def test_delete_indexes(self, api_key, endpoint, index_name, **kwargs):
    """Delete ``index_name`` and verify the service then lists no indexes."""
    import time

    credential = AzureKeyCredential(api_key)
    client = SearchIndexClient(endpoint, credential)
    client.delete_index(index_name)

    # Only pause when self.is_live is truthy; TIME_TO_SLEEP is defined elsewhere.
    if self.is_live:
        time.sleep(TIME_TO_SLEEP)

    remaining = client.list_indexes()
    with pytest.raises(StopIteration):
        next(remaining)
def test_index_endpoint_https(self):
    """Check how SearchIndexClient treats the endpoint argument.

    A bare host and an explicit https URL must both end up with an
    https endpoint; an http URL and a non-string endpoint must raise
    ``ValueError``.
    """
    credential = AzureKeyCredential(key="old_api_key")

    # Accepted endpoints: stored endpoint starts with "https".
    bare_host_client = SearchIndexClient("endpoint", credential)
    assert bare_host_client._endpoint.startswith('https')

    https_client = SearchIndexClient("https://endpoint", credential)
    assert https_client._endpoint.startswith('https')

    # Rejected endpoints: plain http, then a non-string value.
    with pytest.raises(ValueError):
        SearchIndexClient("http://endpoint", credential)

    with pytest.raises(ValueError):
        SearchIndexClient(12345, credential)
def test_create_synonym_map(self, api_key, endpoint, index_name, **kwargs):
    """Create "test-syn-map" and verify its type, name, synonyms and the map count."""
    map_name = "test-syn-map"
    expected_synonyms = [
        "USA, United States, United States of America",
        "Washington, Wash. => WA",
    ]

    client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))
    # Pass a copy so the expected list stays independent of the call.
    created = client.create_synonym_map(map_name, list(expected_synonyms))

    assert isinstance(created, SynonymMap)
    assert created.name == map_name
    assert created.synonyms == expected_synonyms
    assert len(client.get_synonym_maps()) == 1
def simple_analyze_text():
    """Run the "standard.lucene" analyzer over a short text and print the result.

    Uses ``service_endpoint``, ``key`` and ``index_name`` from outside this
    function.
    """
    # [START simple_analyze_text]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents.indexes import SearchIndexClient
    from azure.search.documents.indexes.models import AnalyzeTextOptions

    credential = AzureKeyCredential(key)
    client = SearchIndexClient(service_endpoint, credential)

    options = AnalyzeTextOptions(
        text="One's <two/>",
        analyzer_name="standard.lucene",
    )
    analysis = client.analyze_text(index_name, options)
    print(analysis.as_dict())
def test_search_indexers(self, endpoint, api_key, **kwargs):
    """Run every indexer sub-test in sequence against shared clients.

    The storage connection string and container name are read from
    ``kwargs`` and handed, together with both clients, to each helper.
    """
    indexer_client = SearchIndexerClient(endpoint, api_key)
    index_client = SearchIndexClient(endpoint, api_key)

    # Every helper receives the same four arguments.
    shared = (
        indexer_client,
        index_client,
        kwargs.get("search_storage_connection_string"),
        kwargs.get("search_storage_container_name"),
    )

    self._test_create_indexer(*shared)
    self._test_delete_indexer(*shared)
    self._test_get_indexer(*shared)
    self._test_list_indexer(*shared)
    self._test_create_or_update_indexer(*shared)
    self._test_reset_indexer(*shared)
    self._test_run_indexer(*shared)
    self._test_get_indexer_status(*shared)
    self._test_create_or_update_indexer_if_unchanged(*shared)
    self._test_delete_indexer_if_unchanged(*shared)
def _prepare_indexer(self, endpoint, api_key, name="sample-indexer", ds_name="sample-datasource", id_name="hotels"):
    """Create a blob data source and a one-field index, then build an indexer definition.

    The data source and the index are created on the service; the returned
    ``SearchIndexer`` is only constructed, not created.

    Args:
        endpoint: Search service endpoint.
        api_key: API key, wrapped in an ``AzureKeyCredential``.
        name: Name given to the indexer definition.
        ds_name: Name of the data source connection to create.
        id_name: Name of the index to create.

    Returns:
        A ``SearchIndexer`` referencing the created data source and index.
    """
    storage_connection = self.settings.AZURE_STORAGE_CONNECTION_STRING
    self.scrubber.register_name_pair(storage_connection, 'connection_string')

    # Data source pointing at the 'searchcontainer' blob container.
    blob_source = SearchIndexerDataSourceConnection(
        name=ds_name,
        type="azureblob",
        connection_string=storage_connection,
        container=SearchIndexerDataContainer(name='searchcontainer'),
    )
    indexer_client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    created_source = indexer_client.create_datasource(blob_source)

    # Minimal index: a single non-searchable string key.
    key_field = {
        "name": "hotelId",
        "type": "Edm.String",
        "key": True,
        "searchable": False,
    }
    target = SearchIndex(name=id_name, fields=[key_field])
    index_client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))
    created_index = index_client.create_index(target)

    return SearchIndexer(
        name=name,
        data_source_name=created_source.name,
        target_index_name=created_index.name,
    )
def test_synonym_map(self, endpoint, api_key):
    """Run the synonym-map sub-tests in a fixed order against one client."""
    index_client = SearchIndexClient(endpoint, api_key)

    # Each step receives the same client; the order is kept as written.
    steps = (
        self._test_create_synonym_map,
        self._test_delete_synonym_map,
        self._test_delete_synonym_map_if_unchanged,
        self._test_get_synonym_map,
        self._test_get_synonym_maps,
        self._test_create_or_update_synonym_map,
    )
    for step in steps:
        step(index_client)
def test_delete_indexes_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
    """Deleting an index with a stale ETag and IfNotModified must raise HttpResponseError."""
    client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

    # Create the "hotels" index with one scoring profile.
    key_field = {
        "name": "hotelId",
        "type": "Edm.String",
        "key": True,
        "searchable": False,
    }
    rate_field = {"name": "baseRate", "type": "Edm.Double"}
    index = SearchIndex(
        name="hotels",
        fields=[key_field, rate_field],
        scoring_profiles=[ScoringProfile(name="MyProfile")],
        cors_options=CorsOptions(allowed_origins=["*"], max_age_in_seconds=60),
    )
    original_etag = client.create_index(index).e_tag

    # Update the index on the service after capturing the ETag.
    index.scoring_profiles = []
    client.create_or_update_index(index)

    # Put the captured ETag back; the conditional delete is expected to fail.
    index.e_tag = original_etag
    with pytest.raises(HttpResponseError):
        client.delete_index(index, match_condition=MatchConditions.IfNotModified)
def test_alias(self, endpoint, api_key):
    """Run the alias sub-tests using the aliases "resort" and "motel".

    The index the aliases initially point at is the first name returned by
    ``list_index_names``; the second alias is later re-pointed to "hotel".
    """
    client = SearchIndexClient(endpoint, api_key)
    aliases = ["resort", "motel"]
    first_alias, second_alias = aliases
    existing_index = next(client.list_index_names())

    self._test_list_aliases_empty(client)
    self._test_create_alias(client, first_alias, existing_index)
    self._test_create_or_update_alias(client, second_alias, existing_index)

    # point an old alias to a new index
    replacement_index = "hotel"
    self._test_update_alias_to_new_index(
        client, second_alias, replacement_index, existing_index
    )

    self._test_get_alias(client, aliases)
    self._test_list_aliases(client, aliases)
    self._test_delete_aliases(client)
def _clean_up_indexes(endpoint, api_key):
    """Delete every synonym map and every index on the given search service.

    Args:
        endpoint: Search service endpoint.
        api_key: API key, wrapped in an ``AzureKeyCredential``.
    """
    from azure.search.documents.indexes import SearchIndexClient

    credential = AzureKeyCredential(api_key)
    client = SearchIndexClient(endpoint, credential)

    # Synonym maps are removed explicitly: they seem to survive the index.
    for synonym_map in client.get_synonym_maps():
        client.delete_synonym_map(synonym_map.name)

    # Then remove whatever indexes exist.
    for existing_index in client.list_indexes():
        client.delete_index(existing_index)
def authentication_service_client_with_api_key_credential():
    """Build a SearchIndexClient from endpoint and API key environment variables.

    Reads ``AZURE_SEARCH_SERVICE_ENDPOINT`` and ``AZURE_SEARCH_API_KEY``.
    The client is constructed but not returned.
    """
    # [START create_search_service_client_with_key]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents.indexes import SearchIndexClient

    endpoint_from_env = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
    key_from_env = os.environ.get("AZURE_SEARCH_API_KEY")

    credential = AzureKeyCredential(key_from_env)
    search_client = SearchIndexClient(endpoint_from_env, credential)
def test_delete_synonym_map(self, api_key, endpoint, index_name, **kwargs):
    """Create "test-syn-map", delete it, and check the map count goes from 1 to 0."""
    map_name = "test-syn-map"
    client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

    client.create_synonym_map(
        map_name,
        [
            "USA, United States, United States of America",
            "Washington, Wash. => WA",
        ],
    )
    assert len(client.get_synonym_maps()) == 1

    client.delete_synonym_map(map_name)
    assert len(client.get_synonym_maps()) == 0
def test_create_or_update_synonym_map(self, api_key, endpoint, index_name, **kwargs):
    """Replace the synonyms of "test-syn-map" and verify the stored result.

    The map count must stay at 1 across the update, and the fetched map
    (a dict here) must carry only the replacement synonyms.
    """
    map_name = "test-syn-map"
    client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

    # Initial version of the map.
    client.create_synonym_map(
        map_name,
        ["USA, United States, United States of America"],
    )
    assert len(client.get_synonym_maps()) == 1

    # Overwrite it with a different synonym list.
    client.create_or_update_synonym_map(
        map_name,
        ["Washington, Wash. => WA"],
    )
    assert len(client.get_synonym_maps()) == 1

    fetched = client.get_synonym_map(map_name)
    assert isinstance(fetched, dict)
    assert fetched["name"] == map_name
    assert fetched["synonyms"] == ["Washington, Wash. => WA"]
def test_index_credential_roll(self):
    """Updating the AzureKeyCredential must be reflected in the client's headers."""

    def headers_for(api_key):
        # Full header dict expected for the given key.
        return {
            "api-key": api_key,
            "Accept": "application/json;odata.metadata=minimal",
        }

    credential = AzureKeyCredential(key="old_api_key")
    client = SearchIndexClient("endpoint", credential)
    assert client._headers == headers_for("old_api_key")

    # Roll the key on the credential object the client already holds.
    credential.update("new_api_key")
    assert client._headers == headers_for("new_api_key")
# dispmsg: show a status label, then run either a query search ("active"
# learning) or a classification ("passive" learning) over the chosen directory.
#
# - Always places a label at grid row 10 / column 1 whose text includes the
#   value of self.name_var.
# - If self.name_var1 (passed to search() as Query) is non-empty: reads the
#   files via Read_Files, builds a DataFrame with FileName/FilePath/Text
#   columns, runs NER on it, and for every row runs TextRank4Keyword
#   (candidate_pos NOUN/PROPN, window_size 4) and stores get_keywords(100)
#   in a new KeyPhrases column. It then calls search(...) against the index
#   named after the lower-cased self.vertical value. When that name is
#   'default' the index is created beforehand and deleted afterwards.
# - Else, if self.classes is not the string 'None': reads the files with
#   learning='passive' and vertical=None and calls classifier(...).
# - Otherwise does nothing.
#
# NOTE(review): endpoint and key are placeholder literals
# ("<EndPoint>", "<Cognitive search key>"); presumably they must be replaced
# with real values before this can reach a service -- confirm.
# NOTE(review): statement nesting is not recoverable from this single-line
# form. In particular, confirm whether upload_docs(...) belongs inside the
# first `if name == 'default':` branch before reformatting this method.
# NOTE(review): delete_index is not guarded by try/finally, so an exception
# raised by upload_docs or search would leave the 'default' index behind.
def dispmsg(self): name_label2 = ttk.Label(self.window, text = "File with the queried intents is downloaded at " + str(self.name_var.get()), font=('Times New Roman', 10, 'normal')) name_label2.grid(row=10,column=1,padx = 5, pady = 10) if str(self.name_var1.get()) != '': learning = 'active' Data,UserFnames = Read_Files(str(self.name_var.get()), learning=learning, vertical= str(self.vertical.get()).lower()) Data_Frame = pd.DataFrame(Data, columns = ['FileName', 'FilePath', 'Text']) Data_Frame = NER(Data_Frame) kf = [] for ind in Data_Frame.index: text = Data_Frame['Text'][ind] tr4w = TextRank4Keyword() tr4w.analyze(text, candidate_pos = ['NOUN', 'PROPN'], window_size=4, lower=False) kf.append(tr4w.get_keywords(100)) Data_Frame['KeyPhrases'] = kf name = str(self.vertical.get()).lower() endpoint = "https://<EndPoint>.search.windows.net" key = "<Cognitive search key>" if name == 'default': create_index(name, endpoint, key) upload_docs(Data_Frame=Data_Frame, index_name= name, endpoint=endpoint, key=key) result = search(rootdir=str(self.name_var.get()), Query=str(self.name_var1.get()), index_name=name, endpoint=endpoint, key= key, fnames = UserFnames, vertical=str(self.vertical.get()).lower()) if name == 'default': from azure.search.documents.indexes import SearchIndexClient from azure.core.credentials import AzureKeyCredential client = SearchIndexClient(endpoint, AzureKeyCredential(key)) client.delete_index(name) elif str(self.name_var1.get()) == '' and str(self.classes.get()) != 'None': learning = 'passive' Data,UserFnames = Read_Files(str(self.name_var.get()), learning=learning, vertical= None) Data_Frame = pd.DataFrame(Data, columns = ['FileName', 'FilePath', 'Text']) result = classifier(dataframe=Data_Frame, classs=str(self.classes.get()), rootdir=str(self.name_var.get())) else: pass
def test_search_index_client(self, api_key, endpoint, index_name):
    """Run the index-client sub-tests in order against the "hotels" index.

    The ``index_name`` argument is not used; the sub-tests that take an
    index name always receive "hotels".
    """
    index_client = SearchIndexClient(endpoint, api_key)
    hotels = "hotels"

    # Service-level checks before any index exists.
    self._test_get_service_statistics(index_client)
    self._test_list_indexes_empty(index_client)

    # Create the index and read it back.
    self._test_create_index(index_client, hotels)
    self._test_list_indexes(index_client, hotels)
    self._test_get_index(index_client, hotels)
    self._test_get_index_statistics(index_client, hotels)

    # Conditional and unconditional updates, text analysis, then cleanup.
    self._test_delete_indexes_if_unchanged(index_client)
    self._test_create_or_update_index(index_client)
    self._test_create_or_update_indexes_if_unchanged(index_client)
    self._test_analyze_text(index_client, hotels)
    self._test_delete_indexes(index_client)
def test_get_synonym_maps(self, api_key, endpoint, index_name, **kwargs):
    """Create two synonym maps and verify get_synonym_maps returns both as a list."""
    client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

    client.create_synonym_map(
        "test-syn-map-1",
        ["USA, United States, United States of America"],
    )
    client.create_synonym_map(
        "test-syn-map-2",
        ["Washington, Wash. => WA"],
    )

    maps = client.get_synonym_maps()
    assert isinstance(maps, list)
    assert all(isinstance(entry, SynonymMap) for entry in maps)

    returned_names = {entry.name for entry in maps}
    assert returned_names == {"test-syn-map-1", "test-syn-map-2"}
def test_delete_synonym_map_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
    """Deleting a synonym map with a stale ETag and IfNotModified must fail.

    The map is created, then updated on the service, and the ETag captured
    at creation is put back on the local object. The conditional delete is
    expected to raise ``HttpResponseError`` and leave the map in place.
    """
    client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))
    result = client.create_synonym_map("test-syn-map", [
        "USA, United States, United States of America",
        "Washington, Wash. => WA",
    ])
    etag = result.e_tag

    # Update the map on the service after the ETag was captured.
    client.create_or_update_synonym_map("test-syn-map", [
        "Washington, Wash. => WA",
    ])

    result.e_tag = etag
    with pytest.raises(HttpResponseError):
        client.delete_synonym_map(
            result, match_condition=MatchConditions.IfNotModified)

    # Kept outside the `raises` block on purpose: placed after the raising
    # call inside the block, this assertion would never execute.
    assert len(client.get_synonym_maps()) == 1
def test_create_or_update_synonym_map(self, api_key, endpoint, index_name, **kwargs):
    """Update a SynonymMap built from newline-joined rules and verify the stored result.

    The map count must stay at 1 across the update, and the fetched
    ``SynonymMap`` must expose only the replacement rule, as a list.
    """
    client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

    initial_rules = [
        "USA, United States, United States of America",
        "Washington, Wash. => WA",
    ]
    replacement_rules = ["Washington, Wash. => WA"]

    # The rules are submitted as a single newline-separated string.
    synonym_map = SynonymMap(name="test-syn-map", synonyms="\n".join(initial_rules))
    client.create_synonym_map(synonym_map)
    assert len(client.get_synonym_maps()) == 1

    synonym_map.synonyms = "\n".join(replacement_rules)
    client.create_or_update_synonym_map(synonym_map)
    assert len(client.get_synonym_maps()) == 1

    fetched = client.get_synonym_map("test-syn-map")
    assert isinstance(fetched, SynonymMap)
    assert fetched.name == "test-syn-map"
    assert fetched.synonyms == ["Washington, Wash. => WA"]
def test_create_or_update_synonym_map_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
    """A conditional update carrying a stale ETag must raise HttpResponseError."""
    client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

    synonym_map = SynonymMap(
        name="test-syn-map",
        synonyms=[
            "USA, United States, United States of America",
            "Washington, Wash. => WA",
        ],
    )
    created = client.create_synonym_map(synonym_map)
    original_etag = created.e_tag

    # Update the map on the service after the ETag was captured.
    synonym_map.synonyms = ["Washington, Wash. => WA"]
    client.create_or_update_synonym_map(synonym_map)

    # Re-submit the first result with the captured ETag and IfNotModified.
    created.e_tag = original_etag
    with pytest.raises(HttpResponseError):
        client.create_or_update_synonym_map(
            created,
            match_condition=MatchConditions.IfNotModified,
        )
def test_get_index_statistics(self, api_key, endpoint, index_name, **kwargs):
    """Index statistics must expose exactly 'document_count' and 'storage_size'."""
    credential = AzureKeyCredential(api_key)
    statistics = SearchIndexClient(endpoint, credential).get_index_statistics(index_name)

    expected_keys = {'document_count', 'storage_size'}
    assert set(statistics.keys()) == expected_keys
def test_get_index(self, api_key, endpoint, index_name, **kwargs):
    """Fetching ``index_name`` must return an index carrying that name."""
    credential = AzureKeyCredential(api_key)
    fetched = SearchIndexClient(endpoint, credential).get_index(index_name)
    assert fetched.name == index_name
def test_list_indexes_empty(self, api_key, endpoint, **kwargs):
    """With no indexes present, the list_indexes iterator must be exhausted at once."""
    credential = AzureKeyCredential(api_key)
    indexes = SearchIndexClient(endpoint, credential).list_indexes()

    with pytest.raises(StopIteration):
        next(indexes)
def test_get_service_statistics(self, api_key, endpoint, **kwargs):
    """Service statistics must be a dict with exactly 'counters' and 'limits'."""
    credential = AzureKeyCredential(api_key)
    statistics = SearchIndexClient(endpoint, credential).get_service_statistics()

    assert isinstance(statistics, dict)
    expected_keys = {"counters", "limits"}
    assert set(statistics.keys()) == expected_keys