Exemplo n.º 1
0
def create_indexer():
    """Create a sample index, a blob data source and an indexer linking them.

    Uses ``service_endpoint``, ``key``, ``connection_string`` and
    ``indexers_client`` from the enclosing module scope. Prints a
    confirmation message once the indexer has been created.
    """
    # create an index
    index_name = "indexer-hotels"
    fields = [
        SimpleField(name="hotelId", type=SearchFieldDataType.String, key=True),
        SimpleField(name="baseRate", type=SearchFieldDataType.Double)
    ]
    index = SearchIndex(name=index_name, fields=fields)
    ind_client = SearchIndexClient(service_endpoint, AzureKeyCredential(key))
    ind_client.create_index(index)

    # [START create_indexer]
    # create a datasource
    container = SearchIndexerDataContainer(name='searchcontainer')
    data_source = indexers_client.create_datasource(
        name="indexer-datasource",
        type="azureblob",
        connection_string=connection_string,
        container=container)

    # create an indexer
    # The target must be the index created above ("indexer-hotels"); the
    # previous value "hotels" named an index this function never creates.
    indexer = SearchIndexer(name="sample-indexer",
                            data_source_name="indexer-datasource",
                            target_index_name="indexer-hotels")
    result = indexers_client.create_indexer(indexer)
    print("Create new Indexer - sample-indexer")
Exemplo n.º 2
0
def _create_index():
    """Create the "hotel-index" search index and return the service result.

    The index has a string key ``hotelId``, one searchable field
    (``hotelName``) and a set of simple fields, plus CORS options allowing
    any origin. ``service_endpoint`` and ``key`` come from the enclosing
    module scope.
    """
    string_type = SearchFieldDataType.String
    boolean_type = SearchFieldDataType.Boolean

    # Field definitions, listed in schema order.
    hotel_fields = [
        SimpleField(
            name="hotelId",
            type=string_type,
            filterable=True,
            sortable=True,
            key=True,
        ),
        SearchableField(name="hotelName", type=string_type),
        SimpleField(name="description", type=string_type),
        SimpleField(name="descriptionFr", type=string_type),
        SimpleField(name="category", type=string_type),
        SimpleField(name="parkingIncluded", type=boolean_type, filterable=True),
        SimpleField(name="smokingAllowed", type=boolean_type, filterable=True),
        SimpleField(name="lastRenovationDate", type=string_type),
        SimpleField(
            name="rating",
            type=SearchFieldDataType.Int64,
            sortable=True,
        ),
        SimpleField(name="location", type=SearchFieldDataType.GeographyPoint),
    ]

    # Bundle name, fields and CORS options into the index definition.
    index_definition = SearchIndex(
        name="hotel-index",
        fields=hotel_fields,
        cors_options=CorsOptions(allowed_origins=["*"], max_age_in_seconds=60),
    )
    service_client = SearchIndexClient(service_endpoint, AzureKeyCredential(key))
    return service_client.create_index(index_definition)
def create_index(name, endpoint, key):
    """Create a document index called *name* on the given search service.

    Args:
        name: Name of the index to create.
        endpoint: Search service endpoint passed to ``SearchIndexClient``.
        key: API key wrapped in an ``AzureKeyCredential``.

    Returns:
        None. The service response is not returned to the caller.
    """
    service_client = SearchIndexClient(endpoint, AzureKeyCredential(key))

    text_type = SearchFieldDataType.String
    schema = [
        SimpleField(name='Id', type=text_type, key=True),
        SearchableField(name='FileName', type=text_type),
        SimpleField(name='FilePath', type=text_type),
        SearchableField(
            name='KeyPhrases',
            collection=True,
            type=text_type,
            analyzer_name="en.lucene",
        ),
    ]
    # The remaining fields are all searchable string collections.
    schema.extend(
        SearchableField(name=entity_field, collection=True, type=text_type)
        for entity_field in ('People', 'Organisation', 'Location')
    )

    index_definition = SearchIndex(
        name=name,
        fields=schema,
        scoring_profiles=[],
        cors_options=CorsOptions(allowed_origins=["*"], max_age_in_seconds=60),
    )
    service_client.create_index(index_definition)
 def test_create_or_update_index(self, api_key, endpoint, index_name,
                                 **kwargs):
     """Create the "hotels" index, then update it with one scoring profile.

     After each call the returned index is checked for the expected scoring
     profiles and for CORS options equal to the ones that were sent.
     """
     hotel_fields = [
         SimpleField(name="hotelId", type=SearchFieldDataType.String, key=True),
         SimpleField(name="baseRate", type=SearchFieldDataType.Double),
     ]
     cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)
     client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

     def upsert(profiles):
         # Send the "hotels" definition with the given scoring profiles.
         definition = SearchIndex(
             name="hotels",
             fields=hotel_fields,
             scoring_profiles=profiles,
             cors_options=cors_options,
         )
         return client.create_or_update_index(index=definition)

     def check_cors(response):
         # The CORS settings must round-trip unchanged.
         assert response.cors_options.allowed_origins == cors_options.allowed_origins
         assert response.cors_options.max_age_in_seconds == cors_options.max_age_in_seconds

     # First pass: no scoring profiles.
     created = upsert([])
     assert len(created.scoring_profiles) == 0
     check_cors(created)

     # Second pass: same index, now with a single scoring profile.
     scoring_profile = ScoringProfile(name="MyProfile")
     updated = upsert([scoring_profile])
     assert updated.scoring_profiles[0].name == scoring_profile.name
     check_cors(updated)
 def test_get_service_statistics_v2020_06_30(self, mock_get_stats):
     """Check the call made by ``get_service_statistics`` on API 2020-06-30.

     The mocked operation must be invoked with no positional arguments and
     with only the client's headers as keyword arguments.
     """
     search_client = SearchIndexClient(
         "endpoint",
         CREDENTIAL,
         api_version=ApiVersion.V2020_06_30,
     )
     search_client.get_service_statistics()

     assert mock_get_stats.called
     recorded_call = mock_get_stats.call_args
     assert recorded_call[0] == ()
     assert recorded_call[1] == {"headers": search_client._headers}
    def test_list_indexes(self, api_key, endpoint, index_name, **kwargs):
        """Listing indexes yields exactly one index, named *index_name*."""
        index_iter = SearchIndexClient(
            endpoint, AzureKeyCredential(api_key)).list_indexes()

        assert next(index_iter).name == index_name

        # Nothing may follow the first index.
        with pytest.raises(StopIteration):
            next(index_iter)
 def test_delete_indexes(self, api_key, endpoint, index_name, **kwargs):
     """Delete *index_name* and verify that no indexes remain listed."""
     import time

     search_client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))
     search_client.delete_index(index_name)

     # Live runs pause for TIME_TO_SLEEP seconds before listing.
     if self.is_live:
         time.sleep(TIME_TO_SLEEP)

     remaining = search_client.list_indexes()
     with pytest.raises(StopIteration):
         next(remaining)
    def test_index_endpoint_https(self):
        """Verify how the client treats the endpoint scheme.

        A scheme-less or ``https`` endpoint results in an ``https`` client
        endpoint; an ``http`` endpoint or a non-string endpoint raises
        ``ValueError``.
        """
        credential = AzureKeyCredential(key="old_api_key")

        for accepted_endpoint in ("endpoint", "https://endpoint"):
            client = SearchIndexClient(accepted_endpoint, credential)
            assert client._endpoint.startswith('https')

        for rejected_endpoint in ("http://endpoint", 12345):
            with pytest.raises(ValueError):
                SearchIndexClient(rejected_endpoint, credential)
Exemplo n.º 9
0
 def test_create_synonym_map(self, api_key, endpoint, index_name, **kwargs):
     """Create a synonym map and check the returned object and the map count."""
     map_name = "test-syn-map"
     rules = (
         "USA, United States, United States of America",
         "Washington, Wash. => WA",
     )
     client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

     # A fresh list is passed so the expected rules stay untouched.
     created = client.create_synonym_map(map_name, list(rules))

     assert isinstance(created, SynonymMap)
     assert created.name == map_name
     assert created.synonyms == list(rules)
     assert len(client.get_synonym_maps()) == 1
Exemplo n.º 10
0
def simple_analyze_text():
    """Run the "standard.lucene" analyzer on a sample text and print the result.

    ``service_endpoint``, ``key`` and ``index_name`` are taken from the
    enclosing module scope.
    """
    # [START simple_analyze_text]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents.indexes import SearchIndexClient
    from azure.search.documents.indexes.models import AnalyzeTextOptions

    index_client = SearchIndexClient(service_endpoint, AzureKeyCredential(key))

    options = AnalyzeTextOptions(
        text="One's <two/>",
        analyzer_name="standard.lucene",
    )

    analysis = index_client.analyze_text(index_name, options)
    print(analysis.as_dict())
Exemplo n.º 11
0
 def test_search_indexers(self, endpoint, api_key, **kwargs):
     """Run every indexer sub-test, in a fixed order, against shared clients."""
     storage_cs = kwargs.get("search_storage_connection_string")
     container_name = kwargs.get("search_storage_container_name")
     client = SearchIndexerClient(endpoint, api_key)
     index_client = SearchIndexClient(endpoint, api_key)

     # All sub-tests take the same four arguments.
     shared_args = (client, index_client, storage_cs, container_name)
     ordered_steps = (
         "_test_create_indexer",
         "_test_delete_indexer",
         "_test_get_indexer",
         "_test_list_indexer",
         "_test_create_or_update_indexer",
         "_test_reset_indexer",
         "_test_run_indexer",
         "_test_get_indexer_status",
         "_test_create_or_update_indexer_if_unchanged",
         "_test_delete_indexer_if_unchanged",
     )
     for step_name in ordered_steps:
         getattr(self, step_name)(*shared_args)
Exemplo n.º 12
0
    def _prepare_indexer(self,
                         endpoint,
                         api_key,
                         name="sample-indexer",
                         ds_name="sample-datasource",
                         id_name="hotels"):
        """Create a data source and an index, and return an indexer joining them.

        Args:
            endpoint: Search service endpoint.
            api_key: API key wrapped in an ``AzureKeyCredential``.
            name: Name given to the returned ``SearchIndexer``.
            ds_name: Name of the blob data source connection to create.
            id_name: Name of the index to create.

        Returns:
            A ``SearchIndexer`` referring to the created data source and
            index by the names the service returned. The indexer itself is
            only constructed here, not created on the service.
        """
        storage_connection = self.settings.AZURE_STORAGE_CONNECTION_STRING
        self.scrubber.register_name_pair(storage_connection, 'connection_string')

        # Data source over the 'searchcontainer' blob container.
        connection = SearchIndexerDataSourceConnection(
            name=ds_name,
            type="azureblob",
            connection_string=storage_connection,
            container=SearchIndexerDataContainer(name='searchcontainer'),
        )
        indexer_client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
        created_source = indexer_client.create_datasource(connection)

        # Minimal index: a single, non-searchable string key.
        key_field = {
            "name": "hotelId",
            "type": "Edm.String",
            "key": True,
            "searchable": False,
        }
        index_definition = SearchIndex(name=id_name, fields=[key_field])
        index_client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))
        created_index = index_client.create_index(index_definition)

        return SearchIndexer(
            name=name,
            data_source_name=created_source.name,
            target_index_name=created_index.name,
        )
 def test_synonym_map(self, endpoint, api_key):
     """Run every synonym-map sub-test, in a fixed order, on one client."""
     client = SearchIndexClient(endpoint, api_key)
     ordered_steps = (
         "_test_create_synonym_map",
         "_test_delete_synonym_map",
         "_test_delete_synonym_map_if_unchanged",
         "_test_get_synonym_map",
         "_test_get_synonym_maps",
         "_test_create_or_update_synonym_map",
     )
     for step_name in ordered_steps:
         getattr(self, step_name)(client)
    def test_delete_indexes_if_unchanged(self, api_key, endpoint, index_name,
                                         **kwargs):
        """A conditional delete with an outdated ETag must raise HttpResponseError.

        The "hotels" index is created, its ETag is recorded, and the index is
        then updated. Deleting with the recorded ETag and ``IfNotModified``
        is expected to fail.
        """
        client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

        # Step 1: create the index with one scoring profile.
        hotel_fields = [
            {
                "name": "hotelId",
                "type": "Edm.String",
                "key": True,
                "searchable": False,
            },
            {
                "name": "baseRate",
                "type": "Edm.Double",
            },
        ]
        profiles = [ScoringProfile(name="MyProfile")]
        cors = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)
        index = SearchIndex(
            name="hotels",
            fields=hotel_fields,
            scoring_profiles=profiles,
            cors_options=cors,
        )
        original_etag = client.create_index(index).e_tag

        # Step 2: update the index after the ETag has been recorded.
        index.scoring_profiles = []
        client.create_or_update_index(index)

        # Step 3: delete using the ETag recorded before the update.
        index.e_tag = original_etag
        with pytest.raises(HttpResponseError):
            client.delete_index(
                index, match_condition=MatchConditions.IfNotModified)
    def test_alias(self, endpoint, api_key):
        """Run the alias sub-tests in order, using the first listed index."""
        client = SearchIndexClient(endpoint, api_key)
        aliases = ["resort", "motel"]
        first_alias, second_alias = aliases
        existing_index = next(client.list_index_names())

        self._test_list_aliases_empty(client)
        self._test_create_alias(client, first_alias, existing_index)
        self._test_create_or_update_alias(client, second_alias, existing_index)

        # Repoint the second alias from the existing index to "hotel".
        self._test_update_alias_to_new_index(
            client, second_alias, "hotel", existing_index)

        self._test_get_alias(client, aliases)
        self._test_list_aliases(client, aliases)
        self._test_delete_aliases(client)
Exemplo n.º 16
0
def _clean_up_indexes(endpoint, api_key):
    """Delete every synonym map and every index on the search service.

    Args:
        endpoint: Search service endpoint.
        api_key: API key wrapped in an ``AzureKeyCredential``.
    """
    from azure.search.documents.indexes import SearchIndexClient
    service_client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

    # Synonym maps are removed explicitly; they seem to outlive the indexes.
    for synonym_map in service_client.get_synonym_maps():
        service_client.delete_synonym_map(synonym_map.name)

    # Then drop whatever indexes exist.
    for existing_index in service_client.list_indexes():
        service_client.delete_index(existing_index)
def authentication_service_client_with_api_key_credential():
    """Build a ``SearchIndexClient`` from endpoint and key environment variables.

    Reads ``AZURE_SEARCH_SERVICE_ENDPOINT`` and ``AZURE_SEARCH_API_KEY``.
    The client is only constructed; nothing is returned.
    """
    # [START create_search_service_client_with_key]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents.indexes import SearchIndexClient

    service_endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
    key = os.environ.get("AZURE_SEARCH_API_KEY")

    credential = AzureKeyCredential(key)
    search_client = SearchIndexClient(service_endpoint, credential)
Exemplo n.º 18
0
 def test_delete_synonym_map(self, api_key, endpoint, index_name, **kwargs):
     """Create one synonym map, delete it, and check the count goes 1 -> 0."""
     map_name = "test-syn-map"
     client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

     client.create_synonym_map(map_name, [
         "USA, United States, United States of America",
         "Washington, Wash. => WA",
     ])
     assert len(client.get_synonym_maps()) == 1

     client.delete_synonym_map(map_name)
     assert len(client.get_synonym_maps()) == 0
Exemplo n.º 19
0
 def test_create_or_update_synonym_map(self, api_key, endpoint, index_name,
                                       **kwargs):
     """Replace a synonym map's rules and check the fetched dict reflects it."""
     map_name = "test-syn-map"
     client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

     # Initial map with a single rule.
     client.create_synonym_map(map_name, [
         "USA, United States, United States of America",
     ])
     assert len(client.get_synonym_maps()) == 1

     # Updating under the same name must not add a second map.
     client.create_or_update_synonym_map(map_name, [
         "Washington, Wash. => WA",
     ])
     assert len(client.get_synonym_maps()) == 1

     fetched = client.get_synonym_map(map_name)
     assert isinstance(fetched, dict)
     assert fetched["name"] == map_name
     assert fetched["synonyms"] == ["Washington, Wash. => WA"]
 def test_index_credential_roll(self):
     """Client headers must follow the credential when its key is updated."""

     def expected_headers(api_key):
         # Header set expected for the given API key.
         return {
             "api-key": api_key,
             "Accept": "application/json;odata.metadata=minimal",
         }

     credential = AzureKeyCredential(key="old_api_key")
     client = SearchIndexClient("endpoint", credential)
     assert client._headers == expected_headers("old_api_key")

     credential.update("new_api_key")
     assert client._headers == expected_headers("new_api_key")
Exemplo n.º 21
0
    def dispmsg(self):
        """Show the download location and run the search or classifier flow.

        A label with the download path is always added to the window. Then:

        * If a query was entered (``self.name_var1``), the files are read in
          'active' mode, passed through ``NER``, given a ``KeyPhrases``
          column, uploaded to the search index named after the selected
          vertical and searched. For the 'default' vertical the index is
          created beforehand and deleted afterwards.
        * Otherwise, if a class other than 'None' is selected, the files are
          read in 'passive' mode and passed to ``classifier``.
        * Otherwise nothing further happens.

        NOTE(review): ``Read_Files``, ``NER``, ``upload_docs``, ``search`` and
        ``classifier`` are defined elsewhere; their exact behaviour is not
        visible here.
        """
        # Read the widget values once instead of on every use.
        root_dir = str(self.name_var.get())
        query = str(self.name_var1.get())

        name_label2 = ttk.Label(self.window,
                                text="File with the queried intents is downloaded at " + root_dir,
                                font=('Times New Roman', 10, 'normal'))
        name_label2.grid(row=10, column=1, padx=5, pady=10)

        if query != '':
            vertical = str(self.vertical.get()).lower()
            Data, UserFnames = Read_Files(root_dir, learning='active', vertical=vertical)
            Data_Frame = pd.DataFrame(Data, columns=['FileName', 'FilePath', 'Text'])
            Data_Frame = NER(Data_Frame)

            # Extract up to 100 keywords per document; a fresh extractor is
            # used for each text, as before.
            kf = []
            for text in Data_Frame['Text']:
                tr4w = TextRank4Keyword()
                tr4w.analyze(text, candidate_pos=['NOUN', 'PROPN'], window_size=4, lower=False)
                kf.append(tr4w.get_keywords(100))
            Data_Frame['KeyPhrases'] = kf

            # The index is named after the selected vertical.
            name = vertical
            endpoint = "https://<EndPoint>.search.windows.net"
            key = "<Cognitive search key>"

            # Only the 'default' index is created here and removed afterwards.
            is_temporary_index = name == 'default'
            if is_temporary_index:
                create_index(name, endpoint, key)
            try:
                upload_docs(Data_Frame=Data_Frame, index_name=name, endpoint=endpoint, key=key)
                search(rootdir=root_dir,
                       Query=query, index_name=name,
                       endpoint=endpoint, key=key, fnames=UserFnames,
                       vertical=vertical)
            finally:
                # Delete the temporary index even if upload or search failed,
                # so a failed run does not leave it behind on the service.
                if is_temporary_index:
                    from azure.search.documents.indexes import SearchIndexClient
                    from azure.core.credentials import AzureKeyCredential
                    client = SearchIndexClient(endpoint, AzureKeyCredential(key))
                    client.delete_index(name)
        elif str(self.classes.get()) != 'None':
            # No query entered: classify the files into the selected class.
            Data, UserFnames = Read_Files(root_dir, learning='passive', vertical=None)
            Data_Frame = pd.DataFrame(Data, columns=['FileName', 'FilePath', 'Text'])
            classifier(dataframe=Data_Frame, classs=str(self.classes.get()), rootdir=root_dir)
Exemplo n.º 22
0
 def test_search_index_client(self, api_key, endpoint, index_name):
     """Run every index-client sub-test, in a fixed order, on one client.

     The sub-tests that need an index name always get "hotels"; the
     ``index_name`` argument is not used.
     """
     client = SearchIndexClient(endpoint, api_key)
     hotels = "hotels"

     # (sub-test name, arguments passed after the client)
     ordered_steps = (
         ("_test_get_service_statistics", ()),
         ("_test_list_indexes_empty", ()),
         ("_test_create_index", (hotels,)),
         ("_test_list_indexes", (hotels,)),
         ("_test_get_index", (hotels,)),
         ("_test_get_index_statistics", (hotels,)),
         ("_test_delete_indexes_if_unchanged", ()),
         ("_test_create_or_update_index", ()),
         ("_test_create_or_update_indexes_if_unchanged", ()),
         ("_test_analyze_text", (hotels,)),
         ("_test_delete_indexes", ()),
     )
     for step_name, extra_args in ordered_steps:
         getattr(self, step_name)(client, *extra_args)
Exemplo n.º 23
0
 def test_get_synonym_maps(self, api_key, endpoint, index_name, **kwargs):
     """Create two synonym maps and check both are returned as SynonymMap objects."""
     client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))
     client.create_synonym_map(
         "test-syn-map-1",
         ["USA, United States, United States of America"],
     )
     client.create_synonym_map(
         "test-syn-map-2",
         ["Washington, Wash. => WA"],
     )

     maps = client.get_synonym_maps()

     assert isinstance(maps, list)
     assert all(isinstance(entry, SynonymMap) for entry in maps)
     returned_names = {entry.name for entry in maps}
     assert returned_names == {"test-syn-map-1", "test-syn-map-2"}
Exemplo n.º 24
0
    def test_delete_synonym_map_if_unchanged(self, api_key, endpoint,
                                             index_name, **kwargs):
        """A conditional delete with an outdated ETag must fail and delete nothing.

        The synonym map is created, its ETag is recorded, and the map is then
        updated. Deleting with the recorded ETag and ``IfNotModified`` is
        expected to raise ``HttpResponseError``, leaving exactly one map.
        """
        client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))
        result = client.create_synonym_map("test-syn-map", [
            "USA, United States, United States of America",
            "Washington, Wash. => WA",
        ])
        etag = result.e_tag

        # Update the map after the ETag has been recorded.
        client.create_or_update_synonym_map("test-syn-map", [
            "Washington, Wash. => WA",
        ])

        result.e_tag = etag
        with pytest.raises(HttpResponseError):
            client.delete_synonym_map(
                result, match_condition=MatchConditions.IfNotModified)

        # This check used to sit inside the ``raises`` block, after the call
        # that raises, so it never ran. Outside the block it verifies that
        # the rejected delete left the map in place.
        assert len(client.get_synonym_maps()) == 1
Exemplo n.º 25
0
 def test_create_or_update_synonym_map(self, api_key, endpoint, index_name,
                                       **kwargs):
     """Update a SynonymMap object's rules and check the fetched map reflects it."""
     client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

     # Rules are sent as one newline-joined string.
     initial_rules = "\n".join([
         "USA, United States, United States of America",
         "Washington, Wash. => WA",
     ])
     synonym_map = SynonymMap(name="test-syn-map", synonyms=initial_rules)
     client.create_synonym_map(synonym_map)
     assert len(client.get_synonym_maps()) == 1

     # Replace the rules and push the same object again.
     synonym_map.synonyms = "\n".join(["Washington, Wash. => WA"])
     client.create_or_update_synonym_map(synonym_map)
     assert len(client.get_synonym_maps()) == 1

     fetched = client.get_synonym_map("test-syn-map")
     assert isinstance(fetched, SynonymMap)
     assert fetched.name == "test-syn-map"
     assert fetched.synonyms == ["Washington, Wash. => WA"]
Exemplo n.º 26
0
    def test_create_or_update_synonym_map_if_unchanged(self, api_key, endpoint,
                                                       index_name, **kwargs):
        """A conditional update with an outdated ETag must raise HttpResponseError.

        The synonym map is created, its ETag is recorded, and the map is then
        updated. Updating again with the recorded ETag and ``IfNotModified``
        is expected to fail.
        """
        client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))

        synonym_map = SynonymMap(
            name="test-syn-map",
            synonyms=[
                "USA, United States, United States of America",
                "Washington, Wash. => WA",
            ],
        )
        created = client.create_synonym_map(synonym_map)
        original_etag = created.e_tag

        # Update the map after the ETag has been recorded.
        synonym_map.synonyms = ["Washington, Wash. => WA"]
        client.create_or_update_synonym_map(synonym_map)

        created.e_tag = original_etag
        with pytest.raises(HttpResponseError):
            client.create_or_update_synonym_map(
                created, match_condition=MatchConditions.IfNotModified)
 def test_get_index_statistics(self, api_key, endpoint, index_name,
                               **kwargs):
     """Index statistics must expose exactly the two expected keys."""
     expected_keys = {'document_count', 'storage_size'}
     stats = SearchIndexClient(
         endpoint, AzureKeyCredential(api_key)).get_index_statistics(index_name)
     assert set(stats.keys()) == expected_keys
 def test_get_index(self, api_key, endpoint, index_name, **kwargs):
     """Fetching an index by name returns an index carrying that name."""
     fetched = SearchIndexClient(
         endpoint, AzureKeyCredential(api_key)).get_index(index_name)
     assert fetched.name == index_name
 def test_list_indexes_empty(self, api_key, endpoint, **kwargs):
     """Listing indexes must yield nothing at all."""
     index_iter = SearchIndexClient(
         endpoint, AzureKeyCredential(api_key)).list_indexes()
     # The very first ``next`` must already signal exhaustion.
     with pytest.raises(StopIteration):
         next(index_iter)
 def test_get_service_statistics(self, api_key, endpoint, **kwargs):
     """Service statistics come back as a dict with "counters" and "limits"."""
     stats = SearchIndexClient(
         endpoint, AzureKeyCredential(api_key)).get_service_statistics()
     assert isinstance(stats, dict)
     assert set(stats.keys()) == {"counters", "limits"}