Пример #1
0
 def vi_client(self):
     """Build a ``ViClient`` from credentials found in the environment.

     The ``VH_`` variables are checked first, then the ``VI_`` ones. For the
     first prefix whose ``<PREFIX>_USERNAME`` variable is set, the client is
     created from that username and the matching ``<PREFIX>_API_KEY``.
     When neither username variable is set, ``ViClient()`` is called with no
     arguments.

     Raises:
         KeyError: a username variable is set but its ``*_API_KEY``
             counterpart is not.
     """
     for prefix in ('VH', 'VI'):
         username_var = prefix + '_USERNAME'
         if username_var in os.environ:
             return ViClient(os.environ[username_var],
                             os.environ[prefix + '_API_KEY'])
     return ViClient()
Пример #2
0
 def vi_client(self):
     """Build a ``ViClient`` that targets the hard-coded test API URL.

     Credentials are read from ``VH_USERNAME``/``VH_API_KEY`` when
     ``VH_USERNAME`` is set, otherwise from ``VI_USERNAME``/``VI_API_KEY``
     when ``VI_USERNAME`` is set. When neither is set the client is created
     with the URL only.

     Raises:
         KeyError: a username variable is set but its ``*_API_KEY``
             counterpart is not.
     """
     url = "https://vectorai-development-api-vectorai-test-api.azurewebsites.net/"
     env = os.environ
     if 'VH_USERNAME' in env:
         credentials = (env['VH_USERNAME'], env['VH_API_KEY'])
     elif 'VI_USERNAME' in env:
         credentials = (env['VI_USERNAME'], env['VI_API_KEY'])
     else:
         # No credentials available: only the URL is passed.
         credentials = ()
     return ViClient(*credentials, url=url)
Пример #3
0
    def add_documents(self,
                      username: str,
                      api_key: str,
                      items: List[Any],
                      metadata: Optional[List[Any]] = None,
                      collection_name: str = None):
        """
        Add documents to the Vector AI cloud.

        Stores ``username``, ``api_key`` and the resolved collection name on
        the instance, turns every item into a document with
        ``self._create_document`` and inserts the documents through a new
        ``ViClient`` (also stored as ``self.client``).

        Args:
            username: Passed to ``ViClient`` and stored as ``self.username``.
            api_key: Passed to ``ViClient`` and stored as ``self.api_key``.
            items: Items to turn into documents; each one is passed to
                ``_create_document`` together with its position.
            metadata: Optional per-item metadata, paired with ``items`` by
                position. Pairing uses ``zip``, so it stops at the shorter
                of the two sequences.
            collection_name: Target collection. When ``None``, the name is
                ``'vectorhub_collection_with_'`` followed by
                ``self.__name__``.

        Returns:
            The result of ``ViClient.insert_documents`` when
            ``self.encoder_type`` is ``'encoder'`` or ``'qa'``; ``None`` for
            any other encoder type (nothing is inserted in that case).
        """
        self.username = username
        self.api_key = api_key
        self.collection_name = (
            'vectorhub_collection_with_' + self.__name__
            if collection_name is None else collection_name
        )

        if metadata is None:
            docs = [
                self._create_document(position, entry)
                for position, entry in enumerate(items)
            ]
        else:
            # Materialise the pairs before building any document.
            paired = list(zip(items, metadata))
            docs = [
                self._create_document(position, entry, entry_metadata)
                for position, (entry, entry_metadata) in enumerate(paired)
            ]

        self.client = ViClient(username, api_key)

        # Choose what is registered as the model for the 'item' field.
        if self.encoder_type == 'encoder':
            item_model = self
        elif self.encoder_type == 'qa':
            item_model = self.encode_question
        else:
            return None
        return self.client.insert_documents(self.collection_name, docs,
                                            {'item': item_model})
Пример #4
0
        mname = "google/pegasus-large"
        model = PegasusForConditionalGeneration.from_pretrained(mname)
        tok = PegasusTokenizer.from_pretrained(mname)

        def summarise(text):
            """Return the first decoded sequence the model generates for ``text``.

            Uses the enclosing scope's ``tok`` to build a single-element
            batch and decode the output, and ``model`` to generate it.
            """
            # Only source text is supplied; target text is not needed for inference.
            encoded = tok.prepare_seq2seq_batch(src_texts=[text])
            generated = model.generate(**encoded)
            decoded = tok.batch_decode(generated, skip_special_tokens=True)
            return decoded[0]

        for i, doc in enumerate(docs):
            if 'short_description' not in docs[i].keys():
                short_description = summarise(doc['description'])
                docs[i]['short_description'] = short_description
                # LOGGER.debug(short_description)

    vi_client = ViClient(os.environ['VH_USERNAME'], os.environ['VH_API_KEY'])
    ids = vi_client.get_field_across_documents('_id', docs)
    if args.reset_collection:
        if args.collection_name in vi_client.list_collections():
            vi_client.delete_collection(args.collection_name)
            time.sleep(5)
    text_encoder = ViText2Vec(os.environ['VH_USERNAME'], os.environ['VH_API_KEY'])

    response = vi_client.insert_documents(args.collection_name, docs, models={'description': text_encoder})

    LOGGER.debug(response)
    print(response)
    if response['failed'] != 0:
        raise ValueError("Failed IDs")
    
    if args.evaluate_results: