def add_documents(self, username: str, api_key: str, items: List[Any],
                  metadata: Optional[List[Any]] = None,
                  collection_name: str = None):
    """
        Add documents to the Vector AI cloud.

        Stores the credentials and the target collection name on the
        instance, wraps every item in a document whose ``_id`` is its
        position in ``items``, creates a client and inserts the documents.

        Args:
            username: Vector AI username, stored on ``self.username``.
            api_key: Vector AI API key, stored on ``self.api_key``.
            items: The items to upload.
            metadata: Optional per-item metadata, paired positionally with
                ``items``.
            collection_name: Target collection. When omitted, the name is
                ``'vectorhub_collection_with_'`` followed by ``self.__name__``.

        Returns:
            The result of the client's ``insert_documents`` call, or ``None``
            when ``self.encoder_type`` is neither ``'encoder'`` nor ``'qa'``.
    """
    self.username = username
    self.api_key = api_key
    self.collection_name = (
        'vectorhub_collection_with_' + self.__name__
        if collection_name is None
        else collection_name
    )

    if metadata is None:
        docs = [
            self._create_document(position, entry)
            for position, entry in enumerate(items)
        ]
    else:
        docs = [
            self._create_document(position, entry, entry_metadata)
            for position, (entry, entry_metadata)
            in enumerate(zip(items, metadata))
        ]

    self.client = ViClient(username, api_key)

    # Pick what is handed to the client as the model for the 'item' field.
    kind = self.encoder_type
    if kind == 'encoder':
        item_model = self
    elif kind == 'qa':
        item_model = self.encode_question
    else:
        return None
    return self.client.insert_documents(
        self.collection_name, docs, {'item': item_model})
def vi_client(self):
    """
        Build a ViClient from credentials found in the environment.

        The ``VH_*`` variables are checked first, then the ``VI_*`` ones.
        When neither username variable is set, a client is created without
        arguments.
    """
    env = os.environ
    credential_keys = (
        ('VH_USERNAME', 'VH_API_KEY'),
        ('VI_USERNAME', 'VI_API_KEY'),
    )
    for username_key, api_key_key in credential_keys:
        # Only the username decides which pair is used; a missing API key
        # for that pair still raises KeyError.
        if username_key in env:
            return ViClient(env[username_key], env[api_key_key])
    return ViClient()
def vi_client(self):
    """
        Build a ViClient that targets the development API URL.

        Credentials come from the ``VH_*`` environment variables when
        ``VH_USERNAME`` is set, otherwise from the ``VI_*`` ones when
        ``VI_USERNAME`` is set. With neither set, only the URL is passed.
    """
    url = "https://vectorai-development-api-vectorai-test-api.azurewebsites.net/"

    # Positional credentials; stays empty when no username variable exists.
    credentials = ()
    if 'VH_USERNAME' in os.environ:
        credentials = (os.environ['VH_USERNAME'], os.environ['VH_API_KEY'])
    elif 'VI_USERNAME' in os.environ:
        credentials = (os.environ['VI_USERNAME'], os.environ['VI_API_KEY'])
    return ViClient(*credentials, url=url)
# Script fragment: summarise documents with Pegasus and upload them.
#
# NOTE(review): this fragment is whitespace-collapsed onto a single line, so
# the original nesting cannot be recovered with certainty. The code line below
# is left untouched; the steps it contains, in order, are:
#
#   1. Load the "google/pegasus-large" model and tokenizer with
#      ``from_pretrained``.
#   2. Define ``summarise(text)``: batch the single source text (no target
#      text is passed), call ``model.generate`` and return the first decoded
#      string, decoded with ``skip_special_tokens=True``.
#   3. For each entry of ``docs`` without a 'short_description' key, store
#      ``summarise(doc['description'])`` under that key.
#   4. Create a ``ViClient`` from the VH_USERNAME / VH_API_KEY environment
#      variables and read the '_id' field across ``docs`` into ``ids``.
#   5. When ``args.reset_collection`` is set and ``args.collection_name`` is
#      among the listed collections, delete that collection; a 5 second sleep
#      follows (presumably inside the same branch -- TODO confirm nesting).
#   6. Insert ``docs`` into ``args.collection_name`` with a ``ViText2Vec``
#      model attached to the 'description' field, then log and print the
#      response.
#   7. Raise ``ValueError("Failed IDs")`` when ``response['failed']`` is not 0.
#   8. The trailing ``if args.evaluate_results:`` has no visible body; the
#      fragment is cut off at that point.
mname = "google/pegasus-large" model = PegasusForConditionalGeneration.from_pretrained(mname) tok = PegasusTokenizer.from_pretrained(mname) def summarise(text): batch = tok.prepare_seq2seq_batch(src_texts=[text]) # don't need tgt_text for inference gen = model.generate(**batch) return tok.batch_decode(gen, skip_special_tokens=True)[0] for i, doc in enumerate(docs): if 'short_description' not in docs[i].keys(): short_description = summarise(doc['description']) docs[i]['short_description'] = short_description # LOGGER.debug(short_description) vi_client = ViClient(os.environ['VH_USERNAME'], os.environ['VH_API_KEY']) ids = vi_client.get_field_across_documents('_id', docs) if args.reset_collection: if args.collection_name in vi_client.list_collections(): vi_client.delete_collection(args.collection_name) time.sleep(5) text_encoder = ViText2Vec(os.environ['VH_USERNAME'], os.environ['VH_API_KEY']) response = vi_client.insert_documents(args.collection_name, docs, models={'description': text_encoder}) LOGGER.debug(response) print(response) if response['failed'] != 0: raise ValueError("Failed IDs") if args.evaluate_results:
class ViIndexer:
    """
        Helper for indexing and searching items through a Vector AI client.

        The class relies on attributes it does not define itself
        (``definition``, ``__name__``, ``encode`` / ``encode_question``);
        the class using it is expected to provide them.
    """

    @property
    def encoder_type(self):
        """The encoder type ensures it uses either the 'encode' or 'encode_question'/'encode_answer'
        Currently supported encoder types:
            Question-Answer Encoder

        Returns:
            ``'qa'`` when the model id starts with ``'qa'``, otherwise
            ``'encoder'``.
        """
        if self.definition.model_id.startswith('qa'):
            return 'qa'
        return 'encoder'

    def request_api_key(self, username: str, email: str, referral=None):
        """
            Requesting an API key.

            Delegates to the module-level ``request_api_key`` function with
            the description fixed to ``'vectorhub'``.
        """
        print("API key is being requested. Be sure to save it somewhere!")
        return request_api_key(username=username, email=email,
                               description='vectorhub', referral=referral)

    def add_documents(self, username: str, api_key: str, items: List[Any],
                      metadata: Optional[List[Any]] = None,
                      collection_name: Optional[str] = None):
        """
            Add documents to the Vector AI cloud.

            Args:
                username: Vector AI username, stored on ``self.username``.
                api_key: Vector AI API key, stored on ``self.api_key``.
                items: The items to upload; each one becomes a document whose
                    ``_id`` is its position in this list.
                metadata: Optional per-item metadata, paired positionally
                    with ``items``.
                collection_name: Target collection. When omitted, the name is
                    ``'vectorhub_collection_with_'`` followed by
                    ``self.__name__``.

            Returns:
                The result of the client's ``insert_documents`` call, or
                ``None`` when the encoder type is neither ``'encoder'`` nor
                ``'qa'``.
        """
        self.username = username
        self.api_key = api_key
        if collection_name is not None:
            self.collection_name = collection_name
        else:
            self.collection_name = 'vectorhub_collection_with_' + self.__name__

        if metadata is not None:
            # A distinct loop name avoids shadowing the ``metadata`` argument.
            docs = [
                self._create_document(i, item, item_metadata)
                for i, (item, item_metadata) in enumerate(zip(items, metadata))
            ]
        else:
            docs = [
                self._create_document(i, item)
                for i, item in enumerate(items)
            ]

        self.client = ViClient(username, api_key)
        if self.encoder_type == 'encoder':
            return self.client.insert_documents(
                self.collection_name, docs, {'item': self})
        elif self.encoder_type == 'qa':
            return self.client.insert_documents(
                self.collection_name, docs, {'item': self.encode_question})

    def _create_document(self, _id: str, item: List[str], metadata=None):
        """Wrap one item in the document dict that gets uploaded."""
        return {'_id': str(_id), 'item': item, 'metadata': metadata}

    def delete_collection(self, collection_name=None):
        """
            Delete a collection through the client.

            Args:
                collection_name: Collection to delete; defaults to
                    ``self.collection_name``.
        """
        if collection_name is None:
            collection_name = self.collection_name
        # Delegate to the client; calling self.delete_collection here would
        # recurse without end.
        return self.client.delete_collection(collection_name)

    def search(self, item: Any, num_results: int = 10):
        """
            Simple search with Vector AI

            Args:
                item: The query; encoded with ``encode`` or
                    ``encode_question`` depending on the encoder type.
                num_results: Passed to the client as ``page_size``.
        """
        warnings.warn(
            "If you are looking for more advanced functionality, we recommend using the official Vector AI Github package"
        )
        if self.encoder_type == 'encoder':
            return self.client.search(
                self.collection_name, self.encode(item),
                field='item_' + self.__name__ + '_vector_',
                page_size=num_results)
        elif self.encoder_type == 'qa':
            return self.client.search(
                self.collection_name, self.encode_question(item),
                field='item_vector_', page_size=num_results)

    def retrieve_documents(self, num_of_documents: int):
        """
            Get all the documents in our package.

            Returns the ``'documents'`` entry of the client response, asking
            for a page of ``num_of_documents``.
        """
        return self.client.retrieve_documents(
            self.collection_name, page_size=num_of_documents)['documents']

    def retrieve_all_documents(self):
        """
            Retrieve all documents.
        """
        # Delegate to the client; the method itself takes no collection
        # argument, so calling it on self with one raised a TypeError.
        return self.client.retrieve_all_documents(self.collection_name)
"""
Script to create model cards.

When run as a script, inserts the documents returned by
``get_model_definitions(None)`` into the collection named by the
``VH_COLLECTION_NAME`` environment variable, with a ``ViText2Vec`` model
attached to the 'description' field.
"""
if __name__ == "__main__":
    from vectorai import ViClient
    from vectorai.models.deployed.text import ViText2Vec
    from vectorhub.auto_encoder import *
    import os

    # Both the client and the text encoder use the same credentials.
    username = os.environ['VH_USERNAME']
    api_key = os.environ['VH_API_KEY']

    vi_client = ViClient(username, api_key)
    text_encoder = ViText2Vec(username, api_key)
    docs = get_model_definitions(None)
    vi_client.insert_documents(
        os.environ['VH_COLLECTION_NAME'],
        docs,
        models={'description': text_encoder},
    )