예제 #1
0
        # Give every document that lacks a 'short_description' one, built by
        # passing its 'description' field through summarise().
        for i, doc in enumerate(docs):
            if 'short_description' not in docs[i].keys():
                short_description = summarise(doc['description'])
                docs[i]['short_description'] = short_description
                # LOGGER.debug(short_description)

    # Credentials come from the environment; os.environ[...] raises KeyError
    # if either variable is unset.
    vi_client = ViClient(os.environ['VH_USERNAME'], os.environ['VH_API_KEY'])
    # NOTE(review): `ids` is not read anywhere in the visible code — confirm
    # whether this call is still needed (e.g. for a validation side effect).
    ids = vi_client.get_field_across_documents('_id', docs)
    # Optionally drop an existing collection of the same name before inserting.
    if args.reset_collection:
        if args.collection_name in vi_client.list_collections():
            vi_client.delete_collection(args.collection_name)
            # Presumably gives the service time to complete the deletion
            # before re-inserting — TODO confirm the fixed 5 s is sufficient.
            time.sleep(5)
    text_encoder = ViText2Vec(os.environ['VH_USERNAME'], os.environ['VH_API_KEY'])

    # The `models` mapping pairs the 'description' field with the text encoder;
    # presumably the client vectorises that field on insert — verify against
    # the client library's documentation.
    response = vi_client.insert_documents(args.collection_name, docs, models={'description': text_encoder})

    # NOTE(review): the response is both logged and printed.
    LOGGER.debug(response)
    print(response)
    # Abort if the insert response reports any failures.
    if response['failed'] != 0:
        raise ValueError("Failed IDs")
    
    # Optional sanity check: log a sample of what is now in the collection.
    if args.evaluate_results:
        LOGGER.debug("Checking Documents:")
        # NOTE(review): head() is called twice in a row; the first result
        # could be reused for the 'vector_length' lookup.
        LOGGER.debug(vi_client.head(args.collection_name))
        LOGGER.debug(vi_client.head(args.collection_name)['vector_length'])
        LOGGER.debug(vi_client.collection_schema(args.collection_name))
        import pandas as pd
        # None disables pandas' column-width truncation; this is a global
        # pandas option, not scoped to this block.
        pd.set_option('display.max_colwidth', None)
        LOGGER.debug(vi_client.show_json(vi_client.random_documents(args.collection_name), selected_fields=['markdown_without_example']))
예제 #2
0
    # Credentials come from the environment; os.environ[...] raises KeyError
    # if either variable is unset.
    vi_client = ViClient(os.environ['VH_USERNAME'], os.environ['VH_API_KEY'])
    # NOTE(review): `ids` is not read anywhere in the visible code — confirm
    # whether this call is still needed (e.g. for a validation side effect).
    ids = vi_client.get_field_across_documents('_id', docs)
    # Optionally drop an existing collection of the same name before inserting.
    if args.reset_collection:
        if args.collection_name in vi_client.list_collections():
            vi_client.delete_collection(args.collection_name)
            # Presumably gives the service time to complete the deletion
            # before re-inserting — TODO confirm the fixed 5 s is sufficient.
            time.sleep(5)
    text_encoder = ViText2Vec(os.environ['VH_USERNAME'],
                              os.environ['VH_API_KEY'])

    # The `models` mapping pairs the 'description' field with the text encoder;
    # presumably the client vectorises that field on insert, and
    # overwrite=True presumably replaces documents that already exist —
    # verify both against the client library's documentation.
    response = vi_client.insert_documents(args.collection_name,
                                          docs,
                                          models={'description': text_encoder},
                                          overwrite=True)

    # NOTE(review): the response is both logged and printed.
    LOGGER.debug(response)
    print(response)
    # Abort if the insert response reports any failures.
    if response['failed'] != 0:
        raise ValueError("Failed IDs")

    # Optional sanity check: log a sample of what is now in the collection.
    if args.evaluate_results:
        LOGGER.debug("Checking Documents:")
        # NOTE(review): head() is called twice in a row; the first result
        # could be reused for the 'vector_length' lookup.
        LOGGER.debug(vi_client.head(args.collection_name))
        LOGGER.debug(vi_client.head(args.collection_name)['vector_length'])
        LOGGER.debug(vi_client.collection_schema(args.collection_name))
        import pandas as pd
        # None disables pandas' column-width truncation; this is a global
        # pandas option, not scoped to this block.
        pd.set_option('display.max_colwidth', None)
        LOGGER.debug(
            vi_client.show_json(vi_client.random_documents(
                args.collection_name),
                                selected_fields=['markdown_without_example']))