def main(): log.info("*****Clustering Application*****") # Create a model for clustering patents model = Clustering(method="rbr", criterion="i2", similarity="cos", cluster_choice="best", rowmodel="none", colmodel="none", trials=10, showfeatures=False, showsummaries=True, summary_method="cliques", showtree=False, zscores=False, plotclusters=True, plotformat="ps") # Create an object of 'Database' db = Database(verbose=True) # Connect to SQLite database db.connect(in_memory=True, load_from=PATENT_EMBEDDING_DATABASE) # Dummy document collection documents = [] for root, folders, files in os.walk(config.CLUSTERING_BENCHMARK_DATA): for file in files: if not file.startswith('.'): if file.endswith(""): document_name = file document_category = root.rsplit(os.sep, 1)[1] document_label = document_category + "." + document_name documents.append(document_label) # Generate matrix of document embeddings model.patent2mat(documents, rows=len(documents), columns=300, database=db, search_on=PRIMARY_KEY, matrix=PATENT_MATRIX, labels=LABELS, classes=CLASSES, path=PATENT_CLUSTERING_PATH) # Close connection to SQLite database db.close() # Cluster documents model.train(matrix=PATENT_MATRIX, labels=LABELS, classes=CLASSES, use_patent_classes=True, k=20, iterations=20, patent_clusters=PATENT_CLUSTER, plot=PATENT_CLUSTER_PLOT, path=PATENT_CLUSTERING_PATH) # Clean all un-necessary files clean( cleanSample=True, cleanModel=False, cleanDocvecs=True, cleanDatabase=False, cleanClusters=True, filter=["PatentCluster", "PatentCluster.ps", "PatentEmbedding.rclass"])
def main(): log.info("*****Patent2Vec Application*****") # Preprocess patent documents log.info("Preprocessing patent documents") patents = PatentDocument(SOURCE_DATASET, extension="", use_conceptualizer=True, transform_conceptualizer=True, enable_sampling=True, train_ratio=1.0, test_ratio=0.0) # Create Patent2Vec model models = OrderedDict() # PV-DM with average models["PV_DM_Mean"] = \ Patent2Vec(dm=1, dm_mean=1, dm_concat=0, min_word_count=5, size=500, context_window_size=8, negative=2, iter=50, workers=CPU_CORE, use_less_memory=False, docvecs_mapfile=DOCVECS_MAP) models["PV_DM_Mean"].build(patents) models["PV_DM_Mean"].intersect_with_pretrained_embedding( PRETRAINED_EMBEDDING, binary=False) # models["PV_DM_Mean"].load(PATENT2VEC_MODEL) # # PV-DM with concatenation # models["PV_DM_Concatenation"] = \ # Patent2Vec(dm=1, dm_mean=0, dm_concat=1, min_word_count=5, size=500, # context_window_size=8, negative=2, iter=50, workers=CPU_CORE, # use_less_memory=False, docvecs_mapfile=DOCVECS_MAP) # models["PV_DM_Concatenation"].reuse_from(models["PV_DM_Mean"]) # # models["PV_DM_Concatenation"].build(patents) # # models["PV_DM_Concatenation"].intersect_with_pretrained_embedding(PRETRAINED_EMBEDDING, # # binary=False) # # # models["PV_DM_Concatenation"].load(PATENT2VEC_MODEL) # # PV-DBOW # models["PV_DBOW"] = \ # Patent2Vec(dm=0, dm_mean=0, dm_concat=0, min_word_count=5, size=500, # context_window_size=8, negative=2, iter=50, workers=CPU_CORE, # use_less_memory=False, docvecs_mapfile=DOCVECS_MAP) # models["PV_DBOW"].reuse_from(models["PV_DM_Mean"]) # # models["PV_DBOW"].build(patents) # # models["PV_DBOW"].intersect_with_pretrained_embedding(PRETRAINED_EMBEDDING, # # binary=False) # # # models["PV_DBOW"].load(PATENT2VEC_MODEL) # # Mixed models # models["DBOW + DM with average"] = ConcatenatedPatent2Vec([models["PV_DBOW"], # models["PV_DM_Mean"]]) # models["DBOW + DM with concatenation"] = ConcatenatedPatent2Vec([models["PV_DBOW"], # models["PV_DM_Concatenation"]]) for name, model in models.items(): # Train Patent2Vec model start_time = time.time() model.train(patents, alpha=0.1, min_alpha=0.0001, passes=10, fixed_alpha=False) end_time = time.time() log.info("Total time elapsed: %r", (end_time - start_time)) # Evaluate Patent2Vec model model.evaluate() # Save Patent2Vec model model.save(model=PATENT2VEC_MODEL) # Create a database object db = Database(verbose=True) # Connect to database db.connect(in_memory=True) # Create a new table for storing document embeddings db.create_table(table=PATENT_EMBEDDING_TABLE, primary_column=PRIMARY_KEY, other_columns=FIELDS) # Save document embeddings model.save_document_embeddings(document_embeddings=PATENT_EMBEDDING, rows=len(patents), columns=500, database=db, table_name=PATENT_EMBEDDING_TABLE, save_patent_category=True, prepend_document_category=True) # Test documents if not os.path.exists(TESTING_DATA): raise PathNotFoundError("Path does not exist: %s" % TESTING_DATA) with open(TESTING_DATA, "r") as t: test_documents = t.readlines() test_documents = map(lambda x: x.strip(), test_documents) test_documents = filter(None, test_documents) # Preprocessed test documents preprocessed_test_documents = patents.get_preprocessed_corpus( test_documents) # Predict document embeddings model.predict(preprocessed_test_documents, alpha=0.1, min_alpha=0.0001, steps=50, save=True, database=db, table_name=PATENT_EMBEDDING_TABLE, save_patent_category=True, prepend_document_category=True) # Create an index on document embedding table db.create_index(index=PATENT_EMBEDDING_INDEX, 
table=PATENT_EMBEDDING_TABLE, index_by_column=PRIMARY_KEY[0]) # Close database connection db.close(save_to=PATENT_EMBEDDING_DATABASE) # Delete temporary training data model.clean() # Test document for checking the quality of Patent2Vec model patents.set_token_only(True) preprocessed_test_document = patents.get_preprocessed_document( TEST_DOCUMENT) patents.set_token_only(False) # Check quality of Patent2Vec model if preprocessed_test_document is not None: log.info("Check quality of Patent2Vec model") log.info("Top matches for test document: %s", TEST_DOCUMENT) for name, model in models.items(): embedding = model.infer(preprocessed_test_document) top_matches = model.model.docvecs.most_similar( positive=[embedding], negative=[], topn=10) top_matches = map(lambda x: x[0] + "\t\t" + str(x[1]), top_matches) for top_match in top_matches: log.info(top_match) # Clean all un-necessary files clean(cleanSample=True, cleanModel=False, cleanDocvecs=True, cleanDatabase=False, cleanClusters=False, filter=[])
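
# The infer()/most_similar() quality check above presumably delegates to
# gensim's Doc2Vec (the wrapper exposes model.model.docvecs). A minimal,
# self-contained sketch of those calls follows; the toy corpus, tags, and
# reduced vector size are illustrative only. Names follow gensim 4.x
# (vector_size / epochs / model.dv); the wrapper above appears to target an
# older gensim where these were size / iter / docvecs.
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

# Toy corpus standing in for the preprocessed patent documents
corpus = [
    TaggedDocument(words=["solar", "panel", "coating"], tags=["patent_0"]),
    TaggedDocument(words=["lithium", "battery", "anode"], tags=["patent_1"]),
]

# PV-DM with averaged context vectors, mirroring the PV_DM_Mean configuration
toy_model = Doc2Vec(corpus, dm=1, dm_mean=1, vector_size=50, window=8,
                    negative=2, min_count=1, epochs=50, workers=4)

# Infer an embedding for an unseen document and rank its nearest neighbours
embedding = toy_model.infer_vector(["battery", "cell", "electrode"],
                                   alpha=0.1, min_alpha=0.0001, epochs=50)
for tag, similarity in toy_model.dv.most_similar(positive=[embedding], topn=2):
    print(tag, similarity)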