def get_types(corpus):
    """Load every record stored in the TYPES collection of a corpus.

    ``corpus`` selects the database through ``database_name``; a falsy value
    falls back to ``default_corpus``.

    Returns the output of ``metajson_service.load_dict_list`` applied to the
    query results, or ``None`` when the results object is falsy.
    """
    types_collection = mongodb[database_name(corpus or default_corpus)][TYPES]
    found = types_collection.find()
    # NOTE(review): find() presumably returns a pymongo cursor, which is likely
    # always truthy, so the None result may be unreachable -- confirm.
    return metajson_service.load_dict_list(found) if found else None
Exemplo n.º 2
0
def get_types(corpus):
    """Return all records of the TYPES collection for the given corpus.

    A falsy ``corpus`` is replaced by ``default_corpus``. The query results
    are converted with ``metajson_service.load_dict_list``; ``None`` is
    returned instead when the results object is falsy.
    """
    selected_corpus = corpus or default_corpus
    database = mongodb[database_name(selected_corpus)]
    cursor = database[TYPES].find()
    # NOTE(review): a pymongo cursor is presumably always truthy, which would
    # make this guard dead code -- confirm before relying on the None result.
    if not cursor:
        return None
    return metajson_service.load_dict_list(cursor)
def get_documents_by_rec_ids(corpus, rec_ids):
    """Fetch the documents whose ``rec_id`` is one of ``rec_ids``.

    A falsy ``corpus`` is replaced by ``default_corpus``. The matching
    records of the DOCUMENTS collection are converted with
    ``metajson_service.load_dict_list`` and returned.

    Raises the error built by ``exceptions.metajsonprc_error(2)`` when the
    results object is falsy.
    """
    database = mongodb[database_name(corpus or default_corpus)]
    rec_id_filter = {"rec_id": {"$in": rec_ids}}
    # The results object is a pymongo.cursor.Cursor.
    cursor = database[DOCUMENTS].find(rec_id_filter)
    # NOTE(review): a cursor is presumably always truthy, so this error may
    # never fire even when no document matches -- confirm.
    if not cursor:
        raise exceptions.metajsonprc_error(2)
    return metajson_service.load_dict_list(cursor)
def get_documents_by_mongo_ids(corpus, mongo_ids):
    """Fetch the documents whose Mongo ``_id`` is one of ``mongo_ids``.

    A falsy ``corpus`` is replaced by ``default_corpus``. Each entry of
    ``mongo_ids`` is wrapped in ``ObjectId`` before querying the DOCUMENTS
    collection; the results are converted with
    ``metajson_service.load_dict_list`` and returned.

    Raises the error built by ``exceptions.metajsonprc_error(4)`` when the
    results object is falsy.
    """
    selected_corpus = corpus or default_corpus
    object_ids = [ObjectId(raw_id) for raw_id in mongo_ids]
    documents = mongodb[database_name(selected_corpus)][DOCUMENTS]
    cursor = documents.find({"_id": {"$in": object_ids}})
    # NOTE(review): a pymongo cursor is presumably always truthy, so this
    # error may never fire even when nothing matches -- confirm.
    if not cursor:
        raise exceptions.metajsonprc_error(4)
    return metajson_service.load_dict_list(cursor)
Exemplo n.º 5
0
def get_documents_by_rec_ids(corpus, rec_ids):
    """Return the DOCUMENTS records whose ``rec_id`` appears in ``rec_ids``.

    ``corpus`` selects the database through ``database_name``; a falsy value
    falls back to ``default_corpus``. Results are converted with
    ``metajson_service.load_dict_list``.

    Raises the error built by ``exceptions.metajsonprc_error(2)`` when the
    results object is falsy.
    """
    selected_corpus = corpus or default_corpus
    documents = mongodb[database_name(selected_corpus)][DOCUMENTS]
    # find() hands back a pymongo.cursor.Cursor.
    matches = documents.find({"rec_id": {"$in": rec_ids}})
    # NOTE(review): the cursor is presumably truthy even with zero matches,
    # which would make the error branch unreachable -- confirm.
    if matches:
        return metajson_service.load_dict_list(matches)
    raise exceptions.metajsonprc_error(2)
Exemplo n.º 6
0
def get_documents_by_mongo_ids(corpus, mongo_ids):
    """Return the DOCUMENTS records whose ``_id`` appears in ``mongo_ids``.

    ``corpus`` selects the database through ``database_name``; a falsy value
    falls back to ``default_corpus``. Every id is converted with ``ObjectId``
    before the query, and results are converted with
    ``metajson_service.load_dict_list``.

    Raises the error built by ``exceptions.metajsonprc_error(4)`` when the
    results object is falsy.
    """
    selected_corpus = corpus or default_corpus
    id_filter = {"_id": {"$in": [ObjectId(one_id) for one_id in mongo_ids]}}
    matches = mongodb[database_name(selected_corpus)][DOCUMENTS].find(id_filter)
    # NOTE(review): the cursor is presumably truthy even with zero matches,
    # which would make the error branch unreachable -- confirm.
    if matches:
        return metajson_service.load_dict_list(matches)
    raise exceptions.metajsonprc_error(4)
Exemplo n.º 7
0
def search_mongo(corpus, mongo_query):
    """Run a raw Mongo query against the DOCUMENTS collection of a corpus.

    A falsy ``corpus`` is replaced by ``default_corpus``. ``mongo_query`` is
    passed unchanged to ``find``. Example queries:

        {"_id": {"$in": mongo_object_ids}}
        {"rec_id": {"$in": rec_ids}}
        {"rec_type": "Book"}
        {"is_part_ofs.rec_type":"Book"}
        {"is_part_ofs.rec_type":"Journal"}
        {"is_part_ofs.rec_type":"VideoRecording"}
        {"is_part_ofs.is_part_ofs.rec_type":"Book"}
        {"creators.agent.name_family":"Latour"}
        {"is_part_ofs.creators.agent.name_family":"Latour"}
        {"is_part_ofs.creators.agent.name_family":"Latour", "is_part_of.creators.agent.name_given":"Bruno"}

    Returns the output of ``metajson_service.load_dict_list`` applied to the
    results, or ``None`` when the results object is falsy.
    """
    documents = mongodb[database_name(corpus or default_corpus)][DOCUMENTS]
    cursor = documents.find(mongo_query)
    # NOTE(review): a pymongo cursor is presumably always truthy, so the None
    # result may be unreachable -- confirm.
    if not cursor:
        return None
    return metajson_service.load_dict_list(cursor)
def search_mongo(corpus, mongo_query):
    """Query the DOCUMENTS collection of a corpus with a caller-built filter.

    ``corpus`` selects the database through ``database_name``; a falsy value
    falls back to ``default_corpus``. ``mongo_query`` is handed to ``find``
    as is. Returns the ``metajson_service.load_dict_list`` conversion of the
    results, or ``None`` when the results object is falsy.
    """
    selected_corpus = corpus or default_corpus
    # Sample values for mongo_query:
    #   {"_id": {"$in": mongo_object_ids}}
    #   {"rec_id": {"$in": rec_ids}}
    #   {"rec_type": "Book"}
    #   {"is_part_ofs.rec_type":"Book"}
    #   {"is_part_ofs.rec_type":"Journal"}
    #   {"is_part_ofs.rec_type":"VideoRecording"}
    #   {"is_part_ofs.is_part_ofs.rec_type":"Book"}
    #   {"creators.agent.name_family":"Latour"}
    #   {"is_part_ofs.creators.agent.name_family":"Latour"}
    #   {"is_part_ofs.creators.agent.name_family":"Latour", "is_part_of.creators.agent.name_given":"Bruno"}
    matches = mongodb[database_name(selected_corpus)][DOCUMENTS].find(mongo_query)
    # NOTE(review): the cursor is presumably truthy even when empty, so the
    # None fallback may never be used -- confirm.
    return metajson_service.load_dict_list(matches) if matches else None
Exemplo n.º 9
0
# Search indexes whose value is split into words: (index, operator, fields).
# Every word is matched against every listed field, and the resulting regex
# clauses are combined with the given operator.
_SEARCH_WORD_INDEXES = (
    ("all", "$or", (
        "rec_id",
        "identifiers.value",
        "title",
        "title_sub",
        "publishers",
        "is_part_ofs.title",
        "is_part_ofs.is_part_ofs.title",
        "creators.agent.name_family",
        "creators.agent.name_given",
        "creators.agent.name",
        "creators.agent.title",
        "rec_type",
    )),
    ("title", "$and", ("title",)),
    ("is_part_of", "$or", (
        "is_part_ofs.title",
        "is_part_ofs.is_part_ofs.title",
    )),
    ("creator", "$or", (
        "creators.agent.name_family",
        "creators.agent.name_given",
        "creators.agent.name",
        "creators.agent.title",
    )),
    ("publisher", "$or", (
        "publishers",
        "is_part_ofs.publishers",
        "is_part_ofs.is_part_ofs.publishers",
    )),
)

# Search indexes matched with one regex on the raw (unsplit) value.
_SEARCH_RAW_REGEX_INDEXES = (
    ("affiliation", "creators.affiliation.name"),
    ("keyword", "keywords.value"),
    ("classification", "classifications.value"),
    ("research_area", "research_areas.value"),
    ("subject", "subjects.value"),
    ("set", "sets.value"),
)

# Search indexes matched by strict equality on the raw value.
_SEARCH_EXACT_INDEXES = (
    ("creator_id", "creators.agent.rec_id"),
    ("affiliation_id", "creators.affiliation.rec_id"),
)


def _search_regex_clause(field, pattern):
    """Build a case-insensitive regex condition on a single field."""
    return {field: {"$regex": pattern, "$options": 'i'}}


def _search_term_clause(index, raw_value, values):
    """Translate one search term into a Mongo clause, or None if it adds nothing.

    ``index`` is the term's index name, ``raw_value`` its unsplit value and
    ``values`` the words obtained by splitting that value.
    """
    for name, operator, fields in _SEARCH_WORD_INDEXES:
        if index == name:
            # An empty $and/$or array is rejected by MongoDB, so a value
            # without any word contributes no clause at all.
            if not values:
                return None
            return {operator: [_search_regex_clause(field, value)
                               for value in values
                               for field in fields]}

    if index == "identifier":
        try:
            # Parse the term *value* as an ObjectId; the index name itself
            # ("identifier") can never be a valid id.
            return {"_id": ObjectId(raw_value)}
        except (InvalidId, TypeError):
            return {"$or": [_search_regex_clause("rec_id", raw_value),
                            _search_regex_clause("identifiers.value", raw_value)]}

    for name, field in _SEARCH_RAW_REGEX_INDEXES:
        if index == name:
            return _search_regex_clause(field, raw_value)

    for name, field in _SEARCH_EXACT_INDEXES:
        if index == name:
            return {field: raw_value}

    # Unknown index names are ignored.
    return None


def search(corpus, search_query):
    """Run a structured search on a corpus and return a SearchResponse.

    ``corpus`` selects the database through ``database_name``; a falsy value
    falls back to ``default_corpus``.

    ``search_query`` is read as a mapping with these keys:
      - ``filter_class`` (required): one of "Document", "Agent", "Person",
        "OrgUnit", "Event", "Family".
      - ``filter_date_begin`` / ``filter_date_end``: parsed with
        ``date_service.parse_date`` and compared with ``date_sort``.
      - ``filter_languages`` / ``filter_types`` / ``filter_status``: lists
        matched with ``$in`` on ``languages`` / ``rec_type`` / ``rec_status``.
      - ``search_terms``: items carrying ``index`` and ``value`` (an
        ``operator`` key may be present but is not used yet).

    All filters and term clauses are combined with ``$and``; with none of
    them, every record of the collection is returned. Results are sorted by
    ``title`` then ``rec_type``, both ascending.

    The returned SearchResponse gets the keys ``records``,
    ``records_total_count``, ``result_batch_size``, ``result_offset`` and
    ``search_query``.

    Raises the error built by ``exceptions.metajsonprc_error(40)`` when
    ``search_query`` is None, when ``filter_class`` is missing or not in the
    accepted list, or when a search term with a value has no ``index``.
    """
    if not corpus:
        corpus = default_corpus

    search_response = SearchResponse()

    # empty search_query
    if search_query is None:
        raise exceptions.metajsonprc_error(40)

    # filter_class -> collection
    collection = None
    if "filter_class" not in search_query or search_query[
            "filter_class"] not in [
                "Document", "Agent", "Person", "OrgUnit", "Event", "Family"
            ]:
        raise exceptions.metajsonprc_error(40)
    elif search_query["filter_class"] == "Document":
        collection = DOCUMENTS
    # NOTE(review): the other accepted classes leave ``collection`` as None
    # (an AGENTS collection was planned here); the lookup below presumably
    # fails for them -- confirm the intended behaviour.

    # other filters
    # todo: filter_peer_review, filter_with_full_text, filter_favorite
    filter_query = []
    if "filter_date_end" in search_query:
        filter_date_end = date_service.parse_date(
            search_query["filter_date_end"])
        filter_query.append({"date_sort": {"$lte": filter_date_end}})
    if "filter_date_begin" in search_query:
        filter_date_begin = date_service.parse_date(
            search_query["filter_date_begin"])
        filter_query.append({"date_sort": {"$gte": filter_date_begin}})
    if "filter_languages" in search_query:
        filter_query.append(
            {"languages": {"$in": search_query["filter_languages"]}})
    if "filter_types" in search_query:
        filter_query.append(
            {"rec_type": {"$in": search_query["filter_types"]}})
    if "filter_status" in search_query:
        # "private", "pending", "rejected", "published", "deleted"
        filter_query.append(
            {"rec_status": {"$in": search_query["filter_status"]}})

    # search_terms
    # TODO: honour the per-term "operator" ("or", "and", "not"), e.g.
    #   a, and b, or c -> or(and(a,b),c)
    #   a, or b, and c -> and(or(a,b),c)
    # For now every term clause is and-ed with the others.
    search_indexes = []
    if "search_terms" in search_query:
        for search_term in search_query["search_terms"]:
            if "value" not in search_term or search_term["value"] is None:
                # NOTE(review): a term without value stops the processing of
                # all following terms (break, not continue) -- confirm this
                # is intended.
                break

            # split value into words (commas count as separators)
            values = search_term["value"].replace(",", " ").split()

            if "index" not in search_term:
                raise exceptions.metajsonprc_error(40)

            clause = _search_term_clause(
                search_term["index"], search_term["value"], values)
            if clause is not None:
                search_indexes.append(clause)

    # result_sorts : how to with this index ? ...
    sort = [("title", pymongo.ASCENDING), ("rec_type", pymongo.ASCENDING)]

    # combine filter_query and search_indexes
    mongo_args = filter_query + search_indexes

    # Generate the mongo query; an empty query searches everything
    if mongo_args:
        mongo_query = {"$and": mongo_args}
    else:
        mongo_query = {}
    logging.debug("mongo_query:")
    logging.debug(jsonbson.dumps_bson(mongo_query, True))

    mongo_response = mongodb[database_name(corpus)][collection].find(
        mongo_query).sort(sort)
    logging.debug(mongo_response)
    if mongo_response:
        records = metajson_service.load_dict_list(mongo_response)
    else:
        records = []
    records_total_count = len(records)

    # No paging: the single batch holds every record.
    search_response["records"] = records
    search_response["records_total_count"] = records_total_count
    search_response["result_batch_size"] = records_total_count
    search_response["result_offset"] = 0
    search_response["search_query"] = search_query

    return search_response
Exemplo n.º 10
0
def search(corpus, search_query):
    """Search a corpus with a structured query and return a SearchResponse.

    ``corpus`` selects the database through ``database_name``; a falsy value
    falls back to ``default_corpus``.

    ``search_query`` is read as a mapping. ``filter_class`` is required and
    must be one of "Document", "Agent", "Person", "OrgUnit", "Event",
    "Family". Optional keys: ``filter_date_begin``, ``filter_date_end``
    (parsed with ``date_service.parse_date``), ``filter_languages``,
    ``filter_types``, ``filter_status`` (lists used with ``$in``) and
    ``search_terms`` (items carrying ``index`` and ``value``; their
    ``operator`` key is not used yet).

    Every filter and term clause is and-ed together; without any, the whole
    collection is returned. Results are sorted by ``title`` then
    ``rec_type``, ascending.

    The returned SearchResponse gets the keys ``records``,
    ``records_total_count``, ``result_batch_size``, ``result_offset`` and
    ``search_query``.

    Raises the error built by ``exceptions.metajsonprc_error(40)`` when
    ``search_query`` is None, when ``filter_class`` is missing or not
    accepted, or when a search term with a value has no ``index``.
    """
    def regex_on(field, pattern):
        # Case-insensitive regex condition on one field.
        return {field: {"$regex": pattern, "$options": 'i'}}

    def word_group(operator, fields, words):
        # Match every word against every field and combine with ``operator``.
        # MongoDB rejects an empty $and/$or array, so no words -> no clause.
        if not words:
            return None
        return {operator: [regex_on(field, word)
                           for word in words
                           for field in fields]}

    if not corpus:
        corpus = default_corpus

    search_response = SearchResponse()

    # empty search_query
    if search_query is None:
        raise exceptions.metajsonprc_error(40)

    # filter_class -> collection
    collection = None
    if "filter_class" not in search_query or search_query["filter_class"] not in ["Document", "Agent", "Person", "OrgUnit", "Event", "Family"]:
        raise exceptions.metajsonprc_error(40)
    elif search_query["filter_class"] == "Document":
        collection = DOCUMENTS
    # NOTE(review): the other accepted classes leave ``collection`` as None
    # (an AGENTS collection was planned here); the lookup below presumably
    # fails for them -- confirm the intended behaviour.

    # other filters
    # todo: filter_peer_review, filter_with_full_text, filter_favorite
    filter_query = []
    if "filter_date_end" in search_query:
        filter_date_end = date_service.parse_date(search_query["filter_date_end"])
        filter_query.append({"date_sort": {"$lte": filter_date_end}})
    if "filter_date_begin" in search_query:
        filter_date_begin = date_service.parse_date(search_query["filter_date_begin"])
        filter_query.append({"date_sort": {"$gte": filter_date_begin}})
    if "filter_languages" in search_query:
        filter_query.append({"languages": {"$in": search_query["filter_languages"]}})
    if "filter_types" in search_query:
        filter_query.append({"rec_type": {"$in": search_query["filter_types"]}})
    if "filter_status" in search_query:
        # "private", "pending", "rejected", "published", "deleted"
        filter_query.append({"rec_status": {"$in": search_query["filter_status"]}})

    # search_terms
    # TODO: honour the per-term "operator" ("or", "and", "not"), e.g.
    #   a, and b, or c -> or(and(a,b),c)
    #   a, or b, and c -> and(or(a,b),c)
    # For now every term clause is and-ed with the others.
    search_indexes = []
    if "search_terms" in search_query:
        for search_term in search_query["search_terms"]:
            if "value" not in search_term or search_term["value"] is None:
                # NOTE(review): a term without value stops the processing of
                # all following terms (break, not continue) -- confirm this
                # is intended.
                break

            raw_value = search_term["value"]
            # split value into words (commas count as separators)
            values = raw_value.replace(",", " ").split()

            if "index" not in search_term:
                raise exceptions.metajsonprc_error(40)
            index = search_term["index"]

            clause = None
            if index == "all":
                clause = word_group("$or", (
                    "rec_id",
                    "identifiers.value",
                    "title",
                    "title_sub",
                    "publishers",
                    "is_part_ofs.title",
                    "is_part_ofs.is_part_ofs.title",
                    "creators.agent.name_family",
                    "creators.agent.name_given",
                    "creators.agent.name",
                    "creators.agent.title",
                    "rec_type",
                ), values)

            elif index == "identifier":
                try:
                    # Parse the term *value* as an ObjectId; the index name
                    # itself ("identifier") can never be a valid id.
                    clause = {"_id": ObjectId(raw_value)}
                except (InvalidId, TypeError):
                    clause = {"$or": [regex_on("rec_id", raw_value),
                                      regex_on("identifiers.value", raw_value)]}

            elif index == "title":
                clause = word_group("$and", ("title",), values)

            elif index == "is_part_of":
                clause = word_group("$or", (
                    "is_part_ofs.title",
                    "is_part_ofs.is_part_ofs.title",
                ), values)

            elif index == "creator":
                clause = word_group("$or", (
                    "creators.agent.name_family",
                    "creators.agent.name_given",
                    "creators.agent.name",
                    "creators.agent.title",
                ), values)

            elif index == "creator_id":
                clause = {"creators.agent.rec_id": raw_value}

            elif index == "affiliation":
                clause = regex_on("creators.affiliation.name", raw_value)

            elif index == "affiliation_id":
                clause = {"creators.affiliation.rec_id": raw_value}

            elif index == "publisher":
                clause = word_group("$or", (
                    "publishers",
                    "is_part_ofs.publishers",
                    "is_part_ofs.is_part_ofs.publishers",
                ), values)

            elif index == "keyword":
                clause = regex_on("keywords.value", raw_value)

            elif index == "classification":
                clause = regex_on("classifications.value", raw_value)

            elif index == "research_area":
                clause = regex_on("research_areas.value", raw_value)

            elif index == "subject":
                clause = regex_on("subjects.value", raw_value)

            elif index == "set":
                clause = regex_on("sets.value", raw_value)

            # Unknown index names and empty word groups add nothing.
            if clause is not None:
                search_indexes.append(clause)

    # result_sorts : how to with this index ? ...
    sort = [("title", pymongo.ASCENDING), ("rec_type", pymongo.ASCENDING)]

    # combine filter_query and search_indexes
    mongo_args = filter_query + search_indexes

    # Generate the mongo query; an empty query searches everything
    if mongo_args:
        mongo_query = {"$and": mongo_args}
    else:
        mongo_query = {}
    logging.debug("mongo_query:")
    logging.debug(jsonbson.dumps_bson(mongo_query, True))

    mongo_response = mongodb[database_name(corpus)][collection].find(mongo_query).sort(sort)
    logging.debug(mongo_response)
    if mongo_response:
        records = metajson_service.load_dict_list(mongo_response)
    else:
        records = []
    records_total_count = len(records)

    # No paging: the single batch holds every record.
    search_response["records"] = records
    search_response["records_total_count"] = records_total_count
    search_response["result_batch_size"] = records_total_count
    search_response["result_offset"] = 0
    search_response["search_query"] = search_query

    return search_response