def opinion_ids_to_names(opinion_ids: Iterable[str]) -> List[str]:
    op_names = []
    for op_id in opinion_ids:
        if (op_model := Opinion.select().where(
                Opinion.resource_id == op_id).first()) is not None:
            op_names.append(op_model.cluster.case_name)
        else:
            op_names.append("Unknown")
    return op_names
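A minimal usage sketch, assuming the peewee models are already bound to a live database; the IDs are illustrative:

names = opinion_ids_to_names(["12345", "67890"])
# Each entry is the matching cluster's case_name, or "Unknown" when no
# Opinion row has that resource_id.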
Example #2
def join_to_clusters(
    base_citation_query: ModelSelect,
) -> Tuple[ModelSelect, ModelAlias, ModelAlias]:
    CitingOpinion, CitedOpinion = Opinion.alias(), Opinion.alias()
    CitingCluster, CitedCluster = Cluster.alias(), Cluster.alias()
    return (
        (base_citation_query.join_from(
            Citation, CitingOpinion, on=Citation.citing_opinion).join_from(
                Citation, CitedOpinion,
                on=Citation.cited_opinion).join_from(
                    CitingOpinion,
                    CitingCluster).join_from(CitedOpinion, CitedCluster)),
        CitingCluster,
        CitedCluster,
    )
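A sketch of how the helper might be consumed; the threshold is an assumption, though Cluster.citation_count also appears in search_cases below:

# Join a bare citation query to both clusters, then filter on the
# citing side through the returned alias.
query, CitingCluster, CitedCluster = join_to_clusters(Citation.select())
popular = query.where(CitingCluster.citation_count > 100)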
Example #3
def get_case_html(resource_id: int):
    try:
        opinion = Opinion.get(resource_id=resource_id)
        if not opinion.html_text:
            raise FileNotFoundError()
        return opinion.html_text
    except (Opinion.DoesNotExist, FileNotFoundError):
        abort(HTTPStatus.NOT_FOUND)
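The handler reads like a Flask view function; a minimal sketch of wiring it up, where the app object and route path are assumptions:

from flask import Flask

app = Flask(__name__)
app.add_url_rule("/cases/<int:resource_id>/html", view_func=get_case_html)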
Example #4
def search_cases(query, max_cases=25):
    search_text = CaseSearch.prepare_query(query)
    return (Opinion.select(
        Opinion,
        fn.ts_headline(Cluster.case_display_name(),
                       fn.to_tsquery(search_text)).alias("headline"),
    ).join(Cluster).where(
        ts_match(Cluster.searchable_case_name,
                 fn.to_tsquery(search_text))).order_by(
                     Cluster.citation_count.desc()).limit(max_cases))
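Because the ts_headline expression is aliased to "headline", peewee attaches it as a plain attribute on each returned Opinion; a small sketch with an illustrative query string:

for op in search_cases("equal protection"):
    # op.headline carries the highlighted snippet built by ts_headline.
    print(op.resource_id, op.headline)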
Example #5
def nearest(resource_id):
    # Both scipy.spatial KD-trees (KDTree and cKDTree) take too much memory,
    # so brute-force the distances against the module-level embedding matrix.
    vec = mat[np.where(citing == resource_id)[0][0]]
    # A fresh distance array leaves `mat` intact for subsequent calls.
    dists = np.linalg.norm(mat - vec, axis=1)
    idx = np.argsort(dists)
    names = []
    for i, id_ in enumerate(idx[:50]):
        opinion = Opinion.get(Opinion.resource_id == citing[id_])
        names.append("{}: {}".format(i, opinion.cluster.case_name))
    return names
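The function leans on two module-level globals; a sketch of how they might be initialized, where the embeddings file and its layout are assumptions:

import numpy as np

# Row i of mat holds the embedding for the opinion with resource_id citing[i].
citing = np.array([op.resource_id for op in Opinion.select()])
mat = np.load("opinion_embeddings.npy")  # hypothetical (n_opinions, dim) array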
Example #6
def get_case_clusters():
    case_resource_ids = [int(c) for c in request.args.getlist("cases")]
    num_clusters = int(request.args.get("num_clusters") or 0) or None
    if len(case_resource_ids) < 1:
        return "You must provide at least one case ID.", HTTPStatus.UNPROCESSABLE_ENTITY
    clusters = clustering.spectral_cluster(set(case_resource_ids),
                                           num_clusters=num_clusters)
    output_dict = {}
    for cluster_name, opinion_ids in clusters.items():
        opinion_models = Opinion.select().where(
            Opinion.resource_id << opinion_ids)
        output_dict[str(cluster_name)] = model_list_to_dicts(opinion_models)
    return output_dict
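A hypothetical request against a Flask test client; the app object and route path are assumptions:

resp = app.test_client().get("/clusters?cases=12345&cases=67890&num_clusters=2")
# resp.json maps each cluster label to a list of serialized opinions.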
Example #7
def ingest_opinion_data(opinions_dir):
    opinion_records = []
    directory = os.fsencode(opinions_dir)
    for file in os.listdir(directory):
        try:
            filename = os.fsdecode(file)
            if filename.endswith(".json"):
                file_path = os.path.join(opinions_dir, filename)
                with open(file_path, encoding="utf8") as json_file:
                    opinion_data = json.load(json_file)
                    cluster_uri = opinion_data["cluster"]
                    cluster_id = int(cluster_uri.split("/")[-2])
                    new_record = Opinion(
                        resource_id=opinion_data["id"],
                        opinion_uri=opinion_data["resource_uri"],
                        cluster_uri=cluster_uri,
                        cluster=cluster_id,
                    )
                    opinion_records.append(new_record)
        except Exception as e:
            print(f"Failure on file {file}: {e}")
    with db.atomic():
        Opinion.bulk_create(opinion_records, batch_size=100)
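The loader touches only three fields in each JSON file; a minimal illustrative payload (values and URI scheme are made up). The trailing slash on "cluster" matters, since the cluster ID is taken from split("/")[-2]:

{
    "id": 12345,
    "resource_uri": "/api/rest/v3/opinions/12345/",
    "cluster": "/api/rest/v3/clusters/67890/"
}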
Example #8
def get_recommended_cases():
    case_resource_ids = frozenset(map(int, request.args.getlist("cases")))
    court_ids = frozenset(map(str, request.args.getlist("courts")))
    max_cases = int(request.args.get("max_cases") or 10)
    if len(case_resource_ids) < 1:
        return "You must provide at least one case ID.", HTTPStatus.UNPROCESSABLE_ENTITY
    recommendations = recommendation.recommendations(case_resource_ids,
                                                     max_cases,
                                                     courts=court_ids)
    recommended_opinions = sorted(
        Opinion.select().join(Cluster).where(
            Opinion.resource_id << list(recommendations.keys())),
        key=lambda op: recommendations[op.resource_id],
        reverse=True,
    )
    return model_list_to_json(recommended_opinions)
Example #9
def ingest_citation_data(citations_file):
    # Since there are only ~65,000 opinions, it's feasible to load all the
    # IDs into memory and avoid making millions of DB queries.
    opinion_set = {o.resource_id for o in Opinion.select()}

    citation_records = []
    with open(citations_file) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=",")
        for row in csv_reader:
            try:
                integer_row = [int(cell) for cell in row]
                if integer_row[0] in opinion_set and integer_row[1] in opinion_set:
                    new_record = Citation(
                        citing_opinion=integer_row[0],
                        cited_opinion=integer_row[1],
                        depth=integer_row[2],
                    )
                    citation_records.append(new_record)
            except Exception as e:
                print(f"Failure on row {row}: {e}")
    with db.atomic():
        Citation.bulk_create(citation_records, batch_size=100)
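Each row is expected to hold three integers: citing opinion ID, cited opinion ID, and citation depth. An illustrative line (values made up):

12345,67890,2

Rows that fail to parse, including any header line, are reported and skipped by the except clause.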
Example #10
import networkx as nx
from db.peewee.models import db, Opinion
from graph.citation_network import CitationNetwork

citation_graph = CitationNetwork.construct_network()

centrality = nx.eigenvector_centrality_numpy(citation_graph)
top_opinions = [
    opinion_id for opinion_id, centrality_score in sorted(
        centrality.items(), key=lambda item: item[1], reverse=True)
][:100]

db.connect()
output_str = ""
for i, opinion_id in enumerate(top_opinions):
    try:
        opinion = Opinion.get(Opinion.resource_id == opinion_id)
        output_str += f"{i + 1}: {opinion.resource_id}, {opinion.cluster.case_name}\n"
    except Opinion.DoesNotExist:
        pass
print(output_str)
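As an aside, the same top-100 selection can be written without materializing the fully sorted list; an equivalent sketch using the standard library:

import heapq

top_opinions = heapq.nlargest(100, centrality, key=centrality.get)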
Example #11
def get_case(resource_id: int):
    try:
        opinion = Opinion.get(resource_id=resource_id)
        return model_to_dict(opinion, **DEFAULT_SERIALIZATION_ARGS)
    except Opinion.DoesNotExist:
        abort(HTTPStatus.NOT_FOUND)