Example no. 1
import json
from py_dataset import dataset  # assumed import; the listing omits the module setup

def test_issue12(t, c_name):
    src = '''[
{"id": "1", "c1": 1, "c2": 2, "c3": 3 },
{"id": "2", "c1": 2, "c2": 2, "c3": 3 },
{"id": "3", "c1": 3, "c2": 3, "c3": 3 },
{"id": "4", "c1": 1, "c2": 1, "c3": 1 },
{"id": "5", "c1": 6, "c2": 6, "c3": 6 }
]'''
    #dataset.verbose_on() # DEBUG
    #dataset.use_strict_dotpath(True) # DEBUG
    if not dataset.status(c_name):
        if not dataset.init(c_name):
            err = dataset.error_message()
            t.error(f'failed to create {c_name}, {err}')
            return
    objects = json.loads(src)
    for obj in objects:
        key = obj['id']
        if dataset.has_key(c_name, key):
            dataset.update(c_name, key, obj)
        else:
            dataset.create(c_name, key, obj)
    f_names = dataset.frames(c_name)
    for f_name in f_names:
        ok = dataset.delete_frame(c_name, f_name)
        if not ok:
            err = dataset.error_message()
            t.error(f'Failed to delete {f_name} from {c_name} -> "{err}"')
            return
        if dataset.has_frame(c_name, f_name):
            t.error(
                f'Failed to delete frame {f_name} from {c_name}, frame still exists'
            )
            return
    f_name = 'issue12'
    dot_paths = [".c1", "c3"]
    labels = [".col1", ".col3"]
    keys = dataset.keys(c_name)
    if not dataset.frame_create(c_name, f_name, keys, dot_paths, labels):
        err = dataset.error_message()
        t.error(f'failed to create {f_name} from {c_name}, {err}')
        return
    if not dataset.has_frame(c_name, f_name):
        err = dataset.error_message()
        t.error(f'expected frame {f_name} to exist, {err}')
        return
    f_keys = dataset.frame_keys(c_name, f_name)
    if len(f_keys) == 0:
        err = dataset.error_message()
        t.error(f'expected keys in {f_name}, got zero, {err}')
        return
    f_objects = dataset.frame_objects(c_name, f_name)
    if len(f_objects) == 0:
        err = dataset.error_message()
        t.error(f'expected objects in {f_name}, got zero, {err}')
        return
    if not dataset.delete_frame(c_name, f_name):
        err = dataset.error_message()
        t.error(f'expected to delete {f_name} in {c_name}, {err}')
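For a quick way to run this test outside its original harness, the sketch below fakes the `t` object, since the test only ever calls `t.error()`. The harness class and the collection name are hypothetical, not part of py_dataset.

class FakeTest:
    """Hypothetical stand-in for the test harness; only error() is needed."""
    def __init__(self):
        self.failures = []

    def error(self, msg):
        self.failures.append(msg)
        print(f"FAIL: {msg}")

t = FakeTest()
test_issue12(t, 'issue12_test.ds')  # hypothetical collection name
print('OK' if not t.failures else f'{len(t.failures)} failure(s)')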
Example no. 2
def get_records(dot_paths, f_name, d_name, keys, labels=None):
    if dataset.has_frame(d_name, f_name):
        dataset.delete_frame(d_name, f_name)
    if not labels:
        # If labels aren't provided, derive each one from the last segment of its dot path
        labels = [d.split(".")[-1] for d in dot_paths]
    f, err = dataset.frame(d_name, f_name, keys, dot_paths, labels)
    if err != "":
        print(f"ERROR: Can't create {f_name} in {d_name}, {err}")
    return dataset.frame_objects(d_name, f_name)
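A hedged usage sketch for the helper above; the collection, frame name, and dot paths are illustrative only, and labels are left to default to the last segment of each dot path.

keys = dataset.keys('people.ds')  # hypothetical collection
records = get_records(['.family_name', '.given_name'], 'names_frame', 'people.ds', keys)
for record in records:
    print(record)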
Example no. 3
def get_records(dot_paths, f_name, d_name, keys, labels=None, clear=True):
    if dataset.has_frame(d_name, f_name):
        if clear:
            dataset.delete_frame(d_name, f_name)
        else:
            dataset.frame_refresh(d_name, f_name)
            return dataset.frame_objects(d_name, f_name)
    if not labels:
        # If labels aren't provided, derive each one from the last segment of its dot path
        labels = [d.split(".")[-1] for d in dot_paths]
    if not dataset.frame_create(d_name, f_name, keys, dot_paths, labels):
        err = dataset.error_message()
        print(f"ERROR: Can't create {f_name} in {d_name}, {err}")
    return dataset.frame_objects(d_name, f_name)
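The `clear` flag is what separates this version from Example no. 2: with `clear=False` an existing frame is refreshed in place via `frame_refresh()` rather than deleted and rebuilt, which avoids redefining the frame when only the underlying records have changed. A minimal sketch, with illustrative names:

keys = dataset.keys('articles.ds')  # hypothetical collection
# First call creates the frame; repeat calls with clear=False only refresh it.
cached = get_records(['.title', '.year'], 'cached_frame', 'articles.ds', keys, clear=False)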
Example no. 4
            if idv['type'] in ('xref_doi', 'doi'):
                link = 'https://doi.org/' + idv['value']
            else:
                link = idv['value']

        record = {
            'id': uid,
            'title': title,
            'journal': journal,
            'authors': author_list,
            'identifiers': identifier_list,
            'affiliations': affiliation_list,
            'link': link,
            'year': publication_date.year,
        }

        dataset.create(collection, link, record)

#Export to Google Sheet
os.environ['GOOGLE_CLIENT_SECRET_JSON'] = "/etc/client_secret.json"

#Google sheet ID for output
sheet_name = "Sheet1"
sheet_range = "A1:CZ"
f_name = 'f_name'
export_list = [".link", ".title", ".journal", ".year"]
title_list = ["link", "title", "journal", "year"]
keys = dataset.keys(collection)
if dataset.has_frame(collection, f_name):
    dataset.delete_frame(collection, f_name)
frame, err = dataset.frame(collection, f_name, keys, export_list, title_list)
if err != '':
    print(err)
err = dataset.export_gsheet(collection, f_name, sheet, sheet_name, sheet_range)
if err != '':
    print(err)
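The delete-frame / frame / export_gsheet sequence above recurs in the next example, so it can be wrapped in one helper that stops on the first error. A sketch assuming the same error-string-returning API used above; `sheet_id` stands in for the Google Sheet ID that this fragment defines elsewhere.

def export_frame_to_gsheet(c_name, f_name, keys, dot_paths, labels,
                           sheet_id, sheet_name, sheet_range):
    # Rebuild the frame from scratch so stale columns never reach the sheet.
    if dataset.has_frame(c_name, f_name):
        dataset.delete_frame(c_name, f_name)
    _, err = dataset.frame(c_name, f_name, keys, dot_paths, labels)
    if err != '':
        print(err)
        return False
    err = dataset.export_gsheet(c_name, f_name, sheet_id, sheet_name, sheet_range)
    if err != '':
        print(err)
        return False
    return True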
Example no. 5
        if subject.links not in dupe.links:
            dupe.links += subject.links

print("Total collaborators: ", len(deduped))

collab = 'collaborators.ds'

subprocess.run(['rm', '-rf', collab])
dataset.init(collab)
for d in deduped:
    dataset.create(collab, d.ca_id, d.write())
#Export to Google Sheet
os.environ['GOOGLE_CLIENT_SECRET_JSON'] = "/etc/client_secret.json"

#Google sheet ID for output
f_name = 'frm'
sheet_name = "Sheet1"
sheet_range = "A1:CZ"
export_list = [".names", ".years", ".affiliations", ".links"]
title_list = ["name", "years", "affiliations", "links"]
keys = dataset.keys(collab)
if dataset.has_frame(collab, f_name):
    dataset.delete_frame(collab, f_name)
frame, err = dataset.frame(collab, f_name, keys, export_list, title_list)
if err != '':
    print(err)
err = dataset.export_gsheet(collab, f_name, output_sheet, sheet_name,
                            sheet_range)
if err != '':
    print(err)
Example no. 6
def match_cd_refs():
    token = os.environ["TINDTOK"]

    matches = []
    collection = "caltechdata.ds"
    keys = dataset.keys(collection)
    if "mediaupdate" in keys:
        keys.remove("mediaupdate")

    # Get event data results
    event_data = "crossref_refs.ds"
    event_keys = dataset.keys(event_data)
    event_keys.remove("captured")
    f_name = "match_cd_refs"
    dot_paths = [".obj_id", ".id", ".subj_id"]
    labels = ["obj_id", "id", "subj_id"]
    print("Getting Event Data Records")
    if dataset.has_frame(event_data, f_name):
        if not dataset.frame_reframe(event_data, f_name, event_keys):
            err = dataset.error_message()
            print(f"Failed to reframe {f_name} in {event_data}, {err}")
            exit()
    elif not dataset.frame_create(event_data, f_name, event_keys, dot_paths, labels):
        err = dataset.error_message()
        print(f"Failed to create frame {f_name} in {event_data}, {err}")
        exit()
    grid = dataset.frame_grid(event_data, f_name)
    df = pd.DataFrame(np.array(grid), columns=["obj_id", "id", "subj_id"])
    grouped = df.groupby(["obj_id"])
    groups = grouped.groups
    # Look at all CaltechDATA records
    for k in keys:
        # Collect matched new links for the record
        record_matches = []
        print(k)
        metadata, err = dataset.read(collection, k)
        if err != "":
            print(f"Unexpected error on read: {err}")
        doi = "https://doi.org/" + metadata["identifier"]["identifier"]
        if doi in groups:
            hits = grouped.get_group(doi)
            for index, h in hits.iterrows():
                # Trigger for whether we already have this link
                new = True
                if "relatedIdentifiers" in metadata:
                    for m in metadata["relatedIdentifiers"]:
                        if m["relatedIdentifier"] in h["subj_id"]:
                            new = False
                if new:
                    match = h["subj_id"]
                    print(match)
                    print(h["obj_id"])
                    inputv = input("Do you approve this link?  Type Y or N: ")
                    if inputv == "Y":
                        record_matches.append(match)
        # If we have to update record
        if len(record_matches) > 0:
            ids = []
            if "relatedIdentifiers" in metadata:
                for m in metadata["relatedIdentifiers"]:
                    ids.append(m)
            matches.append([k, record_matches])
            # Now collect identifiers for record
            for match in record_matches:
                split = match.split("doi.org/")
                new_id = {
                    "relatedIdentifier": split[1],
                    "relatedIdentifierType": "DOI",
                    "relationType": "IsCitedBy",
                }
                ids.append(new_id)
            newmetadata = {"relatedIdentifiers": ids}
            response = caltechdata_edit(token, k, newmetadata, {}, {}, True)
            print(response)
    return matches
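The frame_grid/pandas step in the middle of match_cd_refs is worth isolating: frame_grid() returns a plain 2-D list ordered by the frame's labels, so it drops straight into a DataFrame for grouping. A minimal sketch; the collection name and dot paths are illustrative.

import numpy as np
import pandas as pd
from py_dataset import dataset  # assumed import, as above

c_name, f_name = 'events.ds', 'by_object'  # hypothetical names
keys = dataset.keys(c_name)
if not dataset.has_frame(c_name, f_name):
    dataset.frame_create(c_name, f_name, keys, ['.obj_id', '.subj_id'],
                         ['obj_id', 'subj_id'])
grid = dataset.frame_grid(c_name, f_name)
df = pd.DataFrame(np.array(grid), columns=['obj_id', 'subj_id'])
for obj_id, hits in df.groupby('obj_id'):
    print(obj_id, len(hits))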