def get_subset(collection):
    """Demo: pick records by cell source and detach their files, two ways.

    Both approaches select the records of ``collection`` whose
    ``.cell_source`` equals 'Xenotransplanted microglia' and call
    ``dataset.detach(collection, key, [])`` for each of them, printing any
    error string that call returns.

    1. Build a grid with ``dataset.grid`` and filter it with a pandas
       DataFrame.
    2. Build a frame named 'frame_name' with ``dataset.frame`` and filter
       the objects returned by ``dataset.frame_objects``.

    Args:
        collection: collection name passed through to the ``dataset`` calls.

    Raises:
        SystemExit: if ``dataset.grid`` reports an error (the error is
            printed first).
    """
    wanted_source = 'Xenotransplanted microglia'
    keys = dataset.keys(collection)
    dot_paths = [".cell_source", ".species", ".tissue", "._Key"]
    labels = ["source", "species", "tissue", "key"]

    # Approach 1: grid -> pandas DataFrame
    grid, err = dataset.grid(collection, keys, dot_paths)
    if err != "":
        print(err)
        # Non-zero status so callers/shells can tell the demo failed.
        raise SystemExit(1)
    df = pd.DataFrame(np.array(grid), columns=labels)
    # Group on the scalar column name (not a one-element list) so that
    # get_group() accepts a plain string key across pandas versions.
    grouped = df.groupby("source")
    print(grouped.groups.keys())
    # get_group() raises KeyError for an absent group; skip quietly so the
    # frame-based demo below still runs.
    if wanted_source in grouped.groups:
        for key in grouped.get_group(wanted_source)["key"]:
            print('getting files for ', key)
            err = dataset.detach(collection, key, [])
            if err != '':
                print(err)

    # Approach 2: the same selection using a frame
    _, err = dataset.frame(collection, 'frame_name', keys, dot_paths, labels)
    if err != "":
        print(err)
        # Nothing reliable to iterate if the frame could not be built.
        return
    for record in dataset.frame_objects(collection, 'frame_name'):
        # .get(): a record without a cell source may lack the 'source' label.
        if record.get('source') == wanted_source:
            print('getting files for ', record['key'])
            err = dataset.detach(collection, record['key'], [])
            if err != '':
                print(err)
# Example no. 2
# 0
def test_issue12(t, c_name):
    """Regression test for issue 12: create, inspect and delete a frame.

    Loads five small JSON objects into the collection (creating or updating
    each key), removes every existing frame, then creates frame 'issue12'
    and checks that it exists, has keys, has objects and can be deleted.

    Args:
        t: test harness object; only its ``error(message)`` method is used.
        c_name: name of the collection to run against; it is initialised
            if ``dataset.status`` reports it as unavailable.
    """
    src = '''[
{"id": "1", "c1": 1, "c2": 2, "c3": 3 },
{"id": "2", "c1": 2, "c2": 2, "c3": 3 },
{"id": "3", "c1": 3, "c2": 3, "c3": 3 },
{"id": "4", "c1": 1, "c2": 1, "c3": 1 },
{"id": "5", "c1": 6, "c2": 6, "c3": 6 }
]'''
    #dataset.verbose_on() # DEBUG
    #dataset.use_strict_dotpath(True) # DEBUG
    if not dataset.status(c_name):
        if not dataset.init(c_name):
            err = dataset.error_message()
            t.error(f'failed to create {c_name}, {err}')
            return

    # Load (or refresh) the fixture objects.
    for obj in json.loads(src):
        key = obj['id']
        if dataset.has_key(c_name, key):
            dataset.update(c_name, key, obj)
        else:
            dataset.create(c_name, key, obj)

    # Start from a collection with no frames at all.
    for existing in dataset.frames(c_name):
        if not dataset.delete_frame(c_name, existing):
            err = dataset.error_message()
            t.error(f'Failed to delete {existing} from {c_name} -> "{err}"')
            return
        if dataset.has_frame(c_name, existing):
            t.error(
                f'Failed to delete frame {existing} from {c_name}, frame still exists'
            )
            return

    f_name = 'issue12'
    # NOTE(review): "c3" has no leading dot and the labels do carry one;
    # this looks like the exact input issue 12 is about - confirm before
    # normalising either list.
    dot_paths = [".c1", "c3"]
    labels = [".col1", ".col3"]
    keys = dataset.keys(c_name)
    if not dataset.frame_create(c_name, f_name, keys, dot_paths, labels):
        err = dataset.error_message()
        t.error(f'failed to create {f_name} from {c_name}, {err}')
    if not dataset.has_frame(c_name, f_name):
        err = dataset.error_message()
        t.error(f'expected frame {f_name} to exists, {err}')
        return
    f_keys = dataset.frame_keys(c_name, f_name)
    if len(f_keys) == 0:
        err = dataset.error_message()
        t.error(f'expected keys in {f_name}, got zero, {err}')
        return
    f_objects = dataset.frame_objects(c_name, f_name)
    if len(f_objects) == 0:
        err = dataset.error_message()
        t.error(f'expected objects in {f_name}, got zero, {err}')
        return
    if not dataset.delete_frame(c_name, f_name):
        err = dataset.error_message()
        t.error(f'expected to delete {f_name} in {c_name}, {err}')
# Example no. 3
# 0
def get_records(dot_paths, f_name, d_name, keys, labels=None, clear=True):
    """Return the objects of frame ``f_name`` in collection ``d_name``.

    If the frame already exists it is either deleted and rebuilt
    (``clear`` true) or refreshed and returned as-is (``clear`` false).
    When ``labels`` is empty or None, each label is the last
    dot-separated segment of the corresponding dot path.

    A failed ``dataset.frame_create`` is reported with ``print``; the
    function still returns whatever ``dataset.frame_objects`` yields.

    NOTE(review): a later ``def get_records`` in this file reuses the name;
    if both stay in one module, that later definition is the one in effect.
    """
    frame_exists = dataset.has_frame(d_name, f_name)
    if frame_exists and not clear:
        # Keep the existing frame definition, just bring it up to date.
        dataset.frame_refresh(d_name, f_name)
        return dataset.frame_objects(d_name, f_name)
    if frame_exists:
        dataset.delete_frame(d_name, f_name)

    if not labels:
        # No labels supplied: derive them from the dot paths.
        labels = [path.split(".")[-1] for path in dot_paths]

    created = dataset.frame_create(d_name, f_name, keys, dot_paths, labels)
    if not created:
        err = dataset.error_message()
        print(f"ERROR: Can't create {f_name} in {d_name}, {err}")
    return dataset.frame_objects(d_name, f_name)
def get_records(dot_paths, f_name, d_name, keys, labels=None):
    """Rebuild frame ``f_name`` in collection ``d_name`` and return its objects.

    Any existing frame of that name is deleted first. When ``labels`` is
    empty or None, each label is the last dot-separated segment of the
    corresponding dot path. An error string from ``dataset.frame`` is
    printed; the result of ``dataset.frame_objects`` is returned either way.

    NOTE(review): an earlier ``def get_records`` (with a ``clear``
    parameter) appears in this file; this later definition rebinds the name.
    """
    if dataset.has_frame(d_name, f_name):
        dataset.delete_frame(d_name, f_name)

    if not labels:
        # No labels supplied: derive them from the dot paths.
        labels = [path.split(".")[-1] for path in dot_paths]

    _, err = dataset.frame(d_name, f_name, keys, dot_paths, labels)
    if err != "":
        print(f"ERROR: Can't create {f_name} in {d_name}, {err}")
    return dataset.frame_objects(d_name, f_name)
# Example no. 5
# 0
def test_frame_objects(t, c_name):
    """Check that frame objects carry simple and nested dot-path values.

    Re-initialises the collection, stores four records (two with a
    ``nameIdentifiers`` list, two without), builds frame 'f1' over them and
    verifies the frame's keys, the frame listing, the number of objects
    and the presence of the ``nameIdentifiers`` / ``nameIdentifier`` fields.

    Args:
        t: test harness object; only its ``error(message)`` method is used.
        c_name: collection name; also treated as a filesystem path when an
            existing collection is removed.
    """
    # When dataset.status() reports the collection, close it and remove its
    # directory so the test starts from scratch.
    if dataset.status(c_name):
        dataset.close(c_name)
        if os.path.exists(c_name):
            shutil.rmtree(c_name)
    if not dataset.init(c_name):
        err = dataset.error_message()
        t.error(f'init({c_name}), {err}')
        return

    data = [
        {
            "id": "A",
            "nameIdentifiers": [
                {
                    "nameIdentifier": "0000-000X-XXXX-XXXX",
                    "nameIdentifierScheme": "ORCID",
                    "schemeURI": "http://orcid.org/",
                },
                {
                    "nameIdentifier": "H-XXXX-XXXX",
                    "nameIdentifierScheme": "ResearcherID",
                    "schemeURI": "http://www.researcherid.com/rid/",
                },
            ],
            "two": 22,
            "three": 3.0,
            "four": ["one", "two", "three"],
        },
        {"id": "B", "two": 2000, "three": 3000.1},
        {"id": "C"},
        {
            "id": "D",
            "nameIdentifiers": [
                {
                    "nameIdentifier": "0000-000X-XXXX-XXXX",
                    "nameIdentifierScheme": "ORCID",
                    "schemeURI": "http://orcid.org/",
                },
            ],
            "two": 20,
            "three": 334.1,
            "four": [],
        },
    ]
    dot_paths = [
        "._Key", ".nameIdentifiers", ".nameIdentifiers[:].nameIdentifier",
        ".two", ".three", ".four"
    ]
    labels = [
        "id", "nameIdentifiers", "nameIdentifier", "two", "three", "four"
    ]

    keys = []
    for row in data:
        key = row['id']
        keys.append(key)
        # The result of create() was never inspected here; a failed create
        # shows up below as a key/object count mismatch.
        dataset.create(c_name, key, row)

    f_name = 'f1'
    if not dataset.frame_create(c_name, f_name, keys, dot_paths, labels):
        err = dataset.error_message()
        t.error(
            f'frame_create({c_name}, {f_name}, {keys}, {dot_paths}, {labels}), {err}'
        )
        return

    f_keys = dataset.frame_keys(c_name, f_name)
    if len(f_keys) != len(keys):
        t.error(f'expected {len(keys)}, got {len(f_keys)}')
    if not dataset.frame_refresh(c_name, f_name):
        err = dataset.error_message()
        t.error(f'frame_refresh({c_name}, {f_name}), {err}')

    frame_names = dataset.frames(c_name)
    if len(frame_names) != 1 or frame_names[0] != 'f1':
        t.error(f"expected one frame name, f1, got {frame_names}")

    object_result = dataset.frame_objects(c_name, f_name)
    if len(object_result) != 4:
        t.error(
            f'Did not get correct number of objects back, expected 4 got {len(object_result)}, {object_result}'
        )

    # Only records "A" and "D" define nameIdentifiers, hence the expected
    # count of two for both the list field and the extracted values.
    count_nameId = 0
    count_nameIdObj = 0
    for obj in object_result:
        if 'id' not in obj:
            t.error('Did not get id in object')
        if 'nameIdentifiers' in obj:
            count_nameId += 1
            for idv in obj['nameIdentifiers']:
                if 'nameIdentifier' not in idv:
                    t.error('Missing part of object')
        if 'nameIdentifier' in obj:
            count_nameIdObj += 1
            if "0000-000X-XXXX-XXXX" not in obj['nameIdentifier']:
                t.error('Missing object in complex dot path')
    if count_nameId != 2:
        t.error(
            f"Incorrect number of nameIdentifiers elements, expected 2, got {count_nameId}"
        )
    if count_nameIdObj != 2:
        t.error(
            f"Incorrect number of nameIdentifier elements, expected 2, got {count_nameIdObj}"
        )

    if not dataset.delete_frame(c_name, f_name):
        err = dataset.error_message()
        t.error(f'delete_frame({c_name}, {f_name}), {err}')