async def process_phenopacket(source_id, pheno_packet):
    """Index the attribute paths and values of one phenopacket, then store it.

    For every distinct path produced by ``create_all_paths`` the values
    returned by ``collect_value_from_path`` are de-duplicated. Each path that
    has at least one value gets an ``eav_attributes`` row (conflicting rows
    are skipped) and one ``eav_values`` row per distinct value. Afterwards the
    packet itself is inserted into ``eavs``, ``clean_up_eav_values()`` is run
    and ``processing_done(source_id)`` is awaited.

    Args:
        source_id: Identifier stored on the attribute rows and the ``eavs`` row.
        pheno_packet: Mapping with at least an ``'id'`` key; it is stored
            whole in the ``data`` column.
    """
    for path in set(create_all_paths(pheno_packet)):
        # The previous version also called
        # ``filter(lambda x: x == True, values)`` here and threw the lazy
        # result away, so nothing was ever filtered. The dead call is removed;
        # every distinct value is still stored, exactly as before.
        values = list(set(collect_value_from_path(path, pheno_packet)))
        if not values:
            continue

        # The JSON-encoded path doubles as the attribute's primary key.
        eav_id = json.dumps(path)
        attr_query = insert(eav_attributes).values(
            id=eav_id,
            source_id=source_id,
            eav_attribute=path,
        ).on_conflict_do_nothing()
        await database.execute(query=attr_query)

        # NOTE(review): engine.execute is not awaited, so it looks like a
        # synchronous call inside a coroutine - confirm it is acceptable here.
        value_rows = [{'eav_id': eav_id, 'value': v} for v in values]
        engine.execute(eav_values.insert(), value_rows)

    packet_query = eavs.insert().values(
        source_id=source_id,
        subject_id=pheno_packet['id'],
        data=pheno_packet)
    await database.execute(query=packet_query)

    clean_up_eav_values()
    await processing_done(source_id)
Exemplo n.º 2
0
def clean_up_eav_values():
    """Remove redundant rows from ``eav_values``.

    Runs a single DELETE that drops every row after the first (lowest ``id``)
    within each ``(eav_id, value)`` group, as well as every row whose
    ``value`` is the empty string.
    """
    engine.execute(
        text("""DELETE FROM eav_values WHERE id IN
        (SELECT id FROM 
            (SELECT id,
             ROW_NUMBER() OVER( PARTITION BY eav_id, value ORDER BY id ) AS row_num
            FROM eav_values) t
            WHERE t.row_num > 1) or value = '';""")
    )
Exemplo n.º 3
0
async def get_attributes_vals(id: str, attribute: dict):
    """Return the aggregated values stored for one attribute.

    The lookup key is the JSON encoding of ``attribute['attribute']``; it is
    matched against ``eav_values.eav_id`` and the matching values are folded
    into a single array with ``array_agg``.

    Args:
        id: Not referenced by this function.
        attribute: Mapping whose ``'attribute'`` entry identifies the attribute.

    Returns:
        The ``array_agg`` column of the first result row.
    """
    eav_key = json.dumps(attribute['attribute'])

    # Only the eav_id match is applied in this variant: no substring filter,
    # limit or offset is added to the inner select.
    inner_select = """select array_agg(x.value) 
              from (select value from eav_values where eav_id = :id"""
    sql = inner_select + ') x'

    print(eav_key)

    rows = list(engine.execute(text(sql), id=eav_key))
    first_row = rows[0]
    return first_row['array_agg']
Exemplo n.º 4
0
async def get_attributes_vals(id: str, payload: AttributeValues):
    """Return the aggregated values of one attribute, optionally filtered/paged.

    The lookup key is the JSON encoding of ``payload.attribute``. When
    ``payload.string`` is truthy only values containing it (SQL ``like`` with
    ``%`` on both sides) are selected; truthy ``payload.limit`` and
    ``payload.offset`` add the corresponding clauses to the inner select.

    Args:
        id: Not referenced by this function.
        payload: Object providing ``attribute``, ``string``, ``limit`` and
            ``offset``.

    Returns:
        The ``array_agg`` column of the first result row.
    """
    eav_key = json.dumps(payload.attribute)

    if payload.string:
        like_pattern = '%' + payload.string + '%'
    else:
        like_pattern = ''

    # Assemble the statement from fragments; optional clauses are appended
    # only when the matching payload field is truthy.
    fragments = ["""select array_agg(x.value) 
              from (select value from eav_values where eav_id = :id"""]
    if payload.string:
        fragments.append(' and value like :str')
    if payload.limit:
        fragments.append(' limit :limit')
    if payload.offset:
        fragments.append(' offset :offset')
    fragments.append(') x')
    sql = ''.join(fragments)

    # All four bind values are always supplied, even when the corresponding
    # clause is absent from the statement.
    bind_params = {
        'id': eav_key,
        'limit': payload.limit,
        'offset': payload.offset,
        'str': like_pattern,
    }
    rows = list(engine.execute(text(sql), **bind_params))

    return rows[0]['array_agg']
Exemplo n.º 5
0
def init_db(ctx):
    """Create the schema, seed one ``widget`` row and print a select result.

    Args:
        ctx: Not referenced by this function.
    """
    print("Creating all resources.")
    Base.metadata.create_all()

    seed_sql = "insert into widget values (1, 'hey', 'there');"
    engine.execute(seed_sql)

    # NOTE(review): this prints whatever engine.execute returns - presumably a
    # result object rather than the fetched rows; confirm that is intended.
    select_result = engine.execute("select * from widget;")
    print(select_result)
Exemplo n.º 6
0
async def process_old(source_id, file_name, empty_delim, eav_types):
    """Load the records of a VCF file into the EAV tables in batches.

    Each record yielded by ``VCF.lines(file_name)`` becomes one ``eavs`` row.
    For every truthy field of a record an attribute id is derived from the
    field name and its entry in ``eav_types``; the first time an id is seen
    an ``eav_attributes`` row is inserted (conflicting rows are skipped), and
    every value not yet seen for that id is queued as an ``eav_values`` row.
    Queued rows are written once a buffer holds more than 1000 entries, and
    whatever is left is written after the loop.

    Args:
        source_id: Identifier stored on attribute rows and ``eavs`` rows.
        file_name: Passed to ``VCF.lines``.
        empty_delim: Container of values treated as "empty"; such values are
            left out of the stored record data.
        eav_types: Mapping from field name to its type descriptor. A ``list``
            descriptor marks a multi-valued field. A missing field name
            raises ``KeyError``.
    """
    max_count = 10000000  # hard cap on the number of records read
    batch_size = 1000     # buffers are flushed once they exceed this size

    start = time.time()
    buf = []             # pending ``eavs`` rows
    buf_val = []         # pending ``eav_values`` rows
    eav_attrs = set()    # attribute ids already sent to ``eav_attributes``
    eav_attr_vals = {}   # attribute id -> values already queued for it

    for count, record in enumerate(VCF.lines(file_name), start=1):
        if count > max_count:
            break
        d_v = dict(record)
        data = {}

        # Fall back to the 1-based record number when there is no usable ID.
        subject_id = d_v.get('ID')
        if subject_id is None:
            subject_id = count

        for key, value in d_v.items():
            if not value:
                continue

            attr_type = eav_types[key]
            attr = map_({key: attr_type}, lambda x: x.__name__)
            eav_id = json.dumps(attr)
            seen_values = eav_attr_vals.setdefault(eav_id, set())

            if eav_id not in eav_attrs:
                eav_attrs.add(eav_id)

                query_attr = insert(eav_attributes).values(
                    id=eav_id,
                    source_id=source_id,
                    eav_attribute=attr,
                ).on_conflict_do_nothing()
                await database.execute(query=query_attr)

            is_list_type = isinstance(attr_type, list)
            if is_list_type and isinstance(value, list):
                # The element variable has its own name so that it no longer
                # shadows the outer record variable.
                # NOTE(review): elements found in ``empty_delim`` are still
                # queued for ``eav_values`` here, although they are dropped
                # from ``data`` below - confirm that is intended.
                for item in value:
                    if item not in seen_values:
                        seen_values.add(item)
                        buf_val.append({'eav_id': eav_id, 'value': item})

                data[key] = [
                    cast_(attr_type)(x) for x in value
                    if x not in empty_delim
                ]
            elif value not in empty_delim:
                if value not in seen_values:
                    seen_values.add(value)
                    buf_val.append({'eav_id': eav_id, 'value': value})

                converted = cast_(attr_type)(value)
                # A single value of a multi-valued field is stored as a
                # one-element list.
                data[key] = [converted] if is_list_type else converted

        buf.append({
            'source_id': source_id,
            'subject_id': subject_id,
            'data': data
        })

        if len(buf_val) > batch_size:
            print("got here...bufVal")
            engine.execute(eav_values.insert(), buf_val)
            buf_val = []

        if len(buf) > batch_size:
            print("got here...")
            engine.execute(eavs.insert(), buf)
            end = time.time()
            buf = []
            print("per second: ", batch_size / (end - start))
            start = end

    # Flush the remainders. An empty parameter list is not a no-op for
    # SQLAlchemy's execute(): the INSERT may still run once without any bound
    # values, so empty buffers are skipped.
    if buf:
        engine.execute(eavs.insert(), buf)
    if buf_val:
        engine.execute(eav_values.insert(), buf_val)