async def process_phenopacket(source_id, pheno_packet):
    """Store one phenopacket and index its attribute paths and values.

    For every distinct path produced by ``create_all_paths`` the distinct
    values returned by ``collect_value_from_path`` are gathered. A path with
    at least one value gets a row in ``eav_attributes`` (an existing row is
    left untouched via ``on_conflict_do_nothing``) and one ``eav_values`` row
    per value. Afterwards the packet itself is inserted into ``eavs``,
    ``clean_up_eav_values`` is run and ``processing_done`` is awaited.

    Args:
        source_id: Identifier written to every inserted attribute/packet row.
        pheno_packet: The packet to store; ``pheno_packet['id']`` is used as
            the subject id and the whole object is stored as ``data``.
    """
    for path in set(create_all_paths(pheno_packet)):
        # Values are stored as collected (de-duplicated only); no filtering
        # is applied here.
        values = list(set(collect_value_from_path(path, pheno_packet)))
        if not values:
            continue

        # The JSON-serialised path doubles as the attribute's primary key.
        eav_id = json.dumps(path)
        attribute_insert = insert(eav_attributes).values(
            id=eav_id,
            source_id=source_id,
            eav_attribute=path,
        ).on_conflict_do_nothing()
        await database.execute(query=attribute_insert)

        # NOTE(review): engine.execute is called without await; if `engine`
        # is a synchronous engine this blocks the event loop - confirm.
        value_rows = [{'eav_id': eav_id, 'value': v} for v in values]
        engine.execute(eav_values.insert(), value_rows)

    packet_insert = eavs.insert().values(
        source_id=source_id,
        subject_id=pheno_packet['id'],
        data=pheno_packet,
    )
    await database.execute(query=packet_insert)

    clean_up_eav_values()
    await processing_done(source_id)
def clean_up_eav_values():
    """Delete duplicate and empty rows from ``eav_values``.

    Within each ``(eav_id, value)`` group only the row with the lowest ``id``
    is kept; every row whose ``value`` is the empty string is deleted as well.
    """
    # Adjacent literals concatenate into the exact single-line statement.
    dedupe_sql = (
        "DELETE FROM eav_values WHERE id IN "
        "(SELECT id FROM "
        "(SELECT id, ROW_NUMBER() OVER( PARTITION BY eav_id, value ORDER BY id ) AS row_num "
        "FROM eav_values) t "
        "WHERE t.row_num > 1) "
        "or value = '';"
    )
    engine.execute(text(dedupe_sql))
async def get_attributes_vals(id: str, attribute: dict):
    """Return all stored values of one attribute as a single aggregated array.

    The attribute key is ``json.dumps(attribute['attribute'])``; it is printed
    and then bound as ``:id`` in an ``array_agg`` query over ``eav_values``.

    Args:
        id: Not read by this function.
        attribute: Mapping whose ``'attribute'`` entry identifies the
            attribute to look up.

    Returns:
        The ``array_agg`` column of the first result row.
    """
    attr_id = json.dumps(attribute['attribute'])
    print(attr_id)

    statement = text(
        """select array_agg(x.value) from (select value from eav_values where eav_id = :id"""
        ') x'
    )
    result = engine.execute(statement, id=attr_id)
    first_row = list(result)[0]
    return first_row['array_agg']
async def get_attributes_vals(id: str, payload: AttributeValues):
    """Return stored values of one attribute, optionally filtered and paged.

    The attribute key is ``json.dumps(payload.attribute)``. When
    ``payload.string`` is truthy only values matching ``%<string>%`` (SQL
    ``like``) are selected; truthy ``payload.limit`` / ``payload.offset`` add
    ``limit`` / ``offset`` clauses to the inner select.

    Args:
        id: Not read by this function.
        payload: Provides ``attribute``, ``string``, ``limit`` and ``offset``.

    Returns:
        The ``array_agg`` column of the first result row.
    """
    attr_id = json.dumps(payload.attribute)
    if payload.string:
        like_pattern = '%' + payload.string + '%'
    else:
        like_pattern = ''

    # Collect the SQL fragments in order, then join them into one statement.
    fragments = [
        """select array_agg(x.value) from (select value from eav_values where eav_id = :id"""
    ]
    if payload.string:
        fragments.append(' and value like :str')
    if payload.limit:
        fragments.append(' limit :limit')
    if payload.offset:
        fragments.append(' offset :offset')
    fragments.append(') x')
    statement = text(''.join(fragments))

    # All four parameters are always passed, whether or not the statement
    # contains the matching placeholder.
    bind_params = {
        'id': attr_id,
        'limit': payload.limit,
        'offset': payload.offset,
        'str': like_pattern,
    }
    result = engine.execute(statement, **bind_params)
    first_row = list(result)[0]
    return first_row['array_agg']
def init_db(ctx):
    """Create all tables on ``Base.metadata`` and insert one ``widget`` row.

    Prints a progress message, runs ``create_all``, inserts a fixed row into
    ``widget`` and finally prints whatever ``engine.execute`` returns for a
    ``select *`` on that table.

    Args:
        ctx: Not read by this function.
    """
    print("Creating all resources.")
    Base.metadata.create_all()

    engine.execute("insert into widget values (1, 'hey', 'there');")
    selection = engine.execute("select * from widget;")
    print(selection)
def _buffer_new_value(seen_values, pending_rows, eav_id, value):
    """Queue ``value`` for insertion under ``eav_id`` unless already seen."""
    if value not in seen_values[eav_id]:
        seen_values[eav_id].add(value)
        pending_rows.append({'eav_id': eav_id, 'value': value})


async def process_old(source_id, file_name, empty_delim, eav_types):
    """Load records from ``VCF.lines(file_name)`` into the EAV tables.

    Each record is converted to a dict. For every truthy field an attribute
    id is derived from ``{key: eav_types[key]}`` (passed through ``map_`` and
    ``json.dumps``); the attribute is inserted into ``eav_attributes`` the
    first time it is seen, each distinct value is queued for ``eav_values``
    and the cast value is stored in the record's ``data``. Records are queued
    for ``eavs``. Both queues are flushed with ``engine.execute`` once they
    exceed 1000 entries, and once more after the loop.

    Args:
        source_id: Identifier written to attribute and record rows.
        file_name: Passed to ``VCF.lines``.
        empty_delim: Container of values treated as "empty"; such values are
            not cast into ``data``.
        eav_types: Mapping from field name to its type spec. A spec that is a
            ``list`` instance yields a list-valued ``data`` entry.
    """
    start = time.time()
    max_count = 10000000
    buf = []
    bufVal = []
    eav_attrs = set()
    eav_attr_vals = {}

    for count, record in enumerate(VCF.lines(file_name), start=1):
        if count > max_count:
            break
        d_v = dict(record)
        data = {}
        # Fall back to the running record number when there is no usable ID.
        subject_id = d_v.get('ID')
        if subject_id is None:
            subject_id = count

        for key, value in d_v.items():
            if not value:
                continue

            type_spec = eav_types[key]
            attr = map_({key: type_spec}, lambda x: x.__name__)
            eav_id = json.dumps(attr)
            eav_attr_vals.setdefault(eav_id, set())
            if eav_id not in eav_attrs:
                eav_attrs.add(eav_id)
                queryAttr = insert(eav_attributes).values(
                    id=eav_id,
                    source_id=source_id,
                    eav_attribute=attr,
                ).on_conflict_do_nothing()
                await database.execute(query=queryAttr)

            spec_is_list = type(type_spec) is list
            if spec_is_list and type(value) is list:
                # Every element is queued as a value; only non-empty ones are
                # cast into the record's data.
                for item in value:
                    _buffer_new_value(eav_attr_vals, bufVal, eav_id, item)
                data[key] = [
                    cast_(eav_types[key])(x) for x in value
                    if x not in empty_delim
                ]
            elif spec_is_list and value not in empty_delim:
                # List-typed attribute with a scalar value (a list value was
                # handled by the branch above): wrap the cast value.
                _buffer_new_value(eav_attr_vals, bufVal, eav_id, value)
                data[key] = [cast_(eav_types[key])(value)]
            elif value not in empty_delim:
                _buffer_new_value(eav_attr_vals, bufVal, eav_id, value)
                data[key] = cast_(eav_types[key])(value)

        buf.append({
            'source_id': source_id,
            'subject_id': subject_id,
            'data': data
        })

        # NOTE(review): engine.execute is called without await; if `engine`
        # is a synchronous engine this blocks the event loop - confirm.
        if len(bufVal) > 1000:
            print("got here...bufVal")
            engine.execute(eav_values.insert(), bufVal)
            bufVal = []
        if len(buf) > 1000:
            print("got here...")
            engine.execute(eavs.insert(), buf)
            end = time.time()
            buf = []
            print("per second: ", 1000 / (end - start))
            start = end

    # Flush what is left. Executing an INSERT with an empty parameter list is
    # not a no-op (it may be run as a parameterless INSERT), so skip empty
    # buffers.
    if buf:
        engine.execute(eavs.insert(), buf)
    if bufVal:
        engine.execute(eav_values.insert(), bufVal)