예제 #1
0
# Only paths ending in ".vds" are accepted as input.
# NOTE(review): `p` looks like an argparse parser, whose error() exits the
# process — confirm; the slicing below relies on the suffix being present.
if not input_vds_path.endswith(".vds"):
    p.error("Input must be a .vds")

# Strip only the trailing ".vds" extension. str.replace() would also remove
# any earlier ".vds" occurrence in the path (e.g. a parent directory such as
# "batch.vds_v2/"), silently producing a wrong prefix.
input_vds_path_prefix = input_vds_path[:-len(".vds")]

logger.info("\n==> create HailContext")
hc = hail.HailContext(log="/hail.log")

# Lazy %-style logging args: same emitted message, no manual concatenation.
logger.info("\n==> import vds: %s", input_vds_path)
vds = hc.read(input_vds_path)

# First batch of variant annotations, as (va field name, expression) pairs.
# All of them are handed to a single annotate_variants_expr() call.
# "pos" repeats the "start" expression and "xstart" repeats the "xpos"
# expression, so each value is stored under both names.
first_pass_fields = [
    ("variantId", get_expr_for_variant_id()),
    ("contig", get_expr_for_contig()),
    ("start", get_expr_for_start_pos()),
    ("pos", get_expr_for_start_pos()),
    ("end", get_expr_for_end_pos()),
    ("ref", get_expr_for_ref_allele()),
    ("alt", get_expr_for_alt_allele()),
    ("xpos", get_expr_for_xpos(pos_field="start")),
    ("xstart", get_expr_for_xpos(pos_field="start")),
]
parallel_computed_annotation_exprs = [
    "va.%s = %s" % (field_name, field_expr)
    for field_name, field_expr in first_pass_fields
]

# Second batch: xstop is built with the "va." prefix and the "end" field.
# NOTE(review): presumably kept in a separate pass because it reads va.end,
# which only exists after the first pass — confirm against get_expr_for_xpos.
xstop_expr = get_expr_for_xpos(field_prefix="va.", pos_field="end")
serial_computed_annotation_exprs = ["va.xstop = %s" % xstop_expr]

# Apply the two batches in order, one annotate_variants_expr() call each.
for annotation_batch in (
    parallel_computed_annotation_exprs,
    serial_computed_annotation_exprs,
):
    vds = vds.annotate_variants_expr(annotation_batch)
# Print the schema of both key tables (annotations first, then rare variants).
for key_table in (kt_annotations, kt_rare_variants):
    pprint(key_table.schema)

# Hard-coded address of the Elasticsearch host used by the client below.
ES_HOST_IP = '10.4.0.13'
ES_HOST_PORT = 9200

print("======== Export to elasticsearch ======")
es = ElasticsearchClient(host=ES_HOST_IP, port=ES_HOST_PORT)

# Columns added to the rare-variants key table, as (column name, expression)
# pairs. They are applied one annotate() call at a time, in this order.
# NOTE(review): xpos is built with an empty prefix and pos_field="pos", so it
# presumably reads the "pos" column added by the entry just before it — confirm.
rare_variant_columns = [
    ('variant_id', get_expr_for_variant_id()),
    ('chrom', get_expr_for_contig()),
    ('pos', get_expr_for_start_pos()),
    ("xpos", get_expr_for_xpos(field_prefix="", pos_field="pos")),
]
annotation_expressions = [
    '%s = %s' % (column_name, column_expr)
    for column_name, column_expr in rare_variant_columns
]

for expression in annotation_expressions:
    kt_rare_variants = kt_rare_variants.annotate(expression)

# The 'v' column is dropped from both tables once the derived columns exist.
kt_rare_variants = kt_rare_variants.drop(['v'])

kt_annotations = kt_annotations.annotate('variantId = %s' % get_expr_for_variant_id())
kt_annotations = kt_annotations.drop(['v'])

# Join the two tables on 'variantId'.
# NOTE(review): the column annotated on kt_rare_variants above is named
# 'variant_id', not 'variantId'; presumably kt_rare_variants already carries a
# 'variantId' column from an earlier step — confirm, otherwise key_by will fail.
rare_variants_by_id = kt_rare_variants.key_by('variantId')
annotations_by_id = kt_annotations.key_by('variantId')
kt_rare_variants = rare_variants_by_id.join(annotations_by_id)

pprint(kt_rare_variants.schema)

es.export_kt_to_elasticsearch(