예제 #1
0
# Normalize the input path: drop any trailing "/" so the suffix check below
# looks at the .vds directory name itself.
input_vds_path = str(args.input_vds).rstrip("/")
if not input_vds_path.endswith(".vds"):
    # NOTE(review): presumably `p` is an argparse parser whose error() exits
    # the process -- confirm; the slice below relies on the suffix being there.
    p.error("Input must be a .vds")

# Strip only the trailing ".vds". str.replace(".vds", "") would also remove
# every earlier occurrence in the path
# (e.g. "gs://b/run.vds.old/x.vds" -> "gs://b/run.old/x").
input_vds_path_prefix = input_vds_path[:-len(".vds")]

logger.info("\n==> create HailContext")
hc = hail.HailContext(log="/hail.log")

logger.info("\n==> import vds: " + input_vds_path)
vds = hc.read(input_vds_path)

# First pass: annotations handed to annotate_variants_expr() as one list.
# Each pair is (va field name, Hail expression string from a helper); the
# helpers are called in the order the fields are listed.
parallel_computed_annotation_exprs = [
    "va.%s = %s" % (va_field, hail_expr)
    for va_field, hail_expr in (
        ("variantId", get_expr_for_variant_id()),

        ("contig", get_expr_for_contig()),
        ("start", get_expr_for_start_pos()),
        ("pos", get_expr_for_start_pos()),
        ("end", get_expr_for_end_pos()),
        ("ref", get_expr_for_ref_allele()),
        ("alt", get_expr_for_alt_allele()),

        ("xpos", get_expr_for_xpos(pos_field="start")),
        ("xstart", get_expr_for_xpos(pos_field="start")),
    )
]

# Second pass: va.xstop is built with field_prefix="va." and pos_field="end",
# so it presumably reads va.end, which is only assigned by the first pass.
serial_computed_annotation_exprs = [
    "va.xstop = %s" % get_expr_for_xpos(
        field_prefix="va.",
        pos_field="end",
    ),
]

# Apply the passes in order: the parallel list first, then the serial list.
for annotation_exprs in (
    parallel_computed_annotation_exprs,
    serial_computed_annotation_exprs,
):
    vds = vds.annotate_variants_expr(annotation_exprs)
예제 #2
0
         NCC: Int,
         NEGATIVE_TRAIN_SITE: Boolean,
         POSITIVE_TRAIN_SITE: Boolean,
         QD: Double,
         ReadPosRankSum: Double,
         SOR: Double,
         VQSLOD: Double,
         culprit: String,
         AC_Hom: Array[Int],
         AC_Het: Array[Int],
         AC_Hemi: Array[Int],
    """
}

# Hail expression strings that assign computed `va.*` variant annotations.
# Each entry has the form "<va field> = <expression string returned by a helper>".
vds_computed_annotations_exprs = [
    # Basic locus/allele fields.
    "va.chrom = %s" % get_expr_for_contig(),
    "va.pos = %s" % get_expr_for_start_pos(),
    "va.ref = %s" % get_expr_for_ref_allele(),
    "va.alt = %s" % get_expr_for_alt_allele(),
    "va.xpos = %s" % get_expr_for_xpos(),

    # Identifier plus fields whose helpers are named after VEP output.
    "va.variantId = %s" % get_expr_for_variant_id(),
    "va.originalAltAlleles = %s" % get_expr_for_orig_alt_alleles_set(),
    "va.geneIds = %s" % get_expr_for_vep_gene_ids_set(),
    "va.transcriptIds = %s" % get_expr_for_vep_transcript_ids_set(),
    "va.transcriptConsequenceTerms = %s" % get_expr_for_vep_consequence_terms_set(),
    "va.sortedTranscriptConsequences = %s" % get_expr_for_vep_sorted_transcript_consequences_array(),
    # The helper is given the name of the field assigned on the previous line.
    "va.mainTranscript = %s" % get_expr_for_worst_transcript_consequence_annotations_struct("va.sortedTranscriptConsequences"),
    # Second assignment to the same field: replaces it with json(...) of itself.
    # NOTE(review): this list is order-sensitive -- mainTranscript references
    # va.sortedTranscriptConsequences before it is turned into JSON. Confirm
    # the entries are applied one at a time, in this order, where the list is used.
    "va.sortedTranscriptConsequences = json(va.sortedTranscriptConsequences)"
]
# Dump the schema of every key table before exporting.
for key_table in (kt_pop, kt_annotations, kt_rare_variants):
    pprint(key_table.schema)

# Elasticsearch endpoint the client below connects to.
ES_HOST_IP = '10.4.0.13'
ES_HOST_PORT = 9200

print("======== Export to elasticsearch ======")
es = ElasticsearchClient(host=ES_HOST_IP, port=ES_HOST_PORT)

# Columns to add to kt_rare_variants, as "<column> = <Hail expression>".
# NOTE(review): xpos is requested with field_prefix="" and pos_field="pos", so
# it presumably reads bare column names; verify the helper finds the contig
# under the name used here (`chrom`).
annotation_expressions = [
    '%s = %s' % (column, hail_expr)
    for column, hail_expr in (
        ('variant_id', get_expr_for_variant_id()),
        ('chrom', get_expr_for_contig()),
        ('pos', get_expr_for_start_pos()),
        ('xpos', get_expr_for_xpos(field_prefix="", pos_field="pos")),
    )
]

# Annotate one expression per call, in list order (xpos names the `pos`
# column added just before it), then drop the variant column `v`.
for expression in annotation_expressions:
    kt_rare_variants = kt_rare_variants.annotate(expression)
kt_rare_variants = kt_rare_variants.drop(['v'])

# Give the annotations table a `variantId` column, then drop `v` there too.
kt_annotations = kt_annotations.annotate('variantId = %s' % get_expr_for_variant_id())
kt_annotations = kt_annotations.drop(['v'])

# Join both tables on `variantId`.
# NOTE(review): the loop above adds `variant_id`, not `variantId`; confirm
# kt_rare_variants already carries a `variantId` column before this key_by.
rare_variants_by_id = kt_rare_variants.key_by('variantId')
annotations_by_id = kt_annotations.key_by('variantId')
kt_rare_variants = rare_variants_by_id.join(annotations_by_id)

pprint(kt_rare_variants.schema)
예제 #4
0
  'Analysis group': 'analysis_group',
  'AC case': 'ac_case',
  'AC ctrl': 'ac_ctrl',
  'AN case': 'an_case',
  'AN ctrl': 'an_ctrl',
  'AF case': 'af_case',
  'AF ctrl': 'af_ctrl',
  'Estimate': 'est',
  'SE': 'se',
  'P-value': 'p',
  'Comment': 'comment',
}

# Columns to add to kt_variant_results, as "<column> = <Hail expression>".
annotation_expressions = [
    '%s = %s' % (column, hail_expr)
    for column, hail_expr in (
        ('variant_id', get_expr_for_variant_id()),
        ('contig', get_expr_for_contig()),
        ('pos', get_expr_for_start_pos()),
        ('xpos', get_expr_for_xpos(field_prefix="", pos_field="pos")),
    )
]

# Annotation table: rename columns via column_map, then rebuild `v` with
# Variant(v).
kt_variant_annotation = (
    kt_variant_annotation
    .rename(column_map)
    .annotate('v = Variant(v)')
)

# Results table: same rename, then add the derived columns one expression per
# call, in list order (xpos names the `pos` column added just before it).
# NOTE(review): only the annotation table is converted with Variant(v) here;
# confirm kt_variant_results.v already has the matching type for the join.
kt_variant_results = kt_variant_results.rename(column_map)
for expression in annotation_expressions:
    kt_variant_results = kt_variant_results.annotate(expression)

# Join on `v`, then drop it from the combined table.
results_by_variant = kt_variant_results.key_by('v')
annotation_by_variant = kt_variant_annotation.key_by('v')
kt_variants = results_by_variant.join(annotation_by_variant).drop(['v'])

kt_results_by_cohort = kt_results_by_cohort.rename(column_map)