Пример #1
0
def get_variant(request, get_institute):
    """Create and save a research variant, and register its removal.

    Args:
        request: Object exposing ``addfinalizer``; the cleanup callback
            defined below is registered on it.
        get_institute: Value stored in the variant's ``institute`` field.

    Returns:
        The saved ``Variant`` instance.
    """
    logger.info("setup a variant")
    field_values = dict(
        document_id="document_id",
        variant_id="variant_id",
        display_name="display_name",
        variant_type='research',
        case_id='case_id',
        chromosome='1',
        position=10,
        reference="A",
        alternative="C",
        rank_score=10.0,
        variant_rank=1,
        institute=get_institute,
    )
    stored_variant = Variant(**field_values)

    logger.info("Adding variant to database")
    stored_variant.save()

    def _remove_variant():
        """Delete the variant that was saved above."""
        print('\n')
        logger.info('Removing variant')
        stored_variant.delete()
        logger.info('Case variant')

    request.addfinalizer(_remove_variant)
    return stored_variant
Пример #2
0
  def variants(self, case_id, query=None, variant_ids=None,
               nr_of_variants=10, skip=0):
    """
    Yields the variants of a case, ordered by 'variant_rank'.

    The first `skip` matching variants are skipped and at most
    `nr_of_variants` of the following ones are yielded. If variant_ids is
    given and non-empty, the limit is the number of ids instead.

    Arguments:
      case_id        : A string that represents the case
      query          : A dictionary with querys for the database
      variant_ids    : Optional collection of variant ids, forwarded to
                       build_query; its length replaces nr_of_variants
      nr_of_variants : Maximum number of variants to yield
      skip           : Number of leading variants to skip

    Returns:
      A generator with the variants

    """
    if variant_ids:
      nr_of_variants = len(variant_ids)

    mongo_query = self.build_query(case_id, query, variant_ids)

    # The limit is applied to what remains after the skip, so it must be the
    # page size itself; adding skip to it would yield skip extra variants.
    matching_variants = (Variant.objects(__raw__=mongo_query)
                                .order_by('variant_rank')
                                .skip(skip)
                                .limit(nr_of_variants))
    for variant in matching_variants:
      yield variant
Пример #3
0
    def variants(self, case_id, query=None, variant_ids=None,
                 nr_of_variants=10, skip=0):
        """Yield the variants of a case, ordered by 'variant_rank'.

        The first ``skip`` matching variants are skipped and at most
        ``nr_of_variants`` of the following ones are yielded. If
        ``variant_ids`` is given and non-empty, the limit is the number of
        ids instead.

        Arguments:
            case_id(str): A string that represents the case
            query(dict): A dictionary with querys for the database
            variant_ids: Optional collection of variant ids, forwarded to
                build_query; its length replaces nr_of_variants
            nr_of_variants(int): Maximum number of variants to yield
            skip(int): Number of leading variants to skip

        Yields:
            Variant objects
        """
        logger.info("Fetching variants from {0}".format(case_id))
        if variant_ids:
            nr_of_variants = len(variant_ids)

        mongo_query = build_query(case_id, query, variant_ids)

        # The limit is applied to what remains after the skip, so it must be
        # the page size itself; adding skip to it would yield skip extra
        # variants.
        result = (Variant.objects(__raw__=mongo_query)
                  .order_by('variant_rank')
                  .skip(skip)
                  .limit(nr_of_variants))

        for variant in result:
            yield variant
Пример #4
0
  def variants(self, case_id, query=None, variant_ids=None,
               nr_of_variants=10, skip=0):
    """
    Yields the variants of a case, ordered by 'variant_rank'.

    The first `skip` matching variants are skipped and at most
    `nr_of_variants` of the following ones are yielded. If variant_ids is
    given and non-empty, the limit is the number of ids instead.

    Arguments:
      case_id        : A string that represents the case
      query          : A dictionary with querys for the database
      variant_ids    : Optional collection of variant ids, forwarded to
                       build_query; its length replaces nr_of_variants
      nr_of_variants : Maximum number of variants to yield
      skip           : Number of leading variants to skip

    Returns:
      A generator with the variants

    """
    if variant_ids:
      nr_of_variants = len(variant_ids)

    mongo_query = self.build_query(case_id, query, variant_ids)

    # The limit is applied to what remains after the skip, so it must be the
    # page size itself; adding skip to it would yield skip extra variants.
    matching_variants = (Variant.objects(__raw__=mongo_query)
                                .order_by('variant_rank')
                                .skip(skip)
                                .limit(nr_of_variants))
    for variant in matching_variants:
      yield variant
Пример #5
0
    def delete_variants(self, case_id, variant_type):
        """Remove every variant of one type that belongs to a case.

        This is used when a case is reanalyzed.

        Args:
            case_id(str): The case id
            variant_type(str): 'research' or 'clinical'
        """
        start_message = "Deleting old {0} variants for case {1}".format(
            variant_type, case_id)
        logger.info(start_message)

        # Select the matching documents first, then delete them in one call.
        stale_variants = Variant.objects(case_id=case_id,
                                         variant_type=variant_type)
        nr_deleted = stale_variants.delete()

        summary_message = "{0} variants deleted".format(nr_deleted)
        logger.info(summary_message)
        logger.debug("Variants deleted")
Пример #6
0
def load_mongo_db(scout_configs,
                  vcf_configs=None,
                  family_type='cmms',
                  mongo_db='variantDatabase',
                  variant_type='clinical',
                  username=None,
                  password=None,
                  port=27017,
                  host='localhost',
                  rank_score_threshold=0,
                  variant_number_threshold=5000):
    """Populate a mongo database with information from ped and variant files.

    The steps are: connect to the database, build the case from the ped
    file, save any collaborating institute that is not stored yet, update
    the case, delete the case's old variants of ``variant_type`` and insert
    the variants parsed from the vcf file.

    Args:
        scout_configs: Mapping read for the keys 'load_vcf' (vcf file) and
            'ped' (ped file); also passed on to ``get_case``.
        vcf_configs: Passed to ``ConfigParser``.
        family_type: Passed to ``get_case``.
        mongo_db: Name of the database to connect to.
        variant_type: Type of the variants to replace and insert.
        username: Passed to ``connect``.
        password: Passed to ``connect``.
        port: Passed to ``connect``.
        host: Passed to ``connect``.
        rank_score_threshold: Loading stops at the first variant whose rank
            score for the case is not greater than this value.
        variant_number_threshold: Maximum number of variants to insert.
    """
    logger = logging.getLogger(__name__)
    # For testing only
    if __name__ == '__main__':
        logger = logging.getLogger("scout.ext.backend.load_mongo")

    ####### Locate the input files and connect to the database #######
    vcf_file = scout_configs['load_vcf']
    logger.info(
        "Found a vcf for loading variants into scout: {0}".format(vcf_file))

    logger.info("Connecting to {0}".format(mongo_db))
    connect(mongo_db,
            host=host,
            port=port,
            username=username,
            password=password)

    # The returned handle is not needed in this function; the call itself is
    # kept in case it is relied on to validate the connection.
    get_db()

    ped_file = scout_configs['ped']
    logger.info("Found a ped file: {0}".format(ped_file))

    ######## Parse the config file to check for keys ########
    logger.info("Parsing config file")
    config_object = ConfigParser(vcf_configs)

    ######## Get the cases and add them to the mongo db: ########
    logger.info("Get the case from ped file")
    case = get_case(scout_configs, family_type)

    logger.info('Case found in {0}: {1}'.format(ped_file, case.display_name))

    ######## Add the institute to the mongo db: ########
    for institute_name in case['collaborators']:
        if not institute_name:
            continue
        institute = get_institute(institute_name)
        logger.info("Institute found: {0}".format(institute))
        try:
            Institute.objects.get(internal_id=institute.internal_id)
            logger.info(
                "Institute {0} already in database".format(institute))
        except DoesNotExist:
            institute.save()
            logger.info(
                "Adding new institute {0} to database".format(institute))

    logger.info("Updating case in database")
    update_case(case, variant_type, logger)

    ######## Get the variants and add them to the mongo db: ########
    logger.info("Setting up a variant parser")
    variant_parser = VCFParser(infile=vcf_file,
                               split_variants=True,
                               skip_info_check=True)
    nr_of_variants = 0

    logger.info("Deleting old variants for case {0}".format(case.case_id))
    Variant.objects(case_id=case.case_id, variant_type=variant_type).delete()
    logger.debug("Variants deleted")

    # Get the individuals to see which we should include in the analysis
    ped_individuals = {
        individual.individual_id: individual.display_name
        for individual in case.individuals
    }

    # Keep only the ped individuals that also exist in the vcf file, as a
    # dictionary with individual ids as keys and display names as values.
    individuals = {}
    logger.info("Checking which individuals in ped file exists in vcf")
    for individual_id, display_name in iteritems(ped_individuals):
        logger.debug("Checking individual {0}".format(individual_id))
        if individual_id in variant_parser.individuals:
            logger.debug("Individual {0} found".format(individual_id))
            individuals[individual_id] = display_name
        else:
            logger.warning("Individual {0} is present in ped file but"
                           " not in vcf".format(individual_id))

    logger.info('Start parsing variants')

    for variant in variant_parser:
        logger.debug("Parsing variant {0}".format(variant['variant_id']))
        # Stop at the first variant that does not exceed the rank score
        # threshold. NOTE(review): this presumably relies on the vcf being
        # sorted by descending rank score - confirm.
        if not float(variant['rank_scores'][
                case.display_name]) > rank_score_threshold:
            logger.info("Lower rank score threshold reaced after {0}"
                        " variants".format(nr_of_variants))
            break

        # The counter is incremented after this check, so '>=' is needed to
        # stop at exactly variant_number_threshold inserted variants.
        if nr_of_variants >= variant_number_threshold:
            logger.info("Variant number threshold reached. ({0})".format(
                variant_number_threshold))
            break

        nr_of_variants += 1
        # nr_of_variants is passed as the last argument after the increment,
        # so the first inserted variant gets the value 1.
        mongo_variant = get_mongo_variant(variant, variant_type, individuals,
                                          case, config_object, nr_of_variants)

        mongo_variant.save()

        if nr_of_variants % 1000 == 0:
            logger.info('{0} variants parsed'.format(nr_of_variants))
Пример #7
0
    id_fields = [
        variant['CHROM'], variant['POS'], variant['REF'], variant['ALT'],
        variant_type
    ]

    variant_id = generate_md5_key(id_fields)
    document_id = generate_md5_key(id_fields + case_id.split('_'))

    # Create the mongo variant object
    mongo_variant = Variant(document_id=document_id,
                            variant_id=variant_id,
                            variant_type=variant_type,
                            case_id=case_id,
                            display_name='_'.join(id_fields),
                            chromosome=variant['CHROM'],
                            position=int(variant['POS']),
                            reference=variant['REF'],
                            alternative=variant['ALT'],
                            variant_rank=variant_count,
                            quality=float(variant['QUAL']),
                            filters=variant['FILTER'].split(';'),
                            institute=institute)

    # If a variant belongs to any gene lists we check which ones
    mongo_variant['gene_lists'] = variant['info_dict'].get(
        config_object['VCF']['GeneLists']['vcf_info_key'], None)

    ################# Add the rank score and variant rank #################
    # Get the rank score as specified in the config file.
    # This is central for displaying variants in scout.
Пример #8
0
def load_mongo_db(scout_configs, vcf_configs=None, family_type='cmms',
                  mongo_db='variantDatabase', variant_type='clinical',
                  username=None, password=None, port=27017, host='localhost',
                  rank_score_threshold=0, variant_number_threshold=5000):
  """Populate a mongo database with information from ped and variant files.

  The steps are: connect to the database, build the case from the ped file,
  save any collaborating institute that is not stored yet, update the case,
  delete the case's old variants of ``variant_type``, insert the variants
  parsed from the vcf file and finally update the indexes.

  Args:
    scout_configs: Mapping read for the keys 'load_vcf' (vcf file) and
      'ped' (ped file); also passed on to ``get_case``.
    vcf_configs: Passed to ``ConfigParser``.
    family_type: Passed to ``get_case``.
    mongo_db: Name of the database to connect to.
    variant_type: Type of the variants to replace and insert.
    username: Passed to ``connect``.
    password: Passed to ``connect``.
    port: Passed to ``connect``.
    host: Passed to ``connect``.
    rank_score_threshold: Loading stops at the first variant whose rank
      score for the case is not greater than this value.
    variant_number_threshold: Maximum number of variants to insert.
  """
  logger = logging.getLogger(__name__)
  # For testing only
  if __name__ == '__main__':
    logger = logging.getLogger("scout.ext.backend.load_mongo")

  ####### Locate the input files and connect to the database #######
  vcf_file = scout_configs['load_vcf']
  logger.info("Found a vcf for loading variants into scout: {0}".format(
    vcf_file))

  logger.info("Connecting to {0}".format(mongo_db))
  connect(mongo_db, host=host, port=port, username=username,
          password=password)

  # Handle used for the index update at the end of the function.
  variant_database = get_db()

  ped_file = scout_configs['ped']
  logger.info("Found a ped file: {0}".format(ped_file))

  ######## Parse the config file to check for keys ########
  logger.info("Parsing config file")
  config_object = ConfigParser(vcf_configs)

  ######## Get the cases and add them to the mongo db: ########
  logger.info("Get the case from ped file")
  case = get_case(scout_configs, family_type)

  logger.info('Case found in {0}: {1}'.format(ped_file, case.display_name))

  ######## Add the institute to the mongo db: ########
  for institute_name in case['collaborators']:
    if not institute_name:
      continue
    institute = get_institute(institute_name)
    logger.info("Institute found: {0}".format(institute))
    try:
      Institute.objects.get(internal_id=institute.internal_id)
      logger.info("Institute {0} already in database".format(institute))
    except DoesNotExist:
      institute.save()
      logger.info("Adding new institute {0} to database".format(institute))

  logger.info("Updating case in database")
  update_case(case, variant_type, logger)

  ######## Get the variants and add them to the mongo db: ########
  logger.info("Setting up a variant parser")
  variant_parser = VCFParser(infile=vcf_file, split_variants=True,
                             skip_info_check=True)
  nr_of_variants = 0

  logger.info("Deleting old variants for case {0}".format(case.case_id))
  Variant.objects(case_id=case.case_id, variant_type=variant_type).delete()
  logger.debug("Variants deleted")

  start_inserting_variants = datetime.now()

  # Get the individuals to see which we should include in the analysis
  ped_individuals = {individual.individual_id: individual.display_name
                     for individual in case.individuals}

  # Keep only the ped individuals that also exist in the vcf file, as a
  # dictionary with individual ids as keys and display names as values.
  individuals = {}
  logger.info("Checking which individuals in ped file exists in vcf")
  for individual_id, display_name in iteritems(ped_individuals):
    logger.debug("Checking individual {0}".format(individual_id))
    if individual_id in variant_parser.individuals:
      logger.debug("Individual {0} found".format(individual_id))
      individuals[individual_id] = display_name
    else:
      logger.warning("Individual {0} is present in ped file but"
                     " not in vcf".format(individual_id))

  logger.info('Start parsing variants')

  for variant in variant_parser:
    logger.debug("Parsing variant {0}".format(variant['variant_id']))
    # Stop at the first variant that does not exceed the rank score
    # threshold. NOTE(review): this presumably relies on the vcf being
    # sorted by descending rank score - confirm.
    if not float(
        variant['rank_scores'][case.display_name]) > rank_score_threshold:
      logger.info("Lower rank score threshold reaced after {0}"
                  " variants".format(nr_of_variants))
      break

    # The counter is incremented after this check, so '>=' is needed to
    # stop at exactly variant_number_threshold inserted variants.
    if nr_of_variants >= variant_number_threshold:
      logger.info("Variant number threshold reached. ({0})".format(
        variant_number_threshold))
      break

    nr_of_variants += 1
    # nr_of_variants is passed as the last argument after the increment,
    # so the first inserted variant gets the value 1.
    mongo_variant = get_mongo_variant(variant, variant_type, individuals,
                                      case, config_object, nr_of_variants)

    mongo_variant.save()

    if nr_of_variants % 1000 == 0:
      logger.info('{0} variants parsed'.format(nr_of_variants))

  logger.info("Parsing variants done")
  logger.info("{0} variants inserted".format(nr_of_variants))
  logger.info("Time to insert variants: {0}".format(
    datetime.now() - start_inserting_variants))

  logger.info("Updating indexes")
  ensure_indexes(variant_database, logger)