Пример #1
0
def update_distance_matrix(config_dict, args):
    logger = logging.getLogger('snapperdb.snpdb.update_distance_matrix')
    logger.info('Inititialising SnpDB Class')
    snpdb = SNPdb(config_dict)
    snpdb.parse_config_dict(config_dict)
    snpdb._connect_to_snpdb()
    logger.info('Getting strains')
    strain_list, update_strain, all_strains = snpdb.get_strains()

    # # get_all_good_ids from snpdb2 takes a snp cutoff as well, here, we don't have a SNP cutoff so we set it arbitrarily high.
    snp_co = '1000000'
    if all_strains or len(update_strain) > 1:
        if update_strain:
            print "###  Populating distance matrix: " + str(
                datetime.datetime.now())
            snpdb.parse_args_for_update_matrix(snp_co, strain_list)
            if args.hpc == 'N':
                print '### Launching serial update_distance_matrix ' + str(
                    datetime.datetime.now())
                snpdb.check_matrix(strain_list, update_strain)
                snpdb.update_clusters()
            else:
                print '### Launching parallel update_distance_matrix ' + str(
                    datetime.datetime.now())
                present_stains = list(set(strain_list) - set(update_strain))
                for idx, one_strain in enumerate(
                        chunks(list(update_strain), int(args.hpc))):
                    snpdb.write_qsubs_to_check_matrix(
                        args, idx, one_strain, present_stains,
                        config_dict['snpdb_name'])
                snpdb.check_matrix(update_strain, update_strain)
        else:
            print '### Nothing to update ' + str(datetime.datetime.now())
    else:
        print '### Nothing to update ' + str(datetime.datetime.now())
Пример #2
0
def import_json(args):
    #set up logging
    logger = logging.getLogger('snapperdb.snpdb.import_json')
    json_path = untar_file(args.json_file)
    json_dict = {}

    #import json
    try:
        with open(json_path) as json_data:
            json_dict = json.load(json_data)
        json_data.close()
    except IOError:
        print "Issue with JSON file. Exiting."
        exit()

    #parse config
    args.config_file = json_dict['config_file']
    config_dict = parse_config(args)

    #initalise snpdb class
    snpdb = SNPdb(config_dict)
    #parse confif
    snpdb.parse_config_dict(config_dict)
    #get reference genome path
    ref_seq_file = os.path.join(snapperdb.__ref_genome_dir__,
                                snpdb.reference_genome + '.fa')
    #read the reference fasta
    ref_seq = read_multi_contig_fasta(ref_seq_file)

    #create VCF class
    vcf = Vcf()
    vcf.parse_json_dict(json_dict, ref_seq)
    vcf.depth_average = json_dict['strain_stats']
    vcf.sample_name = json_dict['sample']

    logger.info('Uploading to SNPdb')
    #upload vcf
    #connect to snpdb postgres
    snpdb._connect_to_snpdb()
    snpdb.snpdb_conn = psycopg2.connect(snpdb.conn_string)

    if args.write_flag == 'W':

        snpdb.snpdb_upload(vcf, args)
        #annotate vars
        logger.info('Annotating new variants')

        snpdb.snpdb_annotate_vars(vcf)
    elif args.write_flag == 'R':
        print "under development"
Пример #3
0
def accept_outlier(args, config_dict):
    """Accept an outlier strain (args.out_strain) into SNPdb.

    Registers a z-score exception for the strain so it is no longer
    rejected as a distance-matrix outlier.
    """
    # Fix: logger was named 'snapperdb.snpdb.export_json', copy-pasted
    # from export_json; it now identifies this function correctly.
    logger = logging.getLogger('snapperdb.snpdb.accept_outlier')
    logger.info('Inititialising SnpDB Class')
    # Initialise the SNPdb wrapper and parse its config.
    snpdb = SNPdb(config_dict)
    snpdb.parse_config_dict(config_dict)

    # Connect to the backing postgres database.
    snpdb._connect_to_snpdb()
    snpdb.snpdb_conn = psycopg2.connect(snpdb.conn_string)

    # Record the z-score exception for the outlier strain.
    snpdb.zscore_exception(args.out_strain)
Пример #4
0
def get_strains(args, config_dict):
    """Fetch the strain list for this SNPdb at threshold args.thresh.

    Delegates to SNPdb.get_strain_list; output handling happens there.
    """
    logger = logging.getLogger('snapperdb.snpdb.get_strains')
    logger.info('Inititialising SnpDB Class')

    # Build the SNPdb wrapper from the parsed config.
    db = SNPdb(config_dict)
    db.parse_config_dict(config_dict)

    # Open the postgres connection.
    db._connect_to_snpdb()
    db.snpdb_conn = psycopg2.connect(db.conn_string)

    # Retrieve strains at the requested threshold.
    db.get_strain_list(args.thresh)
Пример #5
0
def add_ref_cluster(args, config_dict):
    """Add the reference genome's cluster entry to the SNPdb database."""
    logger = logging.getLogger('snapperdb.snpdb.add_ref_cluster')
    logger.info('Initialising SNPdb class')

    # Build the SNPdb wrapper and feed it the parsed config.
    db = SNPdb(config_dict)
    logger.info('Parsing config dict')
    db.parse_config_dict(config_dict)

    # Open the postgres connection before touching cluster tables.
    db._connect_to_snpdb()
    db.snpdb_conn = psycopg2.connect(db.conn_string)

    db.add_cluster()
Пример #6
0
def get_the_snps(args, config_dict):
    """Export SNPs for a list of strains as a fasta alignment, with an
    optional distance matrix and variant list.

    Reads the strain names from args.strain_list, appends the reference
    genome, optionally masks recombinant regions (args.rec_file and/or
    args.gubbins_rec_file), then queries SNPdb and writes the outputs.
    """
    logger = logging.getLogger('snapperdb.snpdb.get_the_snps')
    logger.info('Inititialising SnpDB Class')
    snpdb = SNPdb(config_dict)
    snpdb.parse_config_dict(config_dict)
    # Strains to export, read from the user-supplied list file.
    strain_list = read_file(args.strain_list)

    snpdb._connect_to_snpdb()
    # Locate and load the reference fasta for this database.
    ref_seq_file = os.path.join(snapperdb.__ref_genome_dir__,
                                snpdb.reference_genome + '.fa')
    ref_seq = read_multi_contig_fasta(ref_seq_file)
    # The reference itself is always part of the alignment.
    strain_list.append(snpdb.reference_genome)

    # Optional recombination masks accumulated into one dict.
    rec_dict = {}
    if args.rec_file != 'N':
        logger.info('Reading recombination list')
        rec_dict = read_rec_file_mc(args.rec_file, rec_dict)
    # Fix: compare to None with 'is not' (PEP 8), not '!='.
    if args.gubbins_rec_file is not None:
        logger.info('Reading gubbins recombination list')
        rec_dict = read_rec_file_mc_gubbins(args.gubbins_rec_file, ref_seq,
                                            rec_dict)

    # Query the database, then write the fasta alignment.
    snpdb.parse_args_for_get_the_snps_mc(args, strain_list, ref_seq,
                                         snpdb.reference_genome)
    snpdb.print_fasta_mc(args, rec_dict)

    # Optional extra outputs.
    if args.mat_flag == 'Y':
        snpdb.print_matrix(args.out)
    if args.var_flag == 'Y':
        logger.info('Printing variants')
        snpdb.print_vars_mc(args, rec_dict)
Пример #7
0
def qsub_to_check_matrix(config_dict, args):
    """Worker entry point for one parallel distance-matrix qsub job.

    Reads the batch of newly-added strains (args.added_list) and the
    strains already in the matrix (args.present_strains) from the files
    written by write_qsubs_to_check_matrix, then runs check_matrix for
    the new strains against the union of both sets.
    """
    snpdb = SNPdb(config_dict)
    snpdb.parse_config_dict(config_dict)
    snpdb._connect_to_snpdb()
    # No SNP cutoff applies here, so use an arbitrarily large one.
    snp_co = '1000000'
    # Idiom fix: iterate the file object directly with a comprehension
    # instead of materialising readlines() and appending in a loop.
    with open(args.added_list) as fi:
        added_list = [line.strip() for line in fi]
    with open(args.present_strains) as fi:
        present_strains = [line.strip() for line in fi]

    # Compare the new strains against everything (old and new).
    strain_list = list(set(present_strains) | set(added_list))
    snpdb.parse_args_for_update_matrix(snp_co, strain_list)
    snpdb.check_matrix(strain_list, added_list)
Пример #8
0
def export_json(args, config_dict):
    """Export data for the strains in args.strain_list from SNPdb.

    Delegates the actual export to SNPdb.parse_args_for_export.
    """
    logger = logging.getLogger('snapperdb.snpdb.export_json')
    logger.info('Inititialising SnpDB Class')

    # Build the SNPdb wrapper from the parsed config.
    db = SNPdb(config_dict)
    db.parse_config_dict(config_dict)

    # Strains to export, one name per line.
    strains = read_file(args.strain_list)

    db._connect_to_snpdb()

    # Load the reference fasta this database was built against.
    fasta_path = os.path.join(snapperdb.__ref_genome_dir__,
                              db.reference_genome + '.fa')
    reference = read_multi_contig_fasta(fasta_path)

    db.parse_args_for_export(args, strains, reference)
Пример #9
0
def vcf_to_db(args, config_dict, vcf):
    """Load a VCF into SNPdb: parse it, upload its variants, annotate.

    Also used from the fastq_to_db pipeline, in which case the caller
    supplies an already-initialised `vcf`; when run as vcf_to_db a fresh
    Vcf object is created here and `vcf` argument is replaced.
    """
    #set up loggging
    logger = logging.getLogger('snapperdb.snpdb.vcf_to_db')
    logger.info('Initialising SNPdb class')

    #create snpdb class
    snpdb = SNPdb(config_dict)

    #parse config into snpdb object
    logger.info('Parsing config dict')
    snpdb.parse_config_dict(config_dict)

    #connect to snpdb postgres
    snpdb._connect_to_snpdb()
    snpdb.snpdb_conn = psycopg2.connect(snpdb.conn_string)

    # NOTE(review): inspect.stack()[0][3] is the *current* frame's
    # function name, which inside this function is always 'vcf_to_db',
    # so the 'fastq_to_db' branch below looks unreachable -- was
    # stack()[1][3] (the caller's name) intended? TODO confirm before
    # changing; left byte-identical here.
    if inspect.stack()[0][3] == 'fastq_to_db':
        # fastq_to_db we will alread have a vcf object to work wih
        logger.info('You are running fastq_to_db.')

    elif inspect.stack()[0][3] == 'vcf_to_db':
        ## there is no existing vcf class here, but there will definitely be a vcf
        logger.info('You are running vcf_to_db. Initialising Vcf class.')
        vcf = Vcf()
        logger.info('Making SNPdb variables and output files')
        #set up variables (also names the sample and output paths)
        snpdb.define_class_variables_and_make_output_files(args, vcf)

    #read vcf (multi-contig aware parser)
    vcf.read_multi_contig_vcf()
    logger.info('Uploading to SNPdb')
    #upload vcf
    snpdb.snpdb_upload(vcf, args)
    #annotate vars
    logger.info('Annotating new variants')

    snpdb.snpdb_annotate_vars(vcf)
Пример #10
0
def update_clusters(config_dict):
    """Recompute SNP clusters for the database named in config_dict."""
    # Wire up the SNPdb wrapper, then delegate to its cluster update.
    db = SNPdb(config_dict)
    db.parse_config_dict(config_dict)
    db._connect_to_snpdb()
    db.update_clusters()