Example #1
    def process(self, information):
        # Setup django for templates
        from django.conf import settings
        settings.configure(TEMPLATE_DIRS=self.templateDirs,
                           INSTALLED_APPS=('APIgen.tags', ))

        # Reset temp dir
        if os.path.exists(self.tempDir):
            shutil.rmtree(self.tempDir)

        # Make sure we have out and temp directories
        mkdir(self.outDir)
        mkdir(self.tempDir)

        # Copy assets to output
        for directory in self.assetDirs:
            shutil.copytree(directory,
                            self.tempDir,
                            ignore=shutil.ignore_patterns(IGNORE_PATTERNS))

        log.info("\n---------------------GENERATING------------------------\n")

        for module in information[MODULES].values():
            self.gen_module(information, module)

        log.info("\n---------------------DONE------------------------\n")
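The mkdir / utility.mkdir helper called throughout these examples is not shown on this page. A minimal sketch of what such a helper commonly looks like, assuming it simply wraps os.makedirs (the real utility module may behave differently, e.g. the use_sudo variant in Example #7):

import os

def mkdir(path):
    # Hypothetical helper: create the directory, including missing parents, unless it already exists.
    if not os.path.exists(path):
        os.makedirs(path)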
Example #2
    def process(self, information):
        # Setup django for templates
        from django.conf import settings
        settings.configure(
            TEMPLATE_DIRS=self.templateDirs,
            INSTALLED_APPS = ('APIgen.tags', )
        )
        
        # Reset temp dir        
        if os.path.exists(self.tempDir):
            shutil.rmtree(self.tempDir)
        
        # Make sure we have out and temp directories
        mkdir(self.outDir)
        mkdir(self.tempDir)

        # Copy assets to output
        for directory in self.assetDirs:
            shutil.copytree(directory, self.tempDir, ignore=shutil.ignore_patterns(IGNORE_PATTERNS))

        log.info("\n---------------------GENERATING------------------------\n")
        
        for module in information[MODULES].values():
            self.gen_module(information, module)
            
        log.info("\n---------------------DONE------------------------\n")
Example #3
 def gen_module(self, information, module):
     moduleName = module[NAME]
     self.createPage(
           information
         , "%s.txt" % moduleName
         , [ os.sep.join(['modules', moduleName, 'module.rst'])
           , 'module.rst'
           ]
         , module = module
         , current = module
         , fullname = moduleName
         )
     
     moduleDir = os.path.join(self.outDir, moduleName)
     mkdir(moduleDir)
     
     for kls in module[CLASS_LIST]:
         klsName = kls[NAME]
         fullName = "%s.%s" % (moduleName, klsName)
         if moduleName == klsName:
             fullName = klsName
             
         self.createPage(
               information
             , os.sep.join([moduleName, "%s.txt" % klsName])
             , [ os.sep.join(["classes", "%s.rst" % klsName])
               , os.sep.join(["classes", moduleName, "%s.rst" % klsName])
               , os.sep.join(["modules", moduleName, "class.rst"])
               , os.sep.join(["modules", moduleName, "classes", "%s.rst" % klsName])
               , "class.rst"
               ]
             , module = module
             , current = kls
             , fullname = fullName
             )
Example #4
    def gen_module(self, information, module):
        moduleName = module[NAME]
        self.createPage(
            information,
            "%s.txt" % moduleName,
            [os.sep.join(['modules', moduleName, 'module.rst']), 'module.rst'],
            module=module,
            current=module,
            fullname=moduleName)

        moduleDir = os.path.join(self.outDir, moduleName)
        mkdir(moduleDir)

        for kls in module[CLASS_LIST]:
            klsName = kls[NAME]
            fullName = "%s.%s" % (moduleName, klsName)
            if moduleName == klsName:
                fullName = klsName

            self.createPage(
                information,
                os.sep.join([moduleName, "%s.txt" % klsName]), [
                    os.sep.join(["classes", "%s.rst" % klsName]),
                    os.sep.join(["classes", moduleName,
                                 "%s.rst" % klsName]),
                    os.sep.join(["modules", moduleName, "class.rst"]),
                    os.sep.join(
                        ["modules", moduleName, "classes",
                         "%s.rst" % klsName]), "class.rst"
                ],
                module=module,
                current=kls,
                fullname=fullName)
Example #5
def __logging_args(args):
    #make directories
    args.log_dir += args.dataset + '_noaug/' if args.aug_type == 0 else args.dataset + '/'
    logfile_name = '%s_d%dw%d' % (args.network, args.depth, args.widen_factor)
    #name of logging text file
    if args.model_prefix is None:
        args.model_prefix = args.log_dir + args.network + '/'
        logfile_name += '_exp' if args.exp_name is None else '_' + args.exp_name
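        # find the first unused index so a new run does not overwrite an existing log file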
        random_idx = 1
        while os.path.isfile(args.model_prefix + logfile_name +
                             str(random_idx) + '.txt'):
            random_idx += 1
        logfile_name += str(random_idx)
    #logging
    log_file_full_name = args.model_prefix + logfile_name + '.txt'
    args.model_prefix += 'weights/' + logfile_name + '/'
    utility.mkdir(args.model_prefix)
    args.model_prefix += logfile_name
    head = '%(asctime)-15s %(message)s'
    logger = logging.getLogger()
    handler = logging.FileHandler(log_file_full_name)
    formatter = logging.Formatter(head)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)
    logger.info('%s', log_file_full_name)
    logger.info('start with arguments %s', args)
Example #6
def bmp2jpg(path_image, pathGoal_image):
    utility.mkdir(pathGoal_image)
    for i_view in range(8):
        path_image_view = "%s%d/" % (path_image, i_view)
        pathGoal_image_view = "%s%d/" % (pathGoal_image, i_view)
        utility.mkdir(pathGoal_image_view)
        for root, dirs, files in os.walk(path_image_view):
            for i_file in range(len(files)):
                image = cv2.imread(path_image_view + files[i_file])
                pathFileSave = pathGoal_image_view + files[i_file][0:3] + ".jpg"
                cv2.imwrite(pathFileSave, img=image)
                print("Finish %s" % pathFileSave)
Example #7
    def run(self):
        apt_packages = [
            "nginx",
            "nginx-extras",
            "apache2-utils",
        ]
        for package in apt_packages:
            utility.apt(package)
        utility.mkdir('/etc/nginx/sites-available', use_sudo=True)
        utility.mkdir('/etc/nginx/sites-enabled', use_sudo=True)

        upload_template('nginx.conf',
                        '/etc/nginx/nginx.conf',
                        context={}, template_dir=TEMPLATE_DIR,
                        use_jinja=True, use_sudo=True)
        utility.rm('/etc/nginx/sites-enabled/default', use_sudo=True)
        self.restart()
Example #8
    def __init__(self, args, train_loader, train_sampler, valid_loader,
                 my_model, my_loss, ckp):
        self.args = args
        self.scale = args.scale[0]

        self.ckp = ckp
        self.loader_train = train_loader
        self.loader_valid = valid_loader
        self.train_sampler = train_sampler
        self.model = my_model
        self.loss = my_loss
        self.optimizer = utility.make_optimizer(args, self.model)

        self.psnr_fn = PSNR(boundary_ignore=40)
        # Postprocessing function to obtain sRGB images
        self.postprocess_fn = SimplePostProcess(return_np=True)

        if 'L1' in args.loss:
            self.aligned_loss = L1(boundary_ignore=None).cuda(args.local_rank)
        elif 'MSE' in args.loss:
            self.aligned_loss = L2(boundary_ignore=None).cuda(args.local_rank)
        elif 'CB' in args.loss:
            self.aligned_loss = CharbonnierLoss(boundary_ignore=None).cuda(
                args.local_rank)
        elif 'MSSSIM' in args.loss:
            self.aligned_loss = MSSSIMLoss(boundary_ignore=None).cuda(
                args.local_rank)

        if self.args.fp16:
            self.scaler = GradScaler()

        self.best_psnr = 0.
        self.best_epoch = 0

        if self.args.load != '':
            self.optimizer.load(ckp.dir, epoch=len(ckp.log))

        self.error_last = 1e8
        self.glob_iter = 0

        self.log_dir = LOG_DIR + "/" + args.save
        self.img_save_dir = IMG_SAVE_DIR + "/" + args.save
        # Where to load model
        self.load_model_dir = LOAD_MODEL_DIR + "/" + args.save
        # Where to save new model
        self.save_model_dir = SAVE_MODEL_DIR + "/" + args.save

        # Where to save visualization images (for report)
        self.results_dir = RESULTS_DIR + "/" + args.save
        self.writer = SummaryWriter(log_dir=self.log_dir)

        utility.mkdir(self.save_model_dir)
        utility.mkdir(self.img_save_dir)
        utility.mkdir(self.log_dir)
        utility.mkdir('frames')
Example #9
def __logging_args(args):
    #make directories
    args.log_dir += args.dataset + '_noaug/' if args.aug_type == 0 else args.dataset + '/'
    logfile_name = '%s_d%dL%dM%d' % (args.network, args.depth,
                                     args.primary_partition,
                                     args.secondary_partition)
    #name of logging text file
    if args.model_prefix is None:
        args.model_prefix = args.log_dir + args.network + '/'
        logfile_name += '_exp' if args.exp_name is None else '_' + args.exp_name
        random_idx = 1
        while os.path.isfile(args.model_prefix + logfile_name +
                             str(random_idx) + '.txt'):
            random_idx += 1
        if args.load_epoch is not None:  #deprecated. Use `--model-prefix` to manually set the name
            random_idx -= 1
        logfile_name += str(random_idx)
    #model related
    if args.checkpoint_epochs is None:  #if num_epochs=400, then we will save the model every 50 epochs
        args.checkpoint_epochs = args.num_epochs / 8
    if args.rand_seed is None:
        import time
        args.rand_seed = int(
            time.time())  #different random init for serveral runs
    mx.random.seed(args.rand_seed)  #cudnn conv backward is non-deterministic
    #logging
    log_file_full_name = args.model_prefix + logfile_name + '.txt'
    args.model_prefix += 'weights/' + logfile_name + '/'
    utility.mkdir(args.model_prefix)
    args.model_prefix += logfile_name
    head = '%(asctime)-15s %(message)s'
    logger = logging.getLogger()
    map(logger.removeHandler, logger.handlers[:])  #reset
    handler = logging.FileHandler(log_file_full_name)
    formatter = logging.Formatter(head)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)
    logger.info('%s', log_file_full_name)
    logger.info('start with arguments %s', args)
Example #10
    def handle(self):

        print('connected from:', self.client_address)

        while True:

            # File-info header: '128s' is a 128-byte file name field, 'l' is an int/long, here holding the file size
            fileinfo_size = struct.calcsize('128sl')
            self.buf = self.request.recv(fileinfo_size)

            # Without this if, execution would fall through to the next statement as soon as the first file transfer finishes
            if self.buf:

                # Unpack the file info with '128sl', the same format the client used when packing
                self.str_received, self.filesize = struct.unpack(
                    '128sl', self.buf)

                # # The file-name field is 128 bytes, longer than the actual file name
                # print('filesize is: ',self.filesize,'filename size is: ',len(self.filename))
                # # Use strip() to remove the padding null characters added during packing

                [self.filename,
                 self.pathSave] = self.str_received.decode().split("##")
                pathSave = (self.pathSave).strip('\00')
                utility.mkdir(pathSave)
                for i_view in range(8):
                    pathSave_view = pathSave + "%d_bmp/" % i_view
                    utility.mkdir(pathSave_view)
                    pathSave_view = pathSave + "v%d/" % i_view
                    utility.mkdir(pathSave_view)

                self.filenewname = os.path.join(pathSave,
                                                (self.filename).strip('\00'))
                print(self.filenewname, type(self.filenewname))

                # Number of bytes received so far
                recvd_size = 0
                file = open(self.filenewname, 'wb')
                # print('stat receiving...')
                while not recvd_size == self.filesize:
                    if self.filesize - recvd_size > 1024:
                        rdata = self.request.recv(1024)
                        recvd_size += len(rdata)
                    else:
                        rdata = self.request.recv(self.filesize - recvd_size)
                        recvd_size = self.filesize
                    file.write(rdata)
                file.close()
Example #11
        if PC == "Desktop":
            path_data = "J:/0 SOG_201807/data/"
        elif PC in CameraSystem_L:
            path_data = "F:/0 SoG/data/"
        elif PC in CameraSystem_S:
            path_data = "C:/0_SoG/data/"

    # Path Save
    if 1:
        # level 1
        str_timeDate_level_1 = "181128"
        str_dir_level_1 = 'dataResult_%s_image1811_1_K1/' % (str_timeDate_level_1)
        str_dir_level_1 = 'dataResult_%s_image1811_2_K1/' % (str_timeDate_level_1)
        pathSave = path_data + str_dir_level_1
        if PC in CameraSystem_L:
            utility.mkdir(pathSave)

        str_level_2_list = [
            ["181114", "0018", "D116"],
            ["181120", "1959", "D115"],
            ["181123", "2139", "D115_newSil"],
            ["181123", "2333", "D115_newSil_fixImageProcessed"],
            ["181124", "0021", "D114"],
            ["181125", "1927", "D114_root16"],
            ["181125", "2130", "D113_root16"],
            ["181126", "2152", "D113_root16_weight2"],
            ["181126", "2152", "D112_root16_weight2"],
            ["181128", "0310", "D111_root16_weight2"],
            ["181211", "1731", "D111_test"],
            ["181211", "2103", "D111_cluster"],
            ["181211", "2236", "D111_cluster25000"],
Example #12
def join_runs(run_list, joined_fname_root):

    try:
        shutil.__name__
        deque.__name__
    except NameError:
        import shutil
        from collections import deque

    # Create the directory structure
    mkdir(joined_fname_root+'/')
    mkdir(joined_fname_root+'/chains/')
    mkdir(joined_fname_root+'/results/')
    mkdir(joined_fname_root+'/plots/')

    # Do some checks to ensure that the runs we are joining are compatible with
    # each other...
    for param in ['problem_name', 'n_avg', 'flag_autocor_on_file',
            'flag_no_map_write', 'n_P', 'P_name', 'P_limit_min', 
            'P_limit_max', 'n_DS_arrays_total', 'n_DS']:
        check_param_compatibility(run_list, param)
    
    # Copy the run.dat file for the first run as our new run.dat file
    shutil.copy(run_list[0].filename_root+'/run.dat',
            joined_fname_root)


    # Read in the chain config files and work out the values for our new file
    n_iterations = 0
    n_iterations_burn = None
    chain_list = deque()
    for run in run_list:
        for i_chain in xrange(run.n_chains):
            chain = Chain(run, i_chain)
            chain_list.append(chain)
            if n_iterations_burn==None: 
                n_iterations_burn=chain.n_iterations_burn
                n_iterations += chain.n_iterations
            else:
                n_iterations += chain.n_iterations-chain.n_iterations_burn
    covariance_matrix = chain_list[0].covariance_matrix
    temp = chain_list[0].temp

    # Write the new chain config file 
    fout = open(joined_fname_root+'/chains/chain_config_%06d.dat'%(0), 'wb')
    np.array([n_iterations], dtype=np.int32).tofile(fout)
    np.array([n_iterations_burn], dtype=np.int32).tofile(fout)
    np.array([temp], dtype=np.float64).tofile(fout)
    covariance_matrix.flatten().tofile(fout)
    fout.close()


    # Concatenate all of the chain stats files together
    fout = open(joined_fname_root+'/chains/chain_stats_%06d.dat'%(0), 'wb')
    first_burn = True
    for i_run, run in enumerate(run_list):
        for i_chain in xrange(run.n_chains):
            fin = open(run.filename_root+'/chains/chain_stats_%06d.dat'%(i_chain), 'rb')
            if first_burn==False:
                # Seek past the burn of this chain
                n_iterations_burn = chain_list[i_run+i_chain].n_iterations_burn
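                # Per-iteration stats record size in bytes: presumably 12 float64 values per parameter plus 6 global float64 values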
                byte_seek = (8*run.n_P*12)+(8*6)
                if run.flag_autocor_on_file==1:
                    byte_seek+=(run.n_avg-1)*8
                fin.seek(byte_seek*n_iterations_burn,1)
            else:
                first_burn = False
            shutil.copyfileobj(fin, fout)
            fin.close()
    fout.close()


    # Concatenate all of the chain trace files together
    fout = open(joined_fname_root+'/chains/chain_trace_%06d.dat'%(0), 'wb')
    first_burn = True
    for i_run, run in enumerate(run_list):
        for i_chain in xrange(run.n_chains):
            fin = open(run.filename_root+'/chains/chain_trace_%06d.dat'%(i_chain), 'rb')
            if first_burn == False:
                # Seek past the burn of this chain
                chain = chain_list[i_run+i_chain]
                n_iterations_burn = chain.n_iterations_burn
                n_avg = run.n_avg
                byte_seek = 1+8+(8*run.n_P)
                if run.flag_no_map_write==0:
                    for i_DS in xrange(run.n_DS):
                        byte_seek += 8*run.n_M[i_DS]
                fin.seek(byte_seek*n_iterations_burn*n_avg,1)
            else:
                first_burn = False
            shutil.copyfileobj(fin, fout)
            fin.close()
    fout.close()
Example #13
tfboard_name = exp_name + "_"
exp_train_log_dir = os.path.join(train_log_dir, exp_name)

LOG_DIR = os.path.join(exp_train_log_dir, 'logs')

# save img path
IMG_SAVE_DIR = os.path.join(exp_train_log_dir, 'img_log')
# Where to load model
LOAD_MODEL_DIR = os.path.join(exp_train_log_dir, 'models')
# Where to save new model
SAVE_MODEL_DIR = os.path.join(exp_train_log_dir, 'real_models')

# Where to save visualization images (for report)
RESULTS_DIR = os.path.join(exp_train_log_dir, 'report')

utility.mkdir(SAVE_MODEL_DIR)
utility.mkdir(IMG_SAVE_DIR)
utility.mkdir(LOG_DIR)


class Trainer():
    def __init__(self, args, train_loader, train_sampler, valid_loader,
                 my_model, my_loss, ckp):
        self.args = args
        self.scale = args.scale[0]

        self.ckp = ckp
        self.loader_train = train_loader
        self.loader_valid = valid_loader
        self.train_sampler = train_sampler
        self.model = my_model
Example #14
def main():
    args = get_args()

    sample_name = os.path.splitext(os.path.basename(args.fasta1))[0]

    if args.presets == "ont":
        preset = "map-ont"
    elif args.presets == "pacbio":
        preset = "map-pb"
    else:
        print("unrecognized preset provided, using pacbio mode")
        preset = "map-pb"
    overlap = 20
    gap = 20

    stat_summary = args.out + "/" + "stat.summary.txt"
    rm_file(stat_summary)

    # telr output files
    telr_bed = []
    for file in glob.glob(args.telr_dir + "/*.telr.vcf"):
        telr_bed.append(file)
    prefix = os.path.basename(telr_bed[0]).replace('.telr.vcf', '')
    telr_bed = args.telr_dir + "/" + prefix + '.telr.vcf'
    telr_contigs = args.telr_dir + "/intermediate_files/" + prefix + '.contigs.fa'
    telr_meta = args.telr_dir + '/intermediate_files/' + prefix + ".final.meta.tsv"
    telr_contig_te = args.telr_dir +"/intermediate_files/" + prefix + ".te2contig_filter.bed"
    telr_family = args.telr_dir + "/intermediate_files/" + prefix + ".te2contig_rm.merge.bed"

    # filter genome 1 TE annotation by focus regions and families
    ## TODO: check annotation file format?
    if "bed" in args.annotation1:
        annotation_filter = args.out + "/" + sample_name + ".annotation.filter.bed"
        filter_annotation(args.annotation1, annotation_filter, args.out, sample_name, region = args.region1, discard_family = args.discard_family)
    else:
        annotation_filter = args.out + "/" + sample_name + ".annotation.filter.gff"
        filter_annotation(args.annotation1, annotation_filter, args.out, sample_name, region = args.region1, discard_family = args.discard_family)
        annotation_filter_bed = args.out + "/" + sample_name + ".annotation.filter.bed"
        liftover.gff_to_bed(annotation_filter, annotation_filter_bed)
        annotation_filter = annotation_filter_bed
    annotation_set = get_set(annotation_filter)
    out_line = "annotated TEs in genome 1: " + str(len(annotation_set))
    write_report(stat_summary, out_line)


    # filter TELR predictions
    telr_filter = args.out + "/" + prefix + ".telr.filter.bed"
    filter_annotation(telr_meta, telr_filter, args.out, sample_name, region = args.region1, discard_family = args.discard_family)
    telr_contig_set = set()
    with open(telr_filter, "r") as input:
        for line in input:
            entry = line.replace('\n', '').split("\t")
            telr_contig_set.add(entry[5])
    telr_contig_set = get_set(telr_filter, type = 2)
    out_line = "non-ref TEs predicted by TELR: " + str(len(telr_contig_set))
    write_report(stat_summary, out_line)
    
    ################### liftover and overlap with TELR predictions ###################
    out_line = "\n### stats by liftover TEs from genome1 to genome2 and compare with TELR predictions ###"
    write_report(stat_summary, out_line)

    # liftover from one genome to another
    te_lift, te_lift_meta = liftover.lift_annotation(fasta1 = args.fasta1, fasta2 = args.fasta2, bed = annotation_filter, sample_name = sample_name, out_dir = args.out, preset = preset, overlap = overlap, gap = gap, flank_len = 500)

    # remove reference lifted annotations that overlap with genome2 TE annotations
    te_lift_filter = args.out + "/" + sample_name + ".lift.nonref.bed"
    with open(te_lift_filter, "w") as output:
        subprocess.call(["bedtools", "intersect", "-a", te_lift, "-b", args.annotation2, "-v"], stdout = output)
    lift_set = get_set(te_lift_filter)
    out_line = "non-ref TEs lifted from genome1 to genome2: " + str(len(lift_set))
    write_report(stat_summary, out_line)

    # overlap
    overlap = args.out + "/" + sample_name + ".overlap.bed"
    window = 3
    with open(overlap, "w") as output:
        subprocess.call(["bedtools", "window", "-w", str(window), "-a", te_lift_filter, "-b", telr_filter], stdout = output)
    share_lift_set = set()
    share_telr_contig_set = set()
    with open(overlap, "r") as input:
        for line in input:
            entry = line.replace('\n', '').split("\t")
            if entry[3].lower() in entry[9].lower():
                share_lift_set.add('_'.join(entry[0:4]))
                share_telr_contig_set.add(entry[11])
    out_line = "lifted non-ref TEs supported by TELR predictions: " + str(len(share_lift_set))
    write_report(stat_summary, out_line)
    lift_only_set = lift_set.difference(share_lift_set)
    out_line = "non-ref TEs predicted by TELR supported by liftover set: " + str(len(share_telr_contig_set))
    write_report(stat_summary, out_line)
    lift_only_set = lift_set.difference(share_lift_set)
    out_line = "non-ref TEs only in liftover set: " + str(len(lift_only_set))
    write_report(stat_summary, out_line)
    telr_only_contig_set = telr_contig_set.difference(share_telr_contig_set)
    out_line = "non-ref TEs only in TELR set: " + str(len(telr_only_contig_set))
    write_report(stat_summary, out_line)

    rm_file(overlap)

    ################## TELR flanks mapped to genome 1 ###################
    print("Lift flanking sequences from TELR contigs to genome 1...")
    out_line = "\n### stats by liftover TEs from TELR contigs to genome1 and compare with genome1 annotations ###"
    write_report(stat_summary, out_line)

    # generate bed file
    contig_te_strand_dict = dict()
    with open(telr_family, "r") as input:
        for line in input:
            entry = line.replace('\n', '').split("\t")
            contig_te_strand_dict[entry[0]] = entry[4]

    contig_te_bed = args.out + "/" + prefix + ".contig.te.filter.bed"
    with open(contig_te_bed, "w") as output, open(telr_meta, "r") as input:
        for line in input:
            entry = line.replace('\n', '').split("\t")
            if entry[5] in telr_contig_set:
                family = entry[3]
                contig_name = re.sub(":.*", "", entry[5])
                coord = re.sub(".*:", "", entry[5])
                start = coord.split('-')[0]
                end = coord.split('-')[1]
                strand = contig_te_strand_dict[contig_name]
                out_line = '\t'.join([contig_name, start, end, family, strand])
                output.write(out_line + '\n')
    
    sample_name = prefix
    overlap = 20
    gap = 30000
    flank_lift, flank_lift_meta = liftover.lift_annotation(fasta1 = telr_contigs, fasta2 = args.fasta1, bed = contig_te_bed, sample_name = sample_name, out_dir = args.out, preset = preset, overlap = overlap, gap = gap, flank_len = 500)

    telr_lift_contig_set = set()
    with open(flank_lift_meta, "r") as input:
        for line in input:
            entry = line.replace('\n', '').split("\t")
            telr_lift_contig_set.add(entry[5])

    telr_unlift_set = telr_contig_set.difference(telr_lift_contig_set)
    out_line = "TEs can not be lifted from TELR contigs to genome1: " + str(len(telr_unlift_set))
    write_report(stat_summary, out_line)

    out_line = "TEs lifted from TELR contigs to genome1: " + str(len(telr_lift_contig_set))
    write_report(stat_summary, out_line)

    # overlap
    overlap = args.out + "/" + sample_name + ".genome1.overlap.bed"
    window = 3
    with open(overlap, "w") as output:
        subprocess.call(["bedtools", "window", "-w", str(window), "-a", flank_lift_meta, "-b", annotation_filter], stdout = output)
    share_telr_lift_contig_set = set()
    with open(overlap, "r") as input:
        for line in input:
            entry = line.replace('\n', '').split("\t")
            family_telr_lift_genome1 = entry[3].lower()
            family_annotation_genome1 = entry[10].lower()
            if family_annotation_genome1 in family_telr_lift_genome1:
                share_telr_lift_contig_set.add(entry[5])
    
    out_line = "TEs lifted from TELR contigs supported by genome 1 annotation: " + str(len(share_telr_lift_contig_set))
    write_report(stat_summary, out_line)
    telr_lift_only_set = telr_lift_contig_set.difference(share_telr_lift_contig_set)
    out_line = "TEs lifted from TELR contigs not supported by genome 1 annotation: " + str(len(telr_lift_only_set))
    write_report(stat_summary, out_line)

    rm_file(overlap)

    #TODO compare sequence quality for all the overlapped?

    # for TELR only and unlifted flanks, map the contig to dm6 and do dnadiff comparison
    print("For TELR unlifted flanks and lifted flanks without genome1 annotation support, map contig to genome1")
    telr_check_set = telr_unlift_set.union(telr_lift_only_set)
    # out_line = "TELR TEs not lifted or not supported by genome1 annotation: " + str(len(telr_check_set))
    # write_report(stat_summary, out_line)

    # then, extract contigs and map to dm6
    telr_check_contig_list = args.out + "/" + sample_name + ".check.contig.txt"
    telr_check_contig_set = set()
    with open(telr_check_contig_list, "w") as output:
        for item in telr_check_set:
            contig = item.split(':')[0]
            telr_check_contig_set.add(contig)
            output.write(contig + '\n')
    
    telr_check_contigs = args.out + "/" + sample_name + ".check.contig.fa"
    with open(telr_check_contigs, "w") as output:
        subprocess.call(["seqtk", "subseq", telr_contigs, telr_check_contig_list], stdout = output)
    rm_file(telr_check_contig_list)

    telr_check_align = args.out + "/" + sample_name + ".check.contig.paf"
    with open(telr_check_align, "w") as output:
        subprocess.call(["minimap2", "-cx", preset, "-v", "0", "--secondary=no", args.fasta1, telr_check_contigs], stdout = output)
    rm_file(telr_check_contigs)

    # select full length mapped contig, compare using dnadiff and make mummer plot
    telr_check_align_contig_set = set()
    telr_check_dir = args.out + '/' + 'compare_telr_contig_genome1'
    mkdir(telr_check_dir)
    with open(telr_check_align, "r") as input:
        for line in input:
            entry = line.replace('\n', '').split("\t")
            contig_telr = entry[0].split('_')[0]
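            # Presumably: keep contigs whose name prefix matches the aligned genome1 sequence and whose alignment covers more than 80% of the contig length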
            if contig_telr == entry[5] and (int(entry[1]) * 0.8) < (int(entry[3]) - int(entry[2])):
                telr_check_align_contig_set.add(entry[0])
                genome1_extract_fa = telr_check_dir + "/" + '_'.join([entry[5], entry[7], entry[8]]) + '.genome1.fa'
                extract_seq(args.fasta1, entry[5], entry[7], entry[8], genome1_extract_fa)
                contig_fa = telr_check_dir + '/' + entry[0] + '.contig.fa'
                with open(contig_fa, "w") as output:
                    command = "samtools faidx " + telr_contigs + " " + entry[0]
                    subprocess.call(command, shell = True, stdout = output)
                # dnadiff
                prefix_mummer = telr_check_dir + '/' + entry[0]
                subprocess.call(["dnadiff", contig_fa, genome1_extract_fa, "-p", prefix_mummer], stderr = subprocess.DEVNULL)
                mdelta = prefix_mummer + '.mdelta'
                subprocess.call(["mummerplot", "-p", prefix_mummer, "-s", "medium", "-f", "--png", mdelta, "--color"], stdout = subprocess.DEVNULL, stderr = subprocess.STDOUT)
                clean_mummer(prefix_mummer)
                rm_file(genome1_extract_fa)
                rm_file(contig_fa)
                
    telr_no_align_set = telr_check_contig_set.difference(telr_check_align_contig_set)
    rm_file(telr_check_align)

    # generate contig summary file
    contig_summary = args.out + "/" + "contig.summary.tsv"
    with open(contig_summary, "w") as output:
        out_line = '\t'.join(["contig_name", "genome2_support", "genome1_support"])
        output.write(out_line + '\n')
        for ins in telr_contig_set:
            if ins in share_telr_contig_set:
                genome2_support = "yes"
            else:
                genome2_support = "no"
            if ins in telr_lift_contig_set:
                if ins in share_telr_lift_contig_set:
                    genome1_support = "yes"
                else:
                    genome1_support = "no"
            else:
                genome1_support = "TELR_flank_unlifted"
            contig_name = ins.split(':')[0]
            out_line = '\t'.join([contig_name, genome2_support, genome1_support])
            output.write(out_line + '\n')
    
    # generate stats for contig summary file and write in final stats report
    out_line = "\n### stats by comparing two evaluation methods ###"
    write_report(stat_summary, out_line)
    out_line = "within " + str(len(telr_only_contig_set)) + " non-ref TEs in TELR set not in liftover TE set in genome2"
    write_report(stat_summary, out_line)
    num = len(telr_only_contig_set.intersection(share_telr_lift_contig_set))
    out_line = str(num) + " TEs can be lifted to genome1 and supported by genome1 annotations (real support insertions)"
    write_report(stat_summary, out_line)
    num = len(telr_only_contig_set.intersection(telr_lift_only_set))
    out_line = str(num) + " TEs can be lifted to genome1 but not supported by genome1 annotations (potential new insertions in dataset relative to genome1 or misannotation in genome1)"
    write_report(stat_summary, out_line)
    num = len(telr_only_contig_set.intersection(telr_unlift_set))
    out_line = str(num) + " TEs can not be lifted to genome1 (potential false positives or new insertions in the dataset relative to genome1)"
    write_report(stat_summary, out_line)
    
    print("evaluation workflow finished!")
Example #15
def join_runs(run_list, joined_fname_root):

    try:
        shutil.__name__
        deque.__name__
    except NameError:
        import shutil
        from collections import deque

    # Create the directory structure
    mkdir(joined_fname_root + '/')
    mkdir(joined_fname_root + '/chains/')
    mkdir(joined_fname_root + '/results/')
    mkdir(joined_fname_root + '/plots/')

    # Do some checks to ensure that the runs we are joining are compatible with
    # each other...
    for param in [
            'problem_name', 'n_avg', 'flag_autocor_on_file',
            'flag_no_map_write', 'n_P', 'P_name', 'P_limit_min', 'P_limit_max',
            'n_DS_arrays_total', 'n_DS'
    ]:
        check_param_compatibility(run_list, param)

    # Copy the run.dat file for the first run as our new run.dat file
    shutil.copy(run_list[0].filename_root + '/run.dat', joined_fname_root)

    # Read in the chain config files and work out the values for our new file
    n_iterations = 0
    n_iterations_burn = None
    chain_list = deque()
    for run in run_list:
        for i_chain in xrange(run.n_chains):
            chain = Chain(run, i_chain)
            chain_list.append(chain)
            if n_iterations_burn == None:
                n_iterations_burn = chain.n_iterations_burn
                n_iterations += chain.n_iterations
            else:
                n_iterations += chain.n_iterations - chain.n_iterations_burn
    covariance_matrix = chain_list[0].covariance_matrix
    temp = chain_list[0].temp

    # Write the new chain config file
    fout = open(joined_fname_root + '/chains/chain_config_%06d.dat' % (0),
                'wb')
    np.array([n_iterations], dtype=np.int32).tofile(fout)
    np.array([n_iterations_burn], dtype=np.int32).tofile(fout)
    np.array([temp], dtype=np.float64).tofile(fout)
    covariance_matrix.flatten().tofile(fout)
    fout.close()

    # Concatenate all of the chain stats files together
    fout = open(joined_fname_root + '/chains/chain_stats_%06d.dat' % (0), 'wb')
    first_burn = True
    for i_run, run in enumerate(run_list):
        for i_chain in xrange(run.n_chains):
            fin = open(
                run.filename_root + '/chains/chain_stats_%06d.dat' % (i_chain),
                'rb')
            if first_burn == False:
                # Seek past the burn of this chain
                n_iterations_burn = chain_list[i_run +
                                               i_chain].n_iterations_burn
                byte_seek = (8 * run.n_P * 12) + (8 * 6)
                if run.flag_autocor_on_file == 1:
                    byte_seek += (run.n_avg - 1) * 8
                fin.seek(byte_seek * n_iterations_burn, 1)
            else:
                first_burn = False
            shutil.copyfileobj(fin, fout)
            fin.close()
    fout.close()

    # Concatenate all of the chain trace files together
    fout = open(joined_fname_root + '/chains/chain_trace_%06d.dat' % (0), 'wb')
    first_burn = True
    for i_run, run in enumerate(run_list):
        for i_chain in xrange(run.n_chains):
            fin = open(
                run.filename_root + '/chains/chain_trace_%06d.dat' % (i_chain),
                'rb')
            if first_burn == False:
                # Seek past the burn of this chain
                chain = chain_list[i_run + i_chain]
                n_iterations_burn = chain.n_iterations_burn
                n_avg = run.n_avg
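                # Per-sample trace record size in bytes: presumably one flag byte, one float64, and one float64 per parameter; per-map values are added below if written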
                byte_seek = 1 + 8 + (8 * run.n_P)
                if run.flag_no_map_write == 0:
                    for i_DS in xrange(run.n_DS):
                        byte_seek += 8 * run.n_M[i_DS]
                fin.seek(byte_seek * n_iterations_burn * n_avg, 1)
            else:
                first_burn = False
            shutil.copyfileobj(fin, fout)
            fin.close()
    fout.close()
Example #16
    def __make_dir__(self):
        utility.mkdir(self.screenShot)
        utility.mkdir(self.screenShot_keys)
        utility.mkdir(self.screenShot_result)
        utility.mkdir(self.__my_dict__)
Example #17
import os

import easygui
from bs4 import BeautifulSoup

import utility

from scraper import Story, HTMLFormatter
from utility import Settings

settings_file = "settings.dat"
settings = Settings(settings_file)

# http://forums.spacebattles.com/threads/survival-of-the-fittest-worm-si.297753/
# http://forums.spacebattles.com/threads/to-go-a-viking-asoiaf-au.294304/

url = easygui.enterbox("URL:", "SBDownloader")

if not utility.is_site_down(url):
    doc = utility.get_page(url)
else:
    doc = utility.load("test.html")

soup = BeautifulSoup(doc, "html.parser")

story = Story.parse(soup)
story.download_messages()

fmt = HTMLFormatter()
doc = fmt.export_story(story, settings.author_only)
utility.mkdir(settings.download_path)
utility.save(doc, os.path.join(settings.download_path, story.clean_title+".html"))

settings.store()
Example #18
    def __enter__(self):
        utility.mkdir(self.imageFolderName)

        return self
Example #19
def get_B_C_breadthFirst(i_view, cf_getBC):
    time_calcu = False

    begin = datetime.datetime.now()

    B = []
    C = []
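    # B and C collect the cluster data produced by cluster_quad_breadthFirst and are written to text files below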

    if cf_getBC.shape_or_pose == "sil_for_deep":
        postfix = "jpg"
    else:
        postfix = "bmp"

    image = get8ImageFromImage(path=cf_getBC.path_video,
                               SID=cf_getBC.i_frame,
                               views=[i_view],
                               postfix=postfix)
    if time_calcu == True:
        print("\ni_frame: %d" % cf_getBC.i_frame)
        print("Time of getting 1 image: " +
              str((datetime.datetime.now() - begin)))

    cf_getBC.i_view = i_view
    cluster_quad_breadthFirst(im=image[0],
                              leftup=cf_getBC.leftup,
                              rightdown=cf_getBC.rightdown,
                              B=B,
                              C=C,
                              cf_getBC=cf_getBC)
    if time_calcu == True:
        print("Time of clustering 1 image: " +
              str((datetime.datetime.now() - begin)))

    # write to file
    if 1:
        pathSave_imageClustered = cf.pathVideo_ + "v%d/" % i_view
        utility.mkdir(pathSave_imageClustered)

        # max_cluster = 5000

        file = open(
            pathSave_imageClustered + "image%d_B.txt" % cf_getBC.i_frame, 'w')
        # i_cluster = 0
        for fp in B:
            file.write(str(fp))
            file.write('\n')
            # i_cluster = i_cluster + 1
            # if i_cluster == max_cluster:
            #     break
        file.close()

        file = open(
            pathSave_imageClustered + "image%d_C.txt" % cf_getBC.i_frame, 'w')
        # i_cluster = 0
        for fp in C:
            file.write(str(fp))
            file.write('\n')
            # i_cluster = i_cluster + 1
            # if i_cluster == max_cluster:
            #     break
        file.close()
    if time_calcu == True:
        print("Time of saving 1 cluster of image: " +
              str((datetime.datetime.now() - begin)))

    return