Example #1
def downloadFiles(url):

    if not os.path.exists(FILE_TMP_FOLDER):
        os.makedirs(FILE_TMP_FOLDER)
    else:
        print("[INFO] Clearing temporary download directory")
        for the_file in os.listdir(FILE_TMP_FOLDER):
            file_path = os.path.join(FILE_TMP_FOLDER, the_file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except Exception as e:
                print(e)

    fileList = []
    filename = FILE_TMP_FOLDER + "/downloaded.fa.gz"

    print("[INFO] Downloading file {} from remote source".format(url))
    urllib.request.urlretrieve(url, filename)

    print("[INFO] Unpacking gz archive")
    gunzip(filename)

    for file in os.listdir(FILE_TMP_FOLDER):
        if file.endswith(".fa"):
            fileList.append(os.path.join(FILE_TMP_FOLDER, file))

    print("[INFO] Detected {} fasta files".format(len(fileList)))
    return fileList
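All of the snippets on this page lean on the sh library's gunzip wrapper, which shells out to the real gunzip binary. A minimal sketch of its default behavior (the filename is hypothetical):

from sh import gunzip

# replaces sample.fa.gz with sample.fa on disk;
# raises sh.ErrorReturnCode if the binary exits non-zero
gunzip("sample.fa.gz")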
Example #2
def genome_download(name, output_path):
    path = os.path.join(output_path, name.replace(" ", "_"))
    os.makedirs(path, exist_ok=True)
    # dry run first to validate the query, then download for real
    ngd.download(group="bacteria",
                 genus=name,
                 file_format="fasta",
                 parallel=10,
                 dry_run=True)
    ngd.download(group="bacteria",
                 genus=name,
                 file_format="fasta",
                 parallel=10,
                 dry_run=False,
                 output=path)
    files = []
    for r, d, f in os.walk(path):
        for file in f:
            if '.gz' in file:
                files.append(os.path.join(r, file))

    for f in files:
        sh.gunzip(f)

    files2 = []
    for r, d, f in os.walk(path):
        for file in f:
            if '.fna' in file:
                files2.append(os.path.join(r, file))

    out = os.path.join(output_path, name.replace(" ", "_") + ".fasta")
    sh.cat(files2, _out=out)
    return path
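A hedged usage sketch for the function above (genus and output directory are made up; assumes ncbi-genome-download is importable as ngd and sh is installed):

# downloads every bacterial genome for the genus, gunzips the .fna files,
# concatenates them into one FASTA, and returns the per-genus folder
path = genome_download("Escherichia", "/tmp/genomes/")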
Example #3
def download_epgs(result):
    logger.info('download_epgs()')
    index = 1
    downloaded_list = []
    for url in tv_epg_urls:
        file_result = []
        file_result.append("epg #" + str(index))
        try:
            file_name = 'epg-' + str(index) + '.xml.gz'
            file_name = download_file(url, file_name, file_result)

            if file_name.endswith('.gz'):
                xml_file_name = file_name.replace('.gz', '')
                if os.path.exists(xml_file_name):
                    os.remove(xml_file_name)
                gunzip(file_name)
                file_name = xml_file_name

            downloaded_list.append(file_name)
            result.append(file_result[0] + ", " + file_result[1] + ": " +
                          sizeof_fmt(os.path.getsize(file_name)))
            logger.info('download_epg done, xml size: %s',
                        sizeof_fmt(os.path.getsize(file_name)))
        except Exception as e:
            logger.error('ERROR in download_epg %s', e)
            print(e)
        index = index + 1
    return downloaded_list
Example #4
def regrid_time_series_afni(input, n=2):
    """
    This function upsamples a time series using the afni function 3dUpsample. Before running the
    function, set the afni environment by calling AFNI in the terminal. Output is an upsampled nifti
    time series.
    Inputs:
        *input: time series filename.
        *n: upsampling factor.
        
    created by Daniel Haenelt
    Date created: 20-09-2019           
    Last modified: 19-02-2020
    """
    import os
    from sh import gunzip
    
    clean_unzip = 0
    if os.path.splitext(input)[1] == ".gz":
        gunzip(input)
        clean_unzip = 1
        input = os.path.splitext(input)[0]
        
    # prepare path and filename
    path_file = os.path.dirname(input)
    name_file = os.path.splitext(os.path.basename(input))[0]

    # upsample vaso and bold time series
    os.system("3dUpsample -overwrite -datum short " + \
              "-prefix " + os.path.join(path_file,name_file + "_upsampled.nii") + \
              " -n " + str(n) + " -input " + input)
    
    if clean_unzip:
        os.remove(input)
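A hedged usage sketch (the path is hypothetical; AFNI's 3dUpsample must be on the PATH, as the docstring notes):

# writes vaso_upsampled.nii next to the input, upsampled 4x in time
regrid_time_series_afni("/data/vaso.nii.gz", n=4)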
Example #5
def download_file(language, position):
    location = "https://s3.amazonaws.com/code-search-net/CodeSearchNet/v2/{}.zip".format(language)
    # Get the file from the server
    requested_file = urlopen(location)
    total_length = requested_file.headers.get('content-length')
    if total_length:
        total_length = int(total_length)
        blocksize = max(4096, total_length//100)
    else:
        blocksize = 1000000 
    with open("/tmp/{}.zip".format(language), "wb") as tempzip:
        with tqdm(total=total_length, position=position) as tq:
            tq.set_description("Downloading {}".format(language))
            while True:
                data = requested_file.read(blocksize)
                if not data:
                    break
                tempzip.write(data)
                tq.update(len(data))
    with ZipFile("/tmp/{}.zip".format(language)) as zf:
        zf.extractall(path="./data/")
    # Finally delete the temp file 
    os.remove("/tmp/{}.zip".format(language))
    # Get all of the zipped files and extract them
    files = []
    for file in glob.glob("data/{}/**/*.gz".format(language), recursive=True):
        files.append(file)
    with tqdm(total=len(files), position=position) as tq:
        tq.set_description("Unzipping {}".format(language))
        for i in files:
            gunzip(i)
            tq.update(1)    
        tq.close()
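A hedged usage sketch (the language name and tqdm position are illustrative; assumes the imports used in the snippet):

# fetches python.zip, extracts it under ./data/, then gunzips every
# .gz found beneath data/python/
download_file("python", 0)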
Example #6
def disGenData():
	disGenDataDic={}
	disgenFileName='all_gene_disease_associations.tsv'
	disGenURL="https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz"
	filepath = os.getcwd()

	print("Extracting DisGen data, job starts",str(datetime.datetime.now()))
	try:
		urllib.request.urlretrieve(disGenURL,filepath+'/all_gene_disease_associations.tsv.gz')
		urllib.request.urlcleanup()
		print("Extracting DisGen data, job done",str(datetime.datetime.now()))
	except Exception:
		print("Unable to download all_gene_disease_associations.tsv.gz!")

	if os.path.exists(filepath+'/all_gene_disease_associations.tsv'):
		os.remove(filepath+'/all_gene_disease_associations.tsv')
	print("Extracting .gz data, job starts",str(datetime.datetime.now()))
	gunzip(filepath+'/all_gene_disease_associations.tsv.gz')
	print("Extracting .gz data, job done",str(datetime.datetime.now()))
	disgendf= pd.read_csv(disgenFileName, delimiter='\t')
	disGenList=list(disgendf['geneSymbol'].unique())
	for gene in disGenList:
		#tempDisGenID=list(disgendf['geneId'][disgendf['geneSymbol']==gene].unique())[0]
		tempDF=disgendf[['diseaseName','diseaseId']][(disgendf['geneSymbol']==gene) & (disgendf['diseaseType'] =='disease')]
		tempDisNamesInfo=list(zip(tempDF['diseaseName'],tempDF['diseaseId']))
		tempDisNames=[i[0] for i in tempDisNamesInfo]
		tempDisNamesURL=['<a target="_blank" href="https://www.disgenet.org/search/0/'+i[1]+'">'+i[0]+'</a>' for i in tempDisNamesInfo]
		tempDisNames=list(map(str,tempDisNames))  # list() so the dict can be pickled
		if len(tempDisNamesInfo)>0:
			disGenDataDic[gene]=[tempDisNames,tempDisNamesURL]
	dicfile='disGen.obj'
	dicf = open(dicfile, 'wb')
	pickle.dump(disGenDataDic, dicf , pickle.HIGHEST_PROTOCOL)
	dicf.close()
	return dicfile
Example #7
def download_trace(file_count):
    download_file(TRACE_DIR, SHA_FILE_NAME)

    with open(path.join(TRACE_DIR, SHA_FILE_NAME)) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=' ')
        line_count = 0
        for row in csv_reader:
            trace_file_name = row[1]
            trace_file_name = trace_file_name.replace("*", "")
            splits = trace_file_name.split("/")
            if len(splits) > 0:
                _type = splits[0]
                if _type not in file_type:
                    file_type[_type] = 0
                    try:
                        os.mkdir(path.join(TRACE_DIR, _type))
                    except OSError as error:
                        print("path %s already exists" %
                              (path.join(TRACE_DIR, _type)))
                file_type[_type] = file_type[_type] + 1

            download_file(path.join(TRACE_DIR, _type), trace_file_name)
            if trace_file_name.find(".gz") != -1:
                gunzip(path.join(TRACE_DIR, trace_file_name))
            line_count = line_count + 1
            if file_count != 0 and line_count >= file_count:
                break
    print_trace_info()
    return
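A hedged usage note: with the TRACE_DIR and SHA_FILE_NAME module globals configured, the call below would fetch and unpack the first 100 traces listed in the checksum file:

download_trace(100)  # passing 0 downloads every file listed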
Example #8
def get_log_file():
  """Retrieve and extract log file data""" 
  input_log_file = os.path.basename(FLAGS.data_url)
  try: 
    urllib.request.urlretrieve(FLAGS.data_url, input_log_file)
    gunzip(input_log_file)
    return os.path.splitext(input_log_file)[0]
  except Exception as e:
    print('Exception {} when retrieving/extracting log file'.format(e))
Example #9
    def get_data(self, url):
        gz_file_name = url.split("/")[-1]
        gz_file_path = os.path.join(self.dir_name, gz_file_name)
        file_name = gz_file_name.split(".")[0]
        file_path = os.path.join(self.dir_name, file_name)
        os.makedirs(self.dir_name, exist_ok=True)
        if not os.path.exists(file_path):
            urllib.request.urlretrieve(url, gz_file_path)
            gunzip(gz_file_path)
        return file_path
Example #10
    def __getitem__(self, idx):

        folder = self.brats[idx]

        # folder[54:] strips the dataset-specific path prefix, leaving the case ID
        base = folder + '/' + folder[54:]

        # unzip if not already unzipped
        for mod in ['flair', 't1', 't1ce', 't2', 'seg']:
            try:
                gunzip(base + '_' + mod + '.nii.gz')
            except Exception:
                pass

        # image file locations after extraction
        flair = base + '_flair.nii'
        t1 = base + '_t1.nii'
        t1ce = base + '_t1ce.nii'
        t2 = base + '_t2.nii'
        seg = base + '_seg.nii'

        img, seg, seg_orig = read_img(flair=flair,
                                      t1=t1,
                                      t1ce=t1ce,
                                      t2=t2,
                                      seg=seg)

        sample = {
            'img': img,
            'mask': seg.type(torch.ByteTensor),
            'seg_orig': seg_orig.type(torch.ByteTensor)
        }

        if self.transform:
            sample = self.transform(sample)

        return sample
Example #11
  def postClone(self, cloned_files, target_dir, version):
    """
    Extracts the downloaded assembly.

    .. versionadded:: 0.3.0
    """
    # gunzip each downloaded file in place (removes the .gz archive)
    for f in cloned_files:
      sh.gunzip(f)

    return 0
Example #12
    def postClone(self, cloned_files, target_dir, version):
        """
    Extracts the compressed archives.

    .. versionadded:: 0.3.0
    """
        # gunzip the files (removes the archives)
        for f in cloned_files:

            # only some of the files need to be extracted
            if f.endswith(".gz"):
                sh.gunzip(f)
Example #14
  def postClone(self, cloned_files, target_dir, version):
    """
    .. versionadded:: 0.3.0
    """
    # Start by extracting all the files
    for f in cloned_files:
      # GunZIP the file (and remove the archive)
      sh.gunzip(f)

    # Then let's concat them
    target_path = "{}/NCBI.Homo_sapiens.fa".format(target_dir)
    # Remove ".gz" ending to point to extracted files
    cat_args = [f[:-3] for f in cloned_files]

    # Execute the concatenation in the background and write to the target path
    sh.cat(*cat_args, _out=target_path, _bg=True)
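Because _bg=True makes sh return immediately, a caller that needs the concatenated FASTA right away has to block on the handle; a minimal sketch of the pattern:

# sh's RunningCommand exposes wait() for backgrounded commands
proc = sh.cat(*cat_args, _out=target_path, _bg=True)
proc.wait()  # returns once the output file is fully written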
Example #15
def make_fastq_list(directory):

    fastqs = []

    # make sure the dir exists
    if not os.path.isdir(directory):
        log.warn("%s is not a valid dir, exiting", directory)
        raise SystemExit

    directory = os.path.abspath(directory)
    log.info("Reading %s for fastqs", directory)

    # see if there are any compressed files
    gz_blob = os.path.join(directory, "*.fastq.gz")
    gzs = glob.glob(gz_blob)

    for gz in gzs:
        log.info("gunzipping %s", gz)

        # sh raises ErrorReturnCode on failure rather than returning a status
        try:
            sh.gunzip(gz)
        except sh.ErrorReturnCode:
            log.warn("gunzipping %s failed, exiting", gz)
            raise SystemExit

    # now glob the fastqs
    blob = os.path.join(directory, "*.fastq")
    fastqs.extend(glob.glob(blob))

    # make sure we got stuff
    if len(fastqs) == 0:
        log.warn("Fastq list is empty, exiting")
        raise SystemExit

    return fastqs
Example #16
  def postClone(self, cloned_files, target_dir, version):
    """
    .. versionadded:: 0.3.0
    """
    # Start by extracting all the files
    for f in cloned_files:
      # GunZIP the file (and remove the archive)
      sh.gunzip(f)

    # Then let's concat them
    target_path = "{}/Genbank.Homo_sapiens.fa".format(target_dir)
    # Remove ".gz" ending to point to extracted files
    cat_args = [f[:-3] for f in cloned_files]

    # Execute the concatenation in the background and write to the target path
    sh.cat(*cat_args, _out=target_path)
Example #18
def unzip_all(dpath = '/home/elmirakh/sat_images/'):
    """Массовая распаковка"""
    zip_dirs = [dpath + x + '/' for x in os.listdir(dpath)]
    cnt = 0
    for zdir in tqdm(zip_dirs):
        for path_to_zip_file in os.listdir(zdir):
            if not path_to_zip_file.endswith('.gz'):
                continue
            path_to_zip_file = os.path.join(zdir,path_to_zip_file)
            try:
                gunzip(path_to_zip_file)
                cnt += 1
            except ErrorReturnCode:
                tqdm.write(path_to_zip_file)
                os.remove(path_to_zip_file)
    tqdm.write('Unzipped {} files successfully'.format(cnt))
Example #19
def encsr_encff(args):
    encff_time = time.time()
    filePath = './bedAuto/jsonENCSR/'
    encsrDict = dict()
    encffNames = []
    encffLinks = []

    encsrNameList, encsrLinkList = parseSearch(args[0])

    # getting encffs from jsons of encsrs
    # only encffs matching below criteria will be extracted from jsons of encsrs
    # file type: bed narrowPeak, output type: replicated peaks, assembly: GRCh38
    for encsrName, encsrLink in zip(encsrNameList, encsrLinkList):
        r = requests.get(encsrLink, allow_redirects=True)
        filePathCurrent = filePath + encsrName + '.json'
        with open(filePathCurrent, 'wb') as f:
            f.write(r.content)
        with open(filePathCurrent, 'r') as f:
            encsrDict[encsrName] = json.load(f)
        infoFiltered = encsrDict[encsrName]['files']
        for element in infoFiltered:
            if (element['file_type'] == 'bed narrowPeak'
                    and element['output_type'] == 'replicated peaks'):
                if element['assembly'] == 'GRCh38':
                    encffNames.append(element['cloud_metadata']['url'][-18:])
                    encffLinks.append(element['cloud_metadata']['url'])

    print('parse for encff --- %.2f seconds ---' % (time.time() - encff_time))

    download_time = time.time()
    encffPath = './bedAuto/filesBed/'

    # downloading and unzipping bed files
    for name, link in zip(encffNames, encffLinks):
        r = requests.get(link, allow_redirects=True)
        encffPathCurrent = encffPath + name
        with open(encffPathCurrent, 'wb') as f:
            f.write(r.content)
        gunzip(encffPathCurrent)

    print('download and unzip bed files --- %.2f seconds ---' %
          (time.time() - download_time))
Example #20
def downloadUniprotIDMapping():
    idmapping_filepath = os.getcwd()
    print("Extracting mapping data, job starts", str(datetime.datetime.now()))
    try:
        urllib.request.urlretrieve(
            'ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.2015_03.gz',
            idmapping_filepath + '/idmapping.dat.2015_03.gz')
        urllib.request.urlcleanup()
        print("Extracting mapping data, job done",
              str(datetime.datetime.now()))
    except Exception:
        print("Unable to download idmapping.dat.2015_03.gz!")

    if os.path.exists(idmapping_filepath + '/idmapping.dat.2015_03'):
        os.remove(idmapping_filepath + '/idmapping.dat.2015_03')
    print("Extracting .gz data, job starts", str(datetime.datetime.now()))
    gunzip(idmapping_filepath + '/idmapping.dat.2015_03.gz')
    print("Extracting .gz data, job done", str(datetime.datetime.now()))
Example #21
  def postClone(self, cloned_files, target_dir, version):
    """
    Extracts the compressed archives.

    .. versionadded:: 0.3.0
    """
    f = cloned_files[0]

    if self.newer("hg18", version):
      # extract the tarball into the target directory
      sh.tar("-xzf", f, "-C", target_dir)

    else:
      # rename to ".zip" and keep the new path; the old name no longer exists
      zip_path = f.replace("tar.gz", "zip")
      sh.mv(f, zip_path)

      # gunzip the renamed file (and remove the archive)
      sh.gunzip(zip_path)
Example #23
def run(indir, rbs, id_lst):
    assert hasattr(indir, 'mkdir_p')
    assert hasattr(rbs, 'mkdir_p')
    assert hasattr(id_lst, 'mkdir_p')
    names_and_ids = list(csv.DictReader(open(id_lst), delimiter=','))
    snames, sids = zip(*[(v['SampleName'], v['IssueID'])
                         for v in names_and_ids])
    for sid in sids:
        #(rbs / sid).mkdir_p()
        print('mkdir', rbs / sid)
        mkdir_p(rbs / sid)
    old_and_new = newnames_by_dict(indir, rbs, snames, sids)
    for old, new in old_and_new:
        print('gunzip', old)
        print('symlink', old.stripext(), new)
        if not os.path.exists(old.stripext()):
            assert os.path.exists(old)
            sh.gunzip(old)
        os.symlink(old.stripext(), new)
Example #24
def upsample_volume(file_in, file_out, dxyz=[0.4, 0.4, 0.4], rmode="Cu"):
    """
    This function upsamples a nifti volume using the afni function 3dresample. Before running the
    function, set the afni environment by calling AFNI in the terminal. Output is an upsampled nifti
    volume.
    Inputs:
        *file_in: nifti input filename.
        *file_out: nifti output filename.
        *dxyz: array of target resolution in single dimensions.
        *rmode: interpolation methods (Linear, NN, Cu, Bk).
        
    created by Daniel Haenelt
    Date created: 16-12-2019        
    Last modified: 29-05-2020
    """
    import os
    import numpy as np
    from sh import gunzip
    from shutil import copyfile
    from lib.io.get_filename import get_filename

    # get path and file extension of input file
    path_in, _, ext_in = get_filename(file_in)

    # make temporary copy of input file
    tmp = np.random.randint(0, 10, 5)
    tmp_string = ''.join(str(i) for i in tmp)
    file_tmp = os.path.join(path_in, "tmp_" + tmp_string + ext_in)
    copyfile(file_in, file_tmp)

    if os.path.splitext(file_tmp)[1] == ".gz":
        gunzip(file_tmp)
        file_tmp = os.path.splitext(file_tmp)[0]

    # upsample volume
    os.system("3dresample " + \
              "-dxyz " + str(dxyz[0]) + " " + str(dxyz[1]) + " " + str(dxyz[2]) + " " +\
              "-rmode " + str(rmode) + " " + \
              "-inset " + file_tmp + " " + \
              "-prefix " + file_out)

    # remove temporary copy
    os.remove(file_tmp)
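A hedged usage sketch (paths and resolution are hypothetical; AFNI's 3dresample must be on the PATH):

# resample to 0.5 mm isotropic with cubic interpolation
upsample_volume("/data/mean_epi.nii.gz", "/data/mean_epi_up.nii",
                dxyz=[0.5, 0.5, 0.5], rmode="Cu")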
Example #25
def download_file(url, filename):

    csvfilename = os.path.splitext(os.path.basename(filename))[0]
    if not os.path.isfile(csvfilename):
        print('Downloading File')
        response = requests.get(url)

        if response.status_code == 200:

            with open(filename, 'wb') as file:

                for chunk in response:
                    file.write(chunk)
            gunzip(filename)
            return csvfilename

    else:
        print('File exists')
        return csvfilename
Example #26
    def download_epg(self, index, url, downloaded_list):
        self.logger.info("download_epg(%s)" % url)
        start_time = time.time()
        file_name = 'epg-' + str(index) + '.xml.gz'
        try:
            file_name = self.download_file(url, file_name)

            if file_name.endswith('.gz'):
                xml_file_name = file_name.replace('.gz', '')
                if os.path.exists(xml_file_name):
                    os.remove(xml_file_name)
                gunzip(file_name)
                file_name = xml_file_name

            downloaded_list.append(file_name)
        except Exception as e:
            self.logger.error('ERROR in download_epg(%s) %s' % (url, e))
        self.logger.info("download_epg(%s), xml size: %s, time: %.2fs" %
                         (url, self.sizeof_fmt(os.path.getsize(file_name)),
                          time.time() - start_time))
Example #27
    def _unzip_clean(self, hmlfile):
        """
        Sets the typing of this Sample.

        :param typing: The typing of this Sample.
        :type typing: List[Typing]
        """
        gunzip(hmlfile)
        hml_unzipped = ".".join(hmlfile.split(".")[0:len(hmlfile.split("."))-1])
        cmd = "perl -p -i -e 's/<\?X-NMDP-CORRECTION TRUE\?><\?X-NMDP-NOREPORTS\?>//g' " + hml_unzipped
        os.system(cmd)
        cmd4 = "perl -p -i -e 's/<\?xml.+\?>//g' " + hml_unzipped
        os.system(cmd4)
        cmd1 = "perl -p -i -e 's/\?//g' " + hml_unzipped
        os.system(cmd1)
        cmd2 = "perl -p -i -e 's/ns2://g' " + hml_unzipped
        os.system(cmd2)
        cmd3 = "perl -p -i -e 's/:ns2//g' " + hml_unzipped
        os.system(cmd3)
        return hml_unzipped
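For reference, a minimal pure-Python sketch of the same cleanup with re.sub instead of shelling out to perl (a swapped-in technique, not the original code; clean_hml_text is a hypothetical helper):

import re

def clean_hml_text(path):
    # strip the NMDP processing instructions, the xml declaration,
    # stray '?' characters, and ns2 namespace prefixes, in place
    with open(path) as f:
        text = f.read()
    text = re.sub(r'<\?X-NMDP-CORRECTION TRUE\?><\?X-NMDP-NOREPORTS\?>', '', text)
    text = re.sub(r'<\?xml.+?\?>', '', text)
    text = text.replace('?', '').replace('ns2:', '').replace(':ns2', '')
    with open(path, 'w') as f:
        f.write(text)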
Example #28
def fetch_encsr_encff(args):
    fetch_encsr_time = time.time()
    file_path = './bedAuto/jsonENCSR/'
    file_path_all = []
    encff_names = []
    encff_links = []
    encsr_names, encsr_links = parseSearch(args[0])

    for encsr_name, encsr_link in zip(encsr_names, encsr_links):
        r = requests.get(encsr_link, allow_redirects=True)
        file_path_current = file_path + encsr_name + '.json'
        file_path_all.append(file_path_current)
        with open(file_path_current, 'wb') as f:
            f.write(r.content)

    num_processes = len(file_path_all)

    with Pool(num_processes) as p:
        encff_names_links = p.map(multi_encsr_encff, file_path_all)

    print('parse for encff --- %.2f seconds ---' %
          (time.time() - fetch_encsr_time))

    download_time = time.time()
    encff_path = './bedAuto/filesBed/'

    for encff_name, encff_link in encff_names_links:
        encff_names.append(str(encff_name)[2:-2])
        encff_links.append(str(encff_link)[2:-2])

    # downloading and unzipping bed files
    for name, link in zip(encff_names, encff_links):
        r = requests.get(link, allow_redirects=True)
        encff_path_current = encff_path + name
        with open(encff_path_current, 'wb') as f:
            f.write(r.content)
        gunzip(encff_path_current)

    print('download and unzip bed files --- %.2f seconds ---' %
          (time.time() - download_time))
Example #29
    def _setup_mgm(self):
        mgm_location = self._find_binary(
            name='gmhmmp',
            options_message='Please select appropriate MetaGeneMark location',
            raise_if_not_found=MetaGeneMarkNotFound)
        mod_file = self._find_mgm_mod_file(dirname(mgm_location))
        gm_key_home = join(self.HOME, '.gm_key')

        if not exists(gm_key_home) or not self._is_gm_key_valid(gm_key_home):
            valid_gm_key = self._find_gm_key()
            if valid_gm_key.endswith(".gz"):
                print('Extracting {} to {}'.format(valid_gm_key, gm_key_home))
                sh.gunzip(valid_gm_key, '-c', _out=gm_key_home)
            else:
                print('Copying {} to {}'.format(valid_gm_key, gm_key_home))
                copy(valid_gm_key, gm_key_home)

        return {
            'bin': mgm_location,
            'mod_path': mod_file,
            # 'valid_key': valid_gm_key,
        }
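The '-c' flag combined with sh's _out redirect is the keep-the-source variant of the plain gunzip(f) calls in the other examples; a minimal sketch with hypothetical filenames:

# decompress to a chosen path without deleting the .gz original
sh.gunzip('key.gz', '-c', _out='/home/user/.gm_key')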
Example #30
    def decompr_files(dname, fnames):
        """Runs gunzip on a list of files.

        Args:
            dname (str): the directory containing the files to gunzip.
            fnames (list): list of filenames in dir.

        Examples:
            Pipe.decompr_files('user/inputs/', ['a.gz', 'b.gz'])
        """
        for fname in fnames:
            target = os.path.join(dname, fname)
            if os.path.exists(target):
                sh.gunzip(target)
            elif os.path.exists(os.path.splitext(target)[0]):
                # already extracted on a previous run
                continue
            else:
                raise FileNotFoundError(errno.ENOENT,
                                        os.strerror(errno.ENOENT),
                                        target)
Example #31
def handle_bootimg(filename):
    global KEEPSTUFF
    name = getBasename(filename)
    if (name[:4] in ['boot', 'hosd', 'BOOT']
            or name[:8] in ['recovery', 'fastboot', 'RECOVERY']
            or name[:9] == 'droidboot' or name[:10] == 'okrecovery'
            or name[-4:] == '.bin'):
        subprocess.run([IMGTOOL, filename, 'extract'])
        os.chdir('extracted')
        format_ = getFormat('ramdisk')
        if (format_ == 'LZ4'):
            # a list argv must not be combined with shell=True,
            # and the pipe needs a single shell string
            subprocess.run(['unlz4', 'ramdisk', 'ramdisk.out'])
            subprocess.run('cat ramdisk.out | cpio -i', shell=True)
            os.remove('ramdisk.out')
        elif (format_ == 'gzip'):
            cpio(gunzip('ramdisk', '-c'), '-i')
        rm('ramdisk')
        os.chdir('..')
        find_output = find('extracted',
                           '-print').stdout.decode('utf-8').splitlines()
        for line in find_output:
            if (os.path.isfile(line)):
                format_ = getFormat(line)
                if (format_ == 'gzip'):
                    mv(line, line + '.gz')
                    gunzip('-f', line + '.gz')
                    result = at_extract(line)
                else:
                    result = at_extract(line)
                print(line + " processed: " + result)
        if (KEEPSTUFF == 1):
            cp('-r', 'extracted', MY_FULL_DIR + '/' + SUB_DIR + '/' + name)
            chown('-R', EXTUSER + ':' + EXTGROUP,
                  MY_FULL_DIR + '/' + SUB_DIR + '/' + name)
        shutil.rmtree("extracted")
    else:
        handle_binary(filename)
Example #32
def download_ftp_data(address, username, password, files):
    """
    """
    print('connecting to: ', address, '...')
    ftp = ftplib.FTP(address)
    print('logging in...')
    ftp.login(username, password)
    for file in files:
        os.makedirs(os.path.dirname(file[1]), exist_ok=True)
        if ask_me_every_time:
            user_input = input(ftp_prompt.format(file[0]))
            if user_input.lower() != 'y':
                print(ftp_skipping_prompt.format(file[0]))
                continue
        print('downloading: ', file[0], '...')
        ftp.sendcmd("TYPE i")
        size = ftp.size(file[0])
        p_bar = progressbar.AnimatedProgressBar(end=size, width=10)
        with open(file[1] + '.gz', 'wb') as f:

            def callback(chunk):
                f.write(chunk)
                p_bar + len(chunk)
                p_bar.show_progress()

            ftp.retrbinary("RETR " + file[0], callback)
            p_bar + size
            p_bar.show_progress()
        print()
        print('extracting...')
        gunzip(file[1] + '.gz', '-f')
        # escape literal tabs because backward compatibility is important
        with open(file[1], 'r') as f:
            content = f.read()
        content = content.replace('\t', '\\t')
        with open(file[1], 'w') as f:
            f.write(content)
        print('done')
Example #33
def write_pair_file(debug_path, pair, run_content, run_filename, run_dir):
    if run_filename.endswith('gz'):
        local_file_name = f'R{pair}.fastq.gz'
    else:
        local_file_name = f'R{pair}.fastq'
    open_operator = open
    with open(debug_path, 'a') as f:
        f.write(
            f'{run_filename} ({local_file_name}) is being handled with {open_operator}\n'
        )
    local_file_path = os.path.join(run_dir, local_file_name)
    with open_operator(local_file_path, 'wb') as f:
        f.write(run_content)

    # avoid double zipping:
    if local_file_path.endswith('.gz'):
        try:
            sh.gunzip(local_file_path)
        except Exception:
            # not actually gzipped; just drop the .gz suffix
            shutil.move(local_file_path, local_file_path[:-3])

    with open(debug_path, 'a') as f:
        f.write(f'R{pair} was handled successfully\n')
Example #34
def ensembl_data_url(gen_ver, species, dir):

    # Get species specific url
    gene_url, peptide_url = ensembl_url(species, gen_ver)
    # Make file if it doesn't exist
    mkfile(f'{dir}/{species}/ensembl')
    # DL datasets
    gene_fn = wget.download(gene_url, f'{dir}/{species}/ensembl')
    peptide_fn = wget.download(peptide_url, f'{dir}/{species}/ensembl')
    # unzip (sh's gunzip drops the .gz archive, leaving the extracted file)
    gz_f = lambda x: gunzip(x) if x.endswith('.gz') else None
    gz_f(gene_fn), gz_f(peptide_fn)
    # remove any leftover .gz archives
    for file in os.listdir(f'{dir}/{species}/ensembl/'):
        if file.endswith(".gz"):
            os.remove(os.path.join(f'{dir}/{species}/ensembl/', file))
    return gene_fn.replace('.gz', ''), peptide_fn.replace('.gz', '')
Example #35
from sh import gunzip
from glob import glob

import re, os, json

OUTDIR="data-unzipped"

# os.stat raises on missing paths, so test existence instead
if not os.path.exists(OUTDIR):
    print("created dir", OUTDIR)
    os.makedirs(OUTDIR)

for dataFile in glob("data/*.gz"):
    outFile = os.path.join(OUTDIR, re.search(r'(\d+)\.gz$', dataFile).groups()[0])
    if not os.path.exists(outFile):
        gunzip('-c', dataFile, _out=outFile)
        print("unzipped", dataFile, "to", outFile)

# then, go through the data files and create a new JSON file that maps
# mpId [of route] -> { latitude, longitude, grade, protection }

allRoutes = dict()
allAreas = dict()

for dataFile in glob("data-unzipped/*"):
    data = json.load(open(dataFile))
    allRoutes.update(dict([(route['mpId'], route) for route in data['routes']]))
    # parentId of a route gives its area. parent of an area is the parent area.
    areas = dict([(area['id'], area) for area in data['areas']])
    allAreas.update(areas)
    # stupid algorithm to fill in missing latitude/longitude with values for
Example #36
    def un_gzip(self, content):
        return sh.gunzip(_in=content).stdout
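A hedged round-trip sketch for the helper above (class context omitted; the standard library's gzip module produces the compressed bytes that are fed to gunzip via _in):

import gzip
import sh

payload = gzip.compress(b"hello gunzip")   # gzip-compressed byte string
plain = sh.gunzip(_in=payload).stdout      # .stdout holds the decompressed bytes
assert plain == b"hello gunzip"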