Example #1
def download_all(vol_num, music_num):
    try:
        all_url = "http://luoo-mp3.kssws.ks-cdn.com/low/luoo/radio" + vol_num + "/" + music_num + ".mp3"
        wget.download(all_url)
        wget.filename_from_url(all_url)
        print("正在下载落网" + vol_num + "期第" + music_num + "首歌曲...")
        print("下载完成!")
    except:
        print("第" + music_num + "首歌曲下载失败")
Example #2
def download_music(vol_num, download_num):
    try:
        down_url = "http://luoo-mp3.kssws.ks-cdn.com/low/luoo/radio" + vol_num + "/" + download_num + ".mp3"
        wget.download(down_url)
        wget.filename_from_url(down_url)
        print("正在下载落网" + vol_num + "期第" + download_num + "首歌曲")
        print("下载完成!")
    except:
        print("编号错误!!!")
Example #3
def processWget(argDict):
    url = argDict['url']
    savePath = argDict['-p']
    argDict['isrecord'] = False
    #### check dir exists, if not create
    if os.path.exists( savePath ) == False:
        os.mkdir( savePath )
    if os.path.exists( savePath ):
        file_name = wget.filename_from_url(url)
        file_full_path = '%s/%s'%(savePath, file_name)
        if os.path.exists( file_full_path ):
            argDict['file_name'] = file_full_path
            return False
        if file_name.find('.Z') != -1 or file_name.find('gz') != -1:
            file_full_path = '%s/%s'%(savePath, file_name.replace('.Z', ''))
            if file_full_path.find('.gz') != -1:
                file_full_path = '%s/%s'%(savePath, file_name.replace('.gz',''))
            if os.path.exists( file_full_path ):
                argDict['file_name'] = file_full_path
                return False
        argDict['isrecord'] = True
        print '[url=%s, savePath=%s]'%(url, savePath)
        file_name = wget.download(url, savePath)
        argDict['file_name'] = file_name
        return True
    else:
        print 'can not create save path'
        return False
Example #5
    def get_titles(self):
        """
        Return a list of titles grabbed from a Topix Newswire page.
        """

        # grab topix content
        filename = wget.filename_from_url(self.topix_url)  # get filename
        print "[DEBUG] Downloading from topix..."
        with open(wget.download(self.topix_url)) as raw:  # download and open
            content = raw.readlines()  # save content as list
            print "[DEBUG] Content saved."
        try:
            remove(filename)  # remove downloaded file, if it exists
        except:
            print "[DEBUG] Cannot download topix page."
            return 0

        # filter results
        titles = []  # container for titles
        for line in content:
            if "<a t=\"artclick\"" in line:
                # find and filter out title
                titles.append(self.rmtags(line[:line.find("<img")]).strip())
        pp(titles)  # pretty print titles to console

        # return list of titles
        return titles
Example #6
def _download_graphviz(url, verbose=3):
    """Import example dataset from github.

    Parameters
    ----------
    url : str, optional
        url-Link to graphviz. The default is 'https://erdogant.github.io/datasets/graphviz-2.38.zip'.
    verbose : int, optional
        Print message to screen. The default is 3.

    Returns
    -------
    tuple : (gfile, curpath).
        gfile : filename
        curpath : currentpath

    """
    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           'RESOURCES')
    gfile = wget.filename_from_url(url)
    PATH_TO_DATA = os.path.join(curpath, gfile)
    if not os.path.isdir(curpath):
        if verbose >= 3: print('[treeplot] >Creating resources directory..')
        os.makedirs(curpath, exist_ok=True)

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        # Download data from URL
        if verbose >= 3: print('[treeplot] >Downloading graphviz..')
        wget.download(url, curpath)

    return (gfile, curpath)
Example #7
def get_geo_names(
        url='https://erdogant.github.io/datasets/country_and_code.zip',
        verbose=3):
    """Import dataset from github.

    Parameters
    ----------
    url : str
        url-Link to dataset.
    verbose : int, optional
        Print message to screen. The default is 3.

    Returns
    -------
    tuple containing import status and resources.

    """
    import wget
    import os
    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    PATH_TO_DATA = os.path.join(curpath, wget.filename_from_url(url))

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        if verbose >= 3: print('[googletrends] Downloading resources..')
        wget.download(url, curpath)

    # Extract and import local dataset
    df = pd.read_csv(PATH_TO_DATA)
    df['code'] = df['code'].str.upper()
    # Return
    return df
Example #8
def download_resources(url='https://erdogant.github.io/datasets/SVG_MAPS.zip', verbose=3):
    """Import example dataset from github.

    Parameters
    ----------
    url : str
        url-Link to dataset. The default is 'https://erdogant.github.io/datasets/SVG_MAPS.zip'.
    verbose : int, optional
        Print message to screen. The default is 3.

    Returns
    -------
    tuple containing import status and resources.

    """
    import wget
    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    PATH_TO_DATA = os.path.join(curpath, wget.filename_from_url(url))

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        if verbose>=3: print('[worldmap] Downloading resources..')
        wget.download(url, curpath)

    # Extract and import local dataset
    [DIROK, DIRMAP] = _extract_zip_files(PATH_TO_DATA)
    # Return
    return DIROK, DIRMAP
Example #9
    def get_image_name(self,
                       image_url: Optional[str] = None,
                       delimiter_key: Optional[str] = None,
                       use_wget: bool = False) -> str:
        """
        Builds image name from URL

        :param image_url: (str) Image URL
        :param delimiter_key: (str) - Token used to determine start of image
                                      name in url
        :param use_wget: (boolean) - For testing purposes or specific need,
                                     force use of wget.

        :return: (str) Name of URL

        """
        image_url = image_url or self.image_url
        delimiter_key = delimiter_key or self.url_split_token

        LOG.debug(f"Image URL: {image_url}")
        if image_url is None:
            msg = 'Image Url is None.'
            self.status = Status.ERROR
            self.image_info.error_info = msg
            LOG.error(msg)
            return ''

        # Build regexp from key
        sig_comp = re.compile(delimiter_key)

        # Check if url has key (try two different ways)
        match = sig_comp.search(image_url)
        if match is not None and not use_wget:
            LOG.debug("Image name found via regex")
            image_name = image_url.split(delimiter_key, 1)[1]

        else:
            LOG.debug("Image name found via wget")
            image_name = wget.filename_from_url(image_url)
            if image_name is not None:
                image_name = image_name.split(delimiter_key, 1)[1]
            LOG.debug(f'Image URL: {image_url}    Image_name: {image_name}   '
                      f'delimiter: {delimiter_key}')

        # Didn't find the url or something bad happened
        if image_name is None:
            msg = f"Unable to get image_name from url: {image_url}"
            self.status = Status.ERROR
            self.image_info.error_info = msg
            LOG.error(msg)

        # Append the extension if it is not present (and image name is not an empty string)
        elif image_name != '' and not image_name.endswith(self.EXTENSION):
            image_name += f'.{self.EXTENSION}'

        LOG.debug(f"Image Name: {image_name}")

        return image_name
Example #10
def getfilename_fromurl(url):
    """Get Destination filename for a downloaded file"""
    # Try to get destination Filename from Content Disposition
    tmprequest = urllib2.urlopen(url)
    filename = wget.filename_from_headers(tmprequest.info())
    # Get filename from url
    if filename is None:
        filename = wget.filename_from_url(url)
    return filename
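The snippet above is Python 2 (urllib2). A minimal Python 3 sketch of the same Content-Disposition-first, URL-fallback idea; it assumes, as the snippet above does, that wget.filename_from_headers accepts the dict-like response headers:

import urllib.request

import wget

def getfilename_fromurl_py3(url):
    """Get the destination filename for a downloaded file (Python 3 sketch)."""
    # Try the Content-Disposition header first
    with urllib.request.urlopen(url) as response:
        filename = wget.filename_from_headers(response.info())
    # Fall back to the filename embedded in the URL itself
    return filename or wget.filename_from_url(url)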
Example #11
def install_lastools():
    file_name = wget.filename_from_url(LASTOOLS_URL)
    if not os.path.exists(BLAST2DEM_EXE):
        print('lastools missing, downloading...')
        with closing(request.urlopen(LASTOOLS_URL)) as r:
            with open(file_name, 'wb') as f:
                shutil.copyfileobj(r, f)
        with zipfile.ZipFile(file_name, "r") as zip_ref:
            zip_ref.extractall("")
        os.remove(file_name)
Example #12
    def _get_wav_file(self, ix):
        print(ix)
        pod = self.pods_info[ix]
        pod_name = wget.filename_from_url(pod['content_url'])
        # pod_name = 'a182f2b0-e229-4035-82cd-77a7447068f9_2.wav'
        pod_name = os.path.join(self.config['PODS_DIR'], pod_name)

        # pod_aud = load_audio(pod_name, self.config['SAMPLING_RATE'])
        ads = len(pod['ads'])
        return pod_name, ads
Example #13
    def get_episode(self, episode_number):
        filename = wget.filename_from_url(self.file_urls[episode_number])
        full_file_path = self.destination_dir + filename
        if not os.path.isfile(full_file_path):
            print "Downloading... " + filename
            file_url = self.file_urls[episode_number]
            print file_url
            wget.download(file_url, out=full_file_path)
            print "Done... Next!"
        else:
            print "Skipping ... " + filename
Example #14
def get_pdf(link, directory='./'):
    global file_count
    file_name = wget.filename_from_url(link)
    file = s.get(link, stream=True)

    local_file = open(os.path.join(directory, file_name), 'wb')
    local_file.write(file.content)
    local_file.close()

    print("downloaded\x1b[1;32m",file_name,"\x1b[0m")
    file_count += 1
Example #15
def guess_ContentType_from_url(url):
    filename = filename_from_url(url)
    filename = filename if filename and url.endswith(
        filename) else None  # exclude URLs that point to php pages
    if filename and "." in filename:
        filetype = filename.rsplit(".", 1)[-1]
        debug_suffix.add(filetype)
    else:
        filetype = "html"
    filetype = "html" if "htm" in filetype else filetype
    return (filename, filetype, -1, -1
            )  # filename, filetype, filesize, connect_time
Example #16
def fetch_aal3_vascular_atlas(target_affine=np.diag((5, 5, 5))):
    """ Fetch the AAL3 brain atlas given its resolution.

    Parameters
    ----------
    target_affine : np.array, (default=np.diag((5, 5, 5))), affine matrix for
        the produced Nifti images

    Returns
    -------
    mask_full_brain : Nifti Image, full mask brain
    atlas_rois : Nifti Image, ROIs atlas
    """
    data_dir = os.path.join(os.path.expanduser('~'), 'hemolearn_data')
    aal3_dir = os.path.join(data_dir, 'AAL3v1')

    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    if not os.path.exists(aal3_dir):
        os.makedirs(aal3_dir)

    url = 'https://www.gin.cnrs.fr/wp-content/uploads/AAL3v1_for_SPM12.zip'
    dest_filename = os.path.join(aal3_dir, wget.filename_from_url(url))

    if not os.path.exists(os.path.join(aal3_dir, 'AAL3')):
        # download files
        wget.download(url, out=aal3_dir)

        # extract files
        with zipfile.ZipFile(dest_filename, 'r') as zip_ref:
            zip_ref.extractall(aal3_dir)

        # clean directory
        cmd = (
            f"find {data_dir} -type f \( -iname \*.m -o "  # noqa: W605
            f"-iname \*.zip -o -iname \*.rtf -o -iname "  # noqa: W605
            f"\*.pdf \) -delete")  # noqa: W605
        subprocess.call(cmd, shell=True, stdout=subprocess.DEVNULL)

    atlas_fname = os.path.join(aal3_dir, 'AAL3', 'AAL3v1.nii.gz')
    atlas_to_return = image.load_img(atlas_fname)

    atlas_to_return = image.resample_img(atlas_to_return,
                                         target_affine,
                                         interpolation='nearest')

    brain_mask = image_nilearn.binarize_img(atlas_to_return, threshold=0)

    return brain_mask, atlas_to_return
Example #17
def get_xml():
    url = cfg.GOOGLE_FILE

    filename = wget.filename_from_url(url)

    for file in os.listdir(os.getcwd()):
        if file == filename:
            print('File, ' + file + ', already exists!')
            os.remove(file)
            print('File, ' + file + ', has been removed')

    xml = wget.download(url)

    print('File, ' + filename + ', has been downloaded')
Example #18
    def import_example(self, data='USA', verbose=3):
        """Import example dataset from github source.

        Description
        -----------
        Import one of the few datasets from github source.

        Parameters
        ----------
        data : str
            * 'USA'
            * 'RUS'
        verbose : int, (default: 3)
            Print message to screen.

        Returns
        -------
        pd.DataFrame()
            Dataset containing mixed features.

        """
        if data == 'USA':
            url = 'https://erdogant.github.io/datasets/USA_2016_elections.zip'
        elif data == 'RUS':
            url = 'https://erdogant.github.io/datasets/RUS_2018_elections.zip'
        else:
            if verbose >= 3:
                print(
                    '[benfordslaw] >[%s] does not exist. Try "USA" or "RUS" <return>'
                    % (data))
            return None

        curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               'data')
        PATH_TO_DATA = os.path.join(curpath, wget.filename_from_url(url))
        if not os.path.isdir(curpath):
            os.makedirs(curpath, exist_ok=True)

        # Check file exists.
        if not os.path.isfile(PATH_TO_DATA):
            if verbose >= 3:
                print(
                    '[benfordslaw] >Downloading [%s] dataset from github source..'
                    % (data))
            wget.download(url, curpath)

        # Import local dataset
        if verbose >= 3: print('[benfordslaw] >Import dataset [%s]' % (data))
        df = pd.read_csv(PATH_TO_DATA, sep=',')
        # Return
        return df
Example #19
def import_example(data='sprinkler', n=10000, verbose=3):
    """Load example dataset.

    Parameters
    ----------
    data : str, (default: sprinkler)
        Pre-defined examples. 
        'titanic', 'sprinkler', 'alarm', 'andes', 'asia', 'pathfinder', 'sachs'
    n : int, optional
        Number of samples to generate. The default is 10000.
    verbose : int, (default: 3)
        Print progress to screen.
        0: None, 1: ERROR, 2: WARN, 3: INFO, 4: DEBUG, 5: TRACE

    Returns
    -------
    df : pd.DataFrame()

    """
    import wget

    url = 'https://erdogant.github.io/datasets/'
    if data=='sprinkler':
        url=url + 'sprinkler.zip'
    elif data=='titanic':
        url=url + 'titanic_train.zip'
    else:
        try:
            DAG = import_DAG(data, verbose=2)
            df = sampling(DAG, n=n, verbose=2)
        except:
            print('[bnlearn] >Oops! Example dataset not found!')
            df = None
        return df

    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    PATH_TO_DATA = os.path.join(curpath, wget.filename_from_url(url))
    if not os.path.isdir(curpath):
        os.mkdir(curpath)

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        if verbose>=3: print('[bnlearn] >Downloading example dataset..')
        wget.download(url, curpath)

    # Import local dataset
    if verbose>=3: print('[bnlearn] >Import dataset..')
    df = pd.read_csv(PATH_TO_DATA)
    return df
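Many of the examples above (#6, #7, #8, #18, #19) and several below repeat the same idiom: build the local path with wget.filename_from_url, then download only when the file is missing. A generic sketch of that pattern, with hypothetical names:

import os

import wget

def fetch_once(url, data_dir, verbose=True):
    """Download url into data_dir only if it is not already there (hypothetical helper)."""
    os.makedirs(data_dir, exist_ok=True)
    path = os.path.join(data_dir, wget.filename_from_url(url))
    if not os.path.isfile(path):
        if verbose:
            print('Downloading [%s]..' % url)
        wget.download(url, data_dir)
    return path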
Example #20
def download_pod(pod_url):
    """
    Download a single pod based on pod_url

    Args:

    Returns:

    """
    fname = wget.filename_from_url(pod_url)
    if not os.path.exists(os.path.join(PODS_DIR, fname)):
        try:
            print("Downloading ", fname)
            wget.download(pod_url, PODS_DIR, bar=None)
        except Exception as e:
            print(e, fname)
Example #21
    def get_targz(self):
        archivefile_payload = {'id' : self.pmcid}
        archivefile_locator = requests.get('http://www.pubmedcentral.nih.gov/utils/oa/oa.fcgi', params=archivefile_payload)
        record = BeautifulSoup(archivefile_locator.content)

        # parse response for archive file location
        archivefile_url = record.oa.records.record.find(format='tgz')['href']

        archivefile_name = wget.filename_from_url(archivefile_url)
        complete_path_targz = os.path.join(self.parameters["data_dir"], archivefile_name)
        urllib.urlretrieve(archivefile_url, complete_path_targz)
        self.complete_path_targz = complete_path_targz

        # @TODO For some reason, wget hangs and doesn't finish, using
        # urllib.urlretrieve() instead for this for now.
        # archivefile = wget.download(archivefileurl, wget.bar_thermometer)
        self.phase['get_targz'] = True
Example #22
File: utils.py  Project: yenlow/nCoV2019
def get_rawfile_from_github(raw_url: str, outdir: str = None) -> None:
    """
    Download raw script from github. URL should point to the raw script off github

    Args:
        raw_url (str): url of raw script off github (should be publicly accessible)
        outdir (str): directory for saving the script; defaults to current path otherwise
    Example:
        >>> get_rawfile_from_github('https://raw.githubusercontent.com/beoutbreakprepared/nCoV2019/master/entry-checker.R','other_authors')

    """
    from wget import download, filename_from_url
    filename = filename_from_url(raw_url)
    if outdir:
        outpath = outdir + '/' + filename
    else:
        outpath = filename
    download(raw_url, outpath)
Example #23
def process_file(tasks, dem_files, dem_hs_files, process_name, options_dict,
                 zf, multiDirectional, tile_pyramid_levels, tar_dir, dem_dir):
    '''
    Download file using wget, extract dem from tar archive, and calculate stats
    '''
    while True:
        url = tasks.get()
        if not isinstance(url, str):
            print('[%s] evaluation routine quits' % process_name)
            # Indicate finished
            dem_files.put(0)
            dem_hs_files.put(0)
            break
        else:
            out_file = join(tar_dir, wget.filename_from_url(url))
            m_file = basename(out_file)
            root, ext = splitext(m_file)
            if ext == '.gz':
                root, ext = splitext(root)
            m_file = join(dem_dir, root + '_reg_dem.tif')
            # overview (.ovr) files sit alongside the rasters, so append the suffix
            m_ovr_file = m_file + '.ovr'
            m_hs_file = join(dem_dir, root + '_reg_dem_hs.tif')
            m_hs_ovr_file = m_hs_file + '.ovr'
            if options_dict['download']:
                if not exists(out_file) or overwrite:
                    print('Processing file {}'.format(url))
                    out_file = wget.download(url, out=tar_dir)
            if options_dict['extract']:
                # Only extract if the DEM file does not exist
                # FIXME this extracts all files??
                extract_tar(out_file, dem_dir=dem_dir)
            if options_dict['build_tile_overviews']:
                if not exists(m_ovr_file) or overwrite:
                    calc_stats_and_overviews(m_file, tile_pyramid_levels)
            if options_dict['build_tile_hillshade']:
                if not exists(m_hs_file) or overwrite:
                    create_hillshade(m_file, m_hs_file, zf, multiDirectional)
            if options_dict['build_tile_hillshade_overviews']:
                if not exists(m_hs_ovr_file) or overwrite:
                    calc_stats_and_overviews(m_hs_file, tile_pyramid_levels)
            dem_files.put(m_file)
            dem_hs_files.put(m_hs_file)
    return
Example #24
    def get_targz(self):
        try:
            archivefile_payload = {'id' : self.pmcid}
            archivefile_locator = requests.get('http://www.pubmedcentral.nih.gov/utils/oa/oa.fcgi', params=archivefile_payload)
            record = BeautifulSoup(archivefile_locator.content)

            # parse response for archive file location
            archivefile_url = record.oa.records.record.find(format='tgz')['href']

            archivefile_name = wget.filename_from_url(archivefile_url)
            complete_path_targz = os.path.join(self.dirs.data_dir, archivefile_name)
            urllib.urlretrieve(archivefile_url, complete_path_targz)
            self.complete_path_targz = complete_path_targz

            # @TODO For some reason, wget hangs and doesn't finish, using
            # urllib.urlretrieve() instead for this for now.
#            archivefile = wget.download(archivefileurl, wget.bar_thermometer)
        except:
            raise ConversionError(message='could not get the tar.gz file from PubMed', doi=self.doi)
Example #25
    def down_info(self):
        assert not self.is_html(), "Can't get down_info from html url!"
        filename = wget.filename_from_headers(
            self.headers) or wget.filename_from_url(self.url)
        filename = urllib.parse.unquote(filename)
        if filename and "." in filename:
            filetype = filename.rsplit(".", 1)[-1]
        else:
            suffix = re.search(r"plain|rar|zip|7z|mobi|epub|application"
                               )  # applicaton/类下的后缀名基本不是文本文件(pdf,doc等不算)
            if suffix:
                suffix = suffix.group()
                suffix = suffix.replace("plain",
                                        "txt").replace("application", "app")
            else:
                assert False, "Unknown filetype:" + self.ContentType
            filetype = None if suffix is None else suffix

        filesize = int(self.headers.get('Content-Length', -1))
        connect_time = self.elapse
        return filename, filetype, filesize, connect_time
Example #26
def download_image(url, filepath):
    """Downloads an image, the output filepath is a hash of the url

    :url: The url to download
    :returns: The filename. If an error occured or the url passed does not
    point to an image, None is returned

    """
    check = r'[^/\\&\?]+\.\w{3,4}(?=([\?&].*$|$))'
    match = re.search(check, url)
    if match:
        filename = wget.filename_from_url(url)
        if filepath[-1] != '/':
            filepath += '/'

        wget.download(url,
                      filepath + filename,
                      bar=lambda current, total, width: None)
        return filename

    return None
Example #27
def downbakupfile(AccessKeyId, AccessKeySecret, args):
    client = AcsClient(AccessKeyId, AccessKeySecret, 'cn-shanghai', timeout=600)
    start_time = args.st
    end_time = args.et

    request = DescribeBackupsRequest()
    request.set_accept_format('json')

    request.set_DBInstanceId(args.i)

    # Create the directory (if missing) for the backup files
    if not os.path.exists(args.d):
        os.mkdir(args.d)

    # Get the total backup count and per-page count, then compute the number of pages
    request.set_StartTime(start_time)
    request.set_EndTime(end_time)
    response = client.do_action_with_exception(request)
    pages = math.ceil(eval(str(response, encoding='utf-8'))['TotalRecordCount'] / eval(str(response, encoding='utf-8'))['PageRecordCount'])

    for page in range(1, pages + 1):
        request.set_PageNumber(page)
        response = client.do_action_with_exception(request)
        backupdetail = eval(str(response, encoding='utf-8'))['Items']['Backup']

        for i in range(len(backupdetail)):
            bakfile_url = backupdetail[i]['BackupDownloadURL']  # public (Internet) download URL
            # bakfile_url = backupdetail[i]['BackupIntranetDownloadURL']   # intranet download URL
            re_result = wget.filename_from_url(bakfile_url)
            bakfile = os.path.join(os.path.join(os.path.dirname(os.path.abspath(__file__)), args.d),re_result)
            if not os.path.exists(bakfile):
                wget.download(bakfile_url, out=bakfile)
                print('Downloaded file %s successfully' % bakfile)

    deletefile(args.d)
Example #28
    def download_nndb(self):
        """
        Download fullnames from NNDB website, save into fullnames.dat.
        """

        for i in xrange(26):
            print "\n[DEBUG] %d pages left..." % (26 - i)
            # url pattern for all NNDB pages based on last names
            url = "http://www.nndb.com/lists/%d/000063%d" % (493 + i, 304 + i)

            # download page, get raw data.
            fn = wget.filename_from_url(url)
            with open(wget.download(url)) as raw:
                content = raw.readlines()
            os.remove(fn)

            for line in content:
                if "nndb.com/people" in line:
                    name = self.rmtags(line).replace("\n", "")
                    with open(self.nndb_file, 'a') as w:
                        w.write(name + "\n")
        print "[DEBUG] NNDB Download complete!"
Example #29
    def download(url: str,
                 path: str,
                 name: str = None,
                 ext: str = None,
                 timestamp: bool = False):
        working_directory = os.getcwd()
        if name is None:
            name = wget.filename_from_url(url)

        if name is None:
            name = str(uuid.uuid4())

        if timestamp:
            name = f'{MediaTools.generate_date_string()}_{name}'

        if not os.path.exists(path):
            os.makedirs(path)

        file_path = os.path.join(path, name)

        filename = wget.download(url, file_path)
        os.chdir(working_directory)
Example #31
def import_example(data='titanic', verbose=3):
    """Import example dataset from github source.

    Parameters
    ----------
    data : str, optional
        Name of the dataset 'sprinkler' or 'titanic' or 'student'.
    verbose : int, optional
        Print message to screen. The default is 3.

    Returns
    -------
    pd.DataFrame()
        Dataset containing mixed features.

    """
    if data == 'sprinkler':
        url = 'https://erdogant.github.io/datasets/sprinkler.zip'
    elif data == 'titanic':
        url = 'https://erdogant.github.io/datasets/titanic_train.zip'
    elif data == 'student':
        url = 'https://erdogant.github.io/datasets/student_train.zip'

    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    PATH_TO_DATA = os.path.join(curpath, wget.filename_from_url(url))
    if not os.path.isdir(curpath):
        os.mkdir(curpath)

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        if verbose >= 3:
            print('[pca] >Downloading example dataset from github source..')
        wget.download(url, curpath)

    # Import local dataset
    if verbose >= 3: print('[pca] >Import dataset [%s]' % (data))
    df = pd.read_csv(PATH_TO_DATA)
    # Return
    return df
Example #32
    def get_targz(self):
        # make request for archive file location
        archivefile_payload = {'id': self.pmcid}
        archivefile_locator = requests.get(
            'http://www.pubmedcentral.nih.gov/utils/oa/oa.fcgi',
            params=archivefile_payload)
        record = BeautifulSoup(archivefile_locator.content)
        # parse response for archive file location
        archivefile_url = record.oa.records.record.find(format='tgz')['href']
        archivefile_name = wget.filename_from_url(archivefile_url)
        complete_path_targz = os.path.join(self.parameters["data_dir"],
                                           archivefile_name)

        # @TODO For some reason, wget hangs and doesn't finish
        # archivefile = wget.download(archivefileurl, wget.bar_thermometer)
        # Using urllib.urlretrieve() instead of wget for now:

        # Download targz
        urllib.urlretrieve(archivefile_url, complete_path_targz)
        self.complete_path_targz = complete_path_targz

        self.phase['get_targz'] = True
Example #33
def get_project(browser, project, to_git=False):
    name = project.rpartition("/")[-1]
    if name.startswith("exam"):
        return

    if not os.path.isdir(name):
        os.mkdir(name)
    os.chdir(name)

    browser.get(project)
    while not page_is_loaded(browser, "projects"):
        continue

    for links in browser.find_elements_by_tag_name("a"):
        link = links.get_attribute("href")
        if link.startswith("https://ceph.assistants.epita.fr/") and not os.path.exists(wget.filename_from_url(link)):
            wget.download(link)
    while rows(browser) is True:
        for links in browser.find_elements_by_tag_name("a"):
            link = links.get_attribute("href")
            if link.startswith("https://ceph.assistants.epita.fr/") and not os.path.exists(wget.filename_from_url(link)):
                try:
                    wget.download(link)
                except Exception as e:
                    print()
                    print(e)
                    print("Error:")
                    print(link)
                    print(wget.filename_from_url(link))
                    print()

    if to_git:
        for git_links in browser.find_elements_by_tag_name("input"):
            git_link = git_links.get_attribute("value")
            if git_link.startswith("git@"):
                git.Git(".").clone(git_link)

    os.chdir("../")
Example #34
#!/usr/bin/env python3

import os, wget
from ase import Atom, Atoms
from ase.build import bulk
from ase.calculators.lammpsrun import LAMMPS  # file-based ASE calculator
from ase.calculators.lammpslib import LAMMPSlib  # ASE calculator using LAMMPS' native Python interface

# Download the interatomic potential file
url = "https://openkim.org/files/MO_418978237058_005/NiAlH_jea.eam.alloy"
pot_fname = wget.filename_from_url(url)
if not os.path.exists(pot_fname):
    pot_fname = wget.download(url)

# Build the model
Ni = bulk('Ni', cubic=True)
H = Atom('H', position=Ni.cell.diagonal() / 2)
NiH = Ni + H
NiH.pbc = True

# Run the calculation
lammps = LAMMPS(files=[pot_fname],
                parameters={
                    'pair_style': 'eam/alloy',
                    'pair_coeff': ['* * {} H Ni'.format(pot_fname)]
                })
lammps.set(command="/usr/bin/lmp")

NiH.calc = lammps
print("Energy ", NiH.get_potential_energy())
Example #35
def _import_example(data='2dpeaks', url=None, sep=';', verbose=3):
    """Import example dataset from github source.

    Description
    -----------
    Import one of the few datasets from github source or specify your own download url link.

    Parameters
    ----------
    data : str
        Name of datasets: "2dpeaks" or "2dpeaks_image"
    url : str
        URL link to the dataset.
    verbose : int (default: 3)
        Print to screen. 0: None, 1: Error, 2: Warning, 3: Info, 4: Debug, 5: Trace.

    Returns
    -------
    pd.DataFrame()
        Dataset containing mixed features.

    """
    if url is not None:
        data = wget.filename_from_url(url)
    elif data == '2dpeaks_image':
        url = 'https://erdogant.github.io/datasets/' + data + '.png'
    elif data == '2dpeaks':
        url = 'https://erdogant.github.io/datasets/' + data + '.zip'
    elif data == '1dpeaks':
        x = [
            0, 13, 22, 30, 35, 38, 42, 51, 57, 67, 73, 75, 89, 126, 141, 150,
            200
        ]
        y = [
            1.5, 0.8, 1.2, 0.2, 0.4, 0.39, 0.42, 0.22, 0.23, 0.1, 0.11, 0.1,
            0.14, 0.09, 0.04, 0.02, 0.01
        ]
        # X = np.c_[x, y]
        return y
    else:
        if verbose >= 3: print('[findpeaks] >Nothing to download <return>.')
        return None

    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    PATH_TO_DATA = os.path.join(curpath, wget.filename_from_url(url))
    if not os.path.isdir(curpath):
        os.makedirs(curpath, exist_ok=True)

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        if verbose >= 3:
            print('[findpeaks] >Downloading from github source: [%s]' % (url))
        wget.download(url, curpath)

    # Import local dataset
    if verbose >= 3: print('[findpeaks] >Import [%s]' % (PATH_TO_DATA))
    if data == '2dpeaks_image':
        cv2 = stats._import_cv2()
        X = cv2.imread(PATH_TO_DATA)
    else:
        X = pd.read_csv(PATH_TO_DATA, sep=sep).values
    # Return
    return X
Example #36
def main():
    try:

        # standard flags
        parser = argparse.ArgumentParser(description =
            'Command-line interface to jats-to-mediawiki.xslt, a script to ' +
            'manage conversion of articles (documents) from JATS xml format ' +
            'to MediaWiki markup, based on DOI or PMCID')
        parser.add_argument('-d', '--destination', default='articles/',
            help='path to destination directory for purposes of this script')
        parser.add_argument('-x', '--xmlcatalogfiles',
            default='dtd/catalog-test-jats-v1.xml',
            help='path to xml catalog files for xsltproc')

        # includes arbitrarily long list of keywords, or an input file
        parser.add_argument('-i', '--infile', nargs='?',
            type=argparse.FileType('r'),default=sys.stdin,
            help='path to input file', required=False)
        parser.add_argument('-o', '--outfile', nargs='?',
            type=argparse.FileType('w'), default=sys.stdout,
            help='path to output file', required=False)
        parser.add_argument('-a', '--articleids', nargs='+',
            default=None,
            help='an article ID or article IDs, either as DOIs or PMCIDs')

        args = parser.parse_args()

#        print args #debug


        # Handle and convert input values
        destination = args.destination
        xmlcatalogfiles = args.xmlcatalogfiles
        infile = args.infile
        outfile = args.outfile
        articleids = []

        # add articleids if passed as option values
        if args.articleids:
            articleids.extend([to_unicode_or_bust(articleid)
                               for articleid in args.articleids])
        # add articleids from file or STDIN
        if not sys.stdin.isatty() or infile.name != "<stdin>":
            articleids.extend([to_unicode_or_bust(line.strip())
                               for line in infile.readlines()])
        # De-duplicate by converting to set (unique) then back to list again
        articleids = list(set(articleids))

        # set environment variable for xsltproc and jats dtd
        try:
            cwd = to_unicode_or_bust(os.getcwd())
            if xmlcatalogfiles.startswith("/"):
                os.environ["XML_CATALOG_FILES"] = xmlcatalogfiles
            else:
                os.environ["XML_CATALOG_FILES"] = (cwd +
                    to_unicode_or_bust("/") +
                    to_unicode_or_bust(xmlcatalogfiles) )
        except:
            print 'Unable to set XML_CATALOG_FILES environment variable'
            sys.exit(-1)
        # print "\n" + os.environ.get('XML_CATALOG_FILES') + "\n" #debug

        # create destination directory
        destination = cwd + "/" + to_unicode_or_bust(destination)
        try:
            if not os.path.exists(destination):
                os.makedirs(destination)
        except:
            print 'Unable to find or create temporary directory'
            sys.exit(-1)

        # separate DOIs and PMCIDs
        articledois = [i for i in articleids if re.match(r'^10\.', i)]
        articlepmcids = [i for i in articleids if re.match('^PMC', i)]

        articlepmcidsfromdois = []

        # Send DOIs through PMC ID converter API:
        # http://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
        if articledois:

            articledois = ",".join(articledois)
            idpayload = {'ids' : articledois, 'format' : 'json'}
            idconverter = requests.get(
                'http://www.pubmedcentral.nih.gov/utils/idconv/v1.0/',
                params=idpayload)
            print idconverter.text
            records = idconverter.json()['records']
            if records:
                articlepmcidsfromdois = [i['pmcid'] for i in records]

        # Extend PMCIDs with those from converted DOIs
        articlepmcids.extend(articlepmcidsfromdois)

        # De-duplicate with set to list conversion
        articlepmcids = list(set(articlepmcids))

        print "\nArticle IDs to convert:\n" #debug
        print articlepmcids #debug

        # Main loop to grab the archive file, get the .nxml file, and convert
        for articlepmcid in articlepmcids:

            # @TODO make flag an alternative to .tar.gz archive download
            # use instead the regular API for xml document
            # http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=PMC2953622
            # unclear if this front-facing XML is updated frequently
            # I recall from plos that updates are made via packaged archives

            # request archive file location
            archivefilepayload = {'id' : articlepmcid}
            archivefilelocator = requests.get(
                'http://www.pubmedcentral.nih.gov/utils/oa/oa.fcgi',
                params=archivefilepayload)
            record = BeautifulSoup(archivefilelocator.content)

            # parse response for archive file location
            archivefileurl = record.oa.records.record.find(
                format='tgz')['href']

            # download the file
            archivefilename = wget.filename_from_url(archivefileurl)

            if not os.path.exists(destination + archivefilename):
                urllib.urlretrieve(archivefileurl, destination + archivefilename)
                print "\nDownloading file..."
            else:
                print "\nFound local file, skipping download..."

                # @TODO For some reason, wget hangs and doesn't finish, using
                # urllib.urlretrieve() instead for this for now.
#               archivefile = wget.download(archivefileurl, wget.bar_thermometer)

            # open the archive
            archivedirectoryname, archivefileextension = archivefilename.split(
                '.tar.gz')

            if not os.path.exists(destination + archivedirectoryname):
                print "\nExtracting " + archivedirectoryname + " ..."
                tfile = tarfile.open(destination + archivefilename, 'r:gz')
                tfile.extractall(destination)
            else:
                print "\nFound local directory, skipping extraction..."

            # run xsltproc
            # @TODO use list comprehension instead
            for n in glob.glob(destination + archivedirectoryname + "/*.nxml"):
                nxmlfilepath = n
            print "\nConverting... "
            print nxmlfilepath
            xsltoutputfile = open(destination + articlepmcid + ".mw.xml", 'w')
            xslt_file = os.path.abspath(
                os.path.dirname(__file__)) + '/' + 'jats-to-mediawiki.xsl'
            xsltcommand = call(
                ['xsltproc', xslt_file, nxmlfilepath],
                stdout=xsltoutputfile)
            print "\nReturning results..."
            if xsltcommand == 0:
                print xsltoutputfile.name + "\n"
            else:
                print "xslt conversion: failure"
                sys.exit(-1)

    except KeyboardInterrupt:
        print "Killed script with keyboard interrupt, exiting..."
    except Exception:
        traceback.print_exc(file=sys.stdout)

    sys.exit(0)
Example #37
import wget
import tempfile
import os

url = 'https://p0.ifengimg.com/2019_30/1106F5849B0A2A2A03AAD4B14374596C76B2BDAB_w1000_h626.jpg'

# Get the filename from the URL
file_name = wget.filename_from_url(url)
print(file_name)  #1106F5849B0A2A2A03AAD4B14374596C76B2BDAB_w1000_h626.jpg

# Download with the default filename; the filename is returned
file_name = wget.download(url)
print(file_name)  #1106F5849B0A2A2A03AAD4B14374596C76B2BDAB_w1000_h626.jpg

# Download and rename the output file
target_name = 't1.jpg'
file_name = wget.download(url, out=target_name)
print(file_name)  #t1.jpg

# Get a temporary directory and download into it
tmpdir = tempfile.gettempdir()
target_name = 't2.jpg'
file_name = wget.download(url, out=os.path.join(tmpdir, target_name))
print(file_name)  #/tmp/t2.jpg
Example #38
        url = str(link.get('href'))
        if "http://www.fab.mil.br/cabine/voos/" in url and ".pdf" in url:
            str_date = re.search('\d+', url).group(0)
            date = datetime.datetime.strptime(str_date, "%Y%m%d").date()
            flights.append(FileFlights(url, date))

    # remove all files and create downloads directory
    if os.path.exists('downloads/'):
        os.popen('rm -f downloads/*')
    else:
        os.mkdir('downloads')

    # download all files
    for flight in flights:
        filename = wget.download(flight.url, 'downloads')
        filename = wget.filename_from_url(flight.url)
        filepath = 'downloads/' + filename
        fileobj = open(filepath,'rb')
        doc = PDFDocument(fileobj)
        result = get_tables_from_document(doc)
        # print result
        for r in result:

            for i in r:
                # print i
                res = [x for x in i if '' not in i]
                if len(res) > 0:
                    print len(res), res
                    # print [t.encode('utf-8') for t in i]
                # print i
                # a = []
Example #39
def extract_ad(pod_info, create_ads=False, create_non_ads=False):
    """
    extract ad from a given audio file based on timestamps

    Args:

    Returns:

    """
    pod_info['fname'] = wget.filename_from_url(pod_info['content_url'])
    if os.path.exists(os.path.join(PODS_DIR, pod_info['fname'])):
        pod_meta = MP3_META(os.path.join(PODS_DIR, pod_info['fname']))
        pod_file_length = float(pod_meta.info.length)
        try:
            pod_length_json = pod_info['content_duration']
            if pod_length_json.count(':') == 1:
                pod_length = [float(x) for x in pod_length_json.split(':')]
                pod_length_json = pod_length[0]*60 + pod_length[1]
            elif pod_length_json.count(':') == 2:
                pod_length = [float(x) for x in pod_length_json.split(':')]
                pod_length_json = pod_length[0]*3600 + \
                    pod_length[1]*60 + pod_length[2]
            else:
                pod_length_json = float(pod_length_json)
        except ValueError as e:
            print(e, pod_info['fname'])
            pod_length_json = pod_file_length
        # if abs(pod_file_length-pod_length_json) < 5:
        if pod_file_length <= pod_length_json:
            # if True:
            # print('Extracting ad from {}'.format(pod_info['fname']))
            if create_non_ads or create_ads:
                pod_aud, pod_sr = librosa.load(os.path.join(
                    PODS_DIR, pod_info['fname']), sr=None)
                pod_aud, pod_sr = preprocess_aud(pod_aud, pod_sr)
                # return 1
                aud_len = len(pod_aud)
                ad_slices = []
                non_ad_aud = np.array([])
                ad_stop = 0
                for i, ad in enumerate(pod_info['ads']):
                    ad_slice = slice(
                        floor(int(ad['ad_start'])*pod_sr), ceil((int(ad['ad_end'])+1)*pod_sr))
                    ad_aud = pod_aud[ad_slice.start:ad_slice.stop]
                    ad_slices.append(ad_slice)
                    non_ad_aud = np.append(
                        non_ad_aud, pod_aud[ad_stop:ad_slice.start])
                    ad_stop = ad_slice.stop
                    ad_fname = os.path.join(ADS_DIR, "{}_{}.wav".format(
                        pod_info['fname'].split('.')[0], i))

                    if not os.path.exists(ad_fname) and create_ads:
                        soundfile.write(ad_fname, ad_aud, pod_sr, format='WAV')
                if ad_slice.stop < aud_len:
                    non_ad_aud = np.append(non_ad_aud, pod_aud[ad_slice.stop:])
                # print(ad_slices)
                ad_ranges = [x for y in [list(range(ad_slice.start, ad_slice.stop))
                                         for ad_slice in ad_slices] for x in y]

                try:
                    assert len(ad_ranges)+len(non_ad_aud) <= aud_len
                    non_ad_fname = os.path.join(NON_ADS_DIR, "{}_content.wav".format(
                        pod_info['fname'].split('.')[0]))
                    if not os.path.exists(non_ad_fname) and create_non_ads:
                        soundfile.write(non_ad_fname, non_ad_aud,
                                        pod_sr, format='WAV')
                except AssertionError as ae:
                    print("{} Aud,ad length mismatch".format(pod_info['fname']),
                          len(ad_ranges), len(non_ad_aud), aud_len, aud_len-len(ad_ranges)+len(non_ad_aud))
        else:
            print('Skipping {} length mismatch'.format(pod_info['fname']))
Example #40
# print(file_name)
# audio_request = requests.get(obj_link)
# with open("D:\\Downloads\\" + file_name, "wb") as file:
	# file.write(audio_request.content)
	
# print(audio_request.status_code)
# print(audio_request.headers["content-type"])
# print(audio_request.encoding)
#====================================

dir_to_save = "/"
if sys.platform == "win32":
	dir_to_save = "D:\\Downloads\\"
elif sys.platform == "linux":
	dir_to_save = "/storage/sdcard0/Download/"
	
print("Directory to save:	", dir_to_save)

if not os.path.exists(dir_to_save):
	print("Directory to save not found!")
	quit()

file_name = wget.filename_from_url(obj_link)

if not os.path.exists(dir_to_save + file_name):
	wget.download(obj_link, dir_to_save + file_name)
else:
	print("This file already downloaded!")
	quit()

Example #41
def path_and_filename_from_url(url, data_dir):
    """ Given a URL and directory, return the filename in that dir """
    filename = wget.filename_from_url(url)
    filename = os.path.join(data_dir, filename)
    return filename
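An illustrative call of the helper above (URL and directory are placeholders, not from the original project):

print(path_and_filename_from_url(
    'https://erdogant.github.io/datasets/sprinkler.zip', '/tmp/data'))
# -> /tmp/data/sprinkler.zip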