示例#1
0
def main():
    """Fetch the resource/behavior pack archives, unpack both, and post-process them.

    Relies on module-level config (SKIP_DOWNLOAD, RP_URL, BP_URL, DOWNLOAD_MODE)
    and helpers (download_file, harvest, strip_sounds).
    """
    if not SKIP_DOWNLOAD:
        # Start from a clean scratch directory before downloading.
        if os.path.exists('.tmp'):
            shutil.rmtree('.tmp')
        os.makedirs('.tmp', exist_ok=True)

        print("Downloading files...")
        download_file(RP_URL, '.tmp/rp.zip')
        download_file(BP_URL, '.tmp/bp.zip')

    # Re-extract each archive, discarding any stale extraction first.
    for message, archive, target in (
            ('Extracting resource pack', '.tmp/rp.zip', '.tmp/rp'),
            ('Extracting behavior pack', '.tmp/bp.zip', '.tmp/bp')):
        if os.path.exists(target):
            shutil.rmtree(target)
        print(message)
        with ZipFile(archive) as zipf:
            zipf.extractall(target)

    stable = DOWNLOAD_MODE == 'stable'

    harvest('.tmp/bp', stable)
    strip_sounds('.tmp/rp/sounds/sound_definitions.json')
示例#2
0
def deal_li(li):
    """Open the detail page linked from *li*, scrape the image title and URL,
    and download the image.

    Uses the module-level helpers ``get_driver`` and ``downloader``.
    """
    pi_driver = get_driver(
        li.find_element_by_tag_name("a").get_attribute("href"))
    try:
        # The page title lives in the <h1> under the "photo-hd" header block;
        # strip ":" so the title is usable as a filename component.
        h1 = pi_driver.find_element_by_xpath(
            '//*[@class="photo-hd"]').find_element_by_tag_name('h1')
        title = h1.text.replace(":", "")
        print("title:%s" % title)

        div_pic = pi_driver.find_element_by_xpath('//*[@id="img"]')
        url = div_pic.find_element_by_tag_name('img').get_attribute("src")
        print("url:%s" % url)

        # BUG FIX: split(".")[1] returned the wrong piece for basenames that
        # contain extra dots (e.g. "a.b.jpg" -> "b"); splitext reliably
        # yields the final extension.
        file_type = os.path.splitext(os.path.basename(url))[1].lstrip(".")

        file_path = ("/Users/lyf/Pictures/crawler_images/%s.%s" % (title, file_type)) \
            .replace(" ", "")

        print(file_path)

        downloader.download_file(url, file_path)

        # Downloading the original-size image requires a VIP account:
        # size_span = pi_driver.find_element_by_xpath('//*[@id="main"]/div[2]/div[2]/div[2]/p[2]/span')
        # print("size:%s"%size_span.text)
    finally:
        # BUG FIX: always release the per-page webdriver, even when scraping
        # raises (previously the driver leaked on any exception above).
        pi_driver.close()
示例#3
0
def main():
    """CLI entry point: validate the URL, scrape the box.com page, download the PDF.

    Reads the module-level ``args`` namespace and uses ``url_checker``,
    ``Scraper`` and ``download_file``.
    """
    style = "=+" * 20
    if url_checker(args.url) is False:  # url format check
        # BUG FIX: the message previously read "http:// or http://".
        raise argparse.ArgumentTypeError(
            'Value has to be in full url format http:// or https://')
    print(style)
    print("Box.com PDF Downloader by @lfasmpao")

    box_object = Scraper(args.url, args.driver_location, args.use_x11,
                         args.wait_time)
    print("Please wait for about {} seconds...".format(args.wait_time))
    box_object.load_url()
    dl_name = box_object.get_download_title()
    print(style)
    print("DATA TO BE DOWNLOADED\nTitle: {}\nBox.com URL: {}".format(
        dl_name, args.url))

    print(style)
    dl_url = box_object.get_download_url()
    print("Download URL:", dl_url)
    print(style)
    box_object.clean()  # clean

    # Create the output directory if needed.  BUG FIX: guard against an empty
    # dirname (output location in the current directory), where makedirs('')
    # raises FileNotFoundError; exist_ok also removes the check/create race.
    directory = os.path.dirname(args.output_location)
    if directory:
        os.makedirs(directory, exist_ok=True)
    # Hoist the path expression rather than building it twice.
    output_path = str(args.output_location + dl_name + ".pdf")
    print("Downloading..\nFile will be save as:", output_path)
    download_file(url=dl_url, path=output_path)
示例#4
0
def download():
    """Scrape the download table and fetch the libmpv 7z matching this machine's arch."""
    soup = bs4.BeautifulSoup(requests.get(url).text, features="html.parser")
    table = soup.table
    # 64-bit hosts get the x86_64 build; everything else gets the i686 build.
    wanted = "x86_64" if platform.architecture()[0][:2] == "64" else "i686"
    download_url = table.find("a", href=True, title=re.compile(wanted)).get("href")
    downloader.download_file(download_url, os.path.join(cd, "libmpv.7z"))
示例#5
0
def download() -> None:
    """Download the ttsdk 7z archive for the last tested major version (5.8)."""
    page = bs4.BeautifulSoup(requests.get(url).text, features="html.parser")
    # Keep only listing entries mentioning 5.8 and take the newest one;
    # the trailing "/" is stripped from its href to get the version string.
    candidates = [item for item in page.find_all("li") if "5.8" in item.text]
    version = candidates[-1].a.get("href")[0:-1]
    archive = "tt5sdk_{v}_{p}.7z".format(v=version, p=get_url_suffix_from_platform())
    download_url = "/".join((url, version, archive))
    print("Downloading from " + download_url)
    downloader.download_file(download_url, os.path.join(cd, "ttsdk.7z"))
示例#6
0
def deal_detail_page(_url):
    """Scrape one detail page: locate the ``#img`` element, resolve the image's
    src/title attributes and hand the file to the downloader.

    Uses module-level ``get_source``, ``url``, ``dir_path`` and ``downloader``.
    """
    root_ele = etree.HTML(get_source(_url))  # type:Element
    img_ele = root_ele.xpath('''//*[@id="img"]''')  # type :Element
    for img in img_ele[0]:  # type:Element
        print(img)
    # NOTE(review): only the *last* child iterated above is inspected below —
    # this mirrors the original logic; confirm the container always holds
    # exactly one image element.
    src_url = ""
    title = ""
    # Renamed the loop variable from `map`, which shadowed the builtin.
    for attr in img.items():
        print(attr)
        if attr[0] == "src":
            src_url = url + attr[1]
        if attr[0] == "title":
            title = attr[1]
    # BUG FIX: split(".")[1] picked the wrong piece for basenames containing
    # extra dots; splitext reliably yields the final extension.
    file_type = os.path.splitext(os.path.basename(src_url))[1].lstrip(".")
    file_path = ("%s%s.%s" % (dir_path, title.replace("/", ""), file_type))
    print(file_path)
    downloader.download_file(src_url, file_path)
示例#7
0
文件: main.py 项目: valbg/pertanian
def iterate_over_selects(selectors, hidden_inputs=None):
    """Recursively exercise every combination of the page's drop-down selectors.

    Mutates the module-level ``values_in_use`` / ``request_values`` state and,
    once all 8 hidden form inputs have been collected, triggers the file
    download via ``downloader.download_file``.

    NOTE(review): exits the whole process via ``exit("Completed!")`` when the
    root selector has been fully exhausted.
    """
    # if there are no hidden inputs specified default it to empty array
    if not hidden_inputs:
        hidden_inputs = []
    # iterate over the selects (drop-downs) on page
    for select in selectors:
        # get name of the selector (a.k.a. province, status, years, etc.)
        opt_name = select.attrs.get('name')

        # if it is already in use go to next selector
        if opt_name in values_in_use:
            continue

        # else append it to the list of selectors in use
        values_in_use.append(opt_name)

        # get all available options and iterate over them
        options = select.findAll('option')

        # for each option of the current drop down - make a request
        # (this indirectly recurses back into this function for child selects)
        iterate_over_options(options, opt_name, hidden_inputs)

        # Once we have exercised all options for a level remove it from the list of values in use.
        # We can then iterate over the same element once again by using the next option of the
        # parent selector. If by removing it we end up removing the only child (root)
        # that means we have completed.

        values_in_use.remove(opt_name)
        request_values[opt_name] = ''

        if len(values_in_use) == 0:
            exit("Completed!")

    # Proceed downloading the file
    # NOTE(review): 8 appears to be the number of hidden fields the target
    # form emits when fully specified — confirm against the page markup.
    if len(hidden_inputs) == 8:

        # process hidden fields into a request
        download_request_headers = {}
        for hidden_input in hidden_inputs:
            download_request_headers[hidden_input.attrs.get('name')] = hidden_input.attrs.get('value')

        # add 'excel' header
        download_request_headers['save'] = 'xl'

        #actually download
        downloader.download_file(download_url, download_request_headers)
示例#8
0
    def read_robots(robots_path):
        """Fetch robots.txt via the downloader and return its text.

        Returns the UTF-8 decoded contents, propagates the downloader's
        ``False``/``None`` sentinels unchanged, and returns ``False`` when
        the body cannot be read or decoded.
        """
        f = downloader.download_file(robots_path)
        if f is False:
            return False

        if f is None:
            return None

        try:
            return f.read().decode('utf-8')
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed SystemExit
            # and KeyboardInterrupt.  Exception keeps the best-effort
            # contract without masking interpreter-level signals.
            return False
示例#9
0
def main():
    """Build the CLI, ensure the output directory exists, fetch the seed data
    and run the node configurator."""
    parser = argparse.ArgumentParser(
        description='Node configurator generator',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mode', help='node type',
                        choices=('api', 'peer', 'dual'), required=True)
    # The two node-capability toggles share the same shape.
    for flag, description in (('--voting', 'node will be voting'),
                              ('--harvesting', 'node will be harvesting')):
        parser.add_argument(flag, help=description, action='store_true')
    parser.add_argument('--output', help='output directory',
                        default='../settings')
    parser.add_argument('--force', help='overwrite output directory',
                        action='store_true')
    parser.add_argument('--ask-pass',
                        help='ask about pass when loading pem key files',
                        action='store_true')
    args = parser.parse_args()

    # First run: create a private (0700) output directory.
    if not Path(args.output).is_dir():
        os.makedirs(args.output, mode=0o700)

    feature_settings = {
        'voting': args.voting,
        'harvesting': args.harvesting,
        'ask-pass': args.ask_pass,
    }
    configurator = NodeConfigurator(args.output, args.force, args.mode,
                                    feature_settings)
    download_file(configurator.dir, NEMESIS_SEED)
    download_file(configurator.dir, MONGO_SCRIPTS)
    configurator.run()
示例#10
0
def download_vid(link):
    """Resolve the direct video URL behind *link* with Selenium and download it
    as ``SHOW_NAME/<basename>.mp4``.

    Raises SystemError for unknown domains or when the download fails.
    """
    # BUG FIX: determine the expected anchor text *before* starting the
    # browser, so an unknown domain no longer leaks a webdriver instance.
    if "kisscartoon" in link:
        link_text = "HERE"
    elif "kissanime" in link:
        link_text = "CLICK HERE"
    else:
        raise SystemError("Unknown domain")
    browser = webdriver.Firefox(proxy=SELENIUM_PROXY)
    try:
        browser.get(link)
        WebDriverWait(browser, TIME_LIMIT).until(
            EC.presence_of_element_located((By.LINK_TEXT, link_text)))
        assert "If the player does not work," in browser.page_source
        # Dismiss any overlay/popup before reading the link.
        browser.find_element_by_tag_name('body').send_keys(Keys.ESCAPE)
        save_link = browser.find_element_by_link_text(link_text).get_attribute('href')
    finally:
        # BUG FIX: the browser was leaked whenever any step above raised.
        browser.close()
    filename = link.split('/')[-1].split('?')[0] + '.mp4'

    if download_file(save_link, "%s/%s" % (SHOW_NAME, filename), proxy=URLLIB_PROXY) is False:
        raise SystemError("Connection error")
示例#11
0
文件: main.py 项目: dvogel/usabenford
def download_and_analyze(fiscal_year, agency, spending_type):
    """Worker function, run on a separate thread: download one agency's
    spending file for a fiscal year (if analyses are still needed) and store
    the Benford analyses in MongoDB.

    Returns (True, analyses) on success, (False, error) on failure, and None
    when no analysis is needed.

    NOTE: this is Python 2 code (`print >>`, `except E, e` syntax).
    """
    dbconn = Connection()
    db = dbconn[settings.DB_NAME]
    monthly_analyses = db['monthly_analyses']

    # Skip the download entirely when every month is already analyzed.
    analyses = monthly_analyses.find({'fiscal_year': fiscal_year, 'agency': agency, 'spending_type': spending_type})
    needed = analyses_needed(analyses)
    if needed:
        (filename, url, destpath) = usaspending.file_info(fiscal_year, agency, spending_type)
        dl_result = download_file(filename, url, destpath)
        # download_file signals failure with a DownloadFileFailure instance
        # rather than raising.
        if isinstance(dl_result, DownloadFileFailure):
            return (False, dl_result)
        print >>sys.stdout, "Got file %s" % filename

        try:
            analyses = analyze_file(destpath, fiscal_year, 
                                    settings.ANALYSIS_DATEFIELDS[spending_type],
                                    settings.ANALYSIS_FIELDS[spending_type])
            save_analyses(db, fiscal_year, agency, spending_type, analyses)
            return (True, analyses)
        except _csv.Error, e:
            # Malformed CSV input is reported to the caller, not raised.
            return (False, e)
示例#12
0
 def download(self, track: Track, file_path: str) -> None:
     """Download *track*'s audio from its URL to *file_path*.

     Thin wrapper that delegates to ``downloader.download_file``.
     """
     downloader.download_file(track.url, file_path)
示例#13
0
# Size-suffix appended to member thumbnail URLs; stripping it yields the
# full-size image URL.
DEFAULT_SIZE_URL = '/400_320_102400'

# Fetch the artist search page and fail loudly on an HTTP error.
response = requests.get(f'{URL}/s/s46/search/artist?ima=0348')
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")

# Each 'col2-wrap' element holds one group and its member boxes.
groups = soup.find_all(class_='col2-wrap')

# Maps group name -> list of member-info dicts.
group_info = {}

for group in groups:
    group_name = group.find(class_='com-title-type0').getText()

    members = group.find_all(class_='box')
    member_list = []
    for member in members:
        member_info = {
            'name': member.find(class_='name').getText(),
            'kana': member.find(class_='kana').getText()
        }
        # Strip the size suffix to obtain the full-resolution image path.
        img_url = member.find('img')['src']
        member_info['img'] = img_url.replace(DEFAULT_SIZE_URL, '')
        member_list.append(member_info)
        # Download each member portrait as "<name>.jpg" in the working dir.
        file_name = f'{member_info["name"]}.jpg'
        print(f'{file_name} is downloading')
        downloader.download_file(f'{URL}{member_info["img"]}', file_name)
        print(f'{file_name} download was finished')

    group_info[group_name] = member_list
示例#14
0
            return _chapters_list_el

        chapters_list_el = get_chapters_list_el()

        for chapter_el in chapters_list_el:
            chapters_name.append(
                chapter_el.find_element_by_css_selector('span.track').text)
        for i, chapter_name in enumerate(chapters_name):
            chapters_list_el = get_chapters_list_el()
            chapters_list_el[i].click()

            start_time = time()
            added_audio = False
            while time() - start_time < 120 and not added_audio:
                audio_file_el = WebDriverWait(driver, 60).until(
                    EC.presence_of_element_located(
                        (By.CSS_SELECTOR, 'audio#audioplayer')))
                audio_file_url = audio_file_el.get_attribute('src')
                if audio_file_url not in chapters_audio_url:
                    chapters_audio_url.append(audio_file_url)
                    added_audio = True
                sleep(.1)
            chapters_menu_el.click()
        driver.get(book_url)
        print('collected these chapters:')
        for i in range(len(chapters_name)):
            download_file(book_name, chapters_audio_url[i],
                          f'{i + 1}-{chapters_name[i]}')
finally:
    driver.close()
""" 
The url to run this program use this url:
https://raw.githubusercontent.com/rmlassesen/dataset/master/p_pladser.csv
"""
import downloader as download
import convert_csv as convert_csv
import mean as mean

if __name__ == '__main__':
    # BUG FIX: the conversion and mean steps previously ran at module import
    # time and crashed with NameError, because data_income_file was only
    # bound under this guard.  The whole pipeline now runs under the guard,
    # and the no-op module-level `global file_name` statement was removed.
    data_income_file = download.download_file()
    data = convert_csv.convert_csv_to_dataframe(data_income_file)
    mean.mean_find(data)
示例#16
0
    return file_dict


# Load saved user credentials: cred[0]/cred[1] are the Skype login pair,
# cred[2] is presumably a download target, cred[3] a chat/channel id
# — TODO confirm against conf.getUserCread().
cred = conf.getUserCread()

if cred[0] is not None and cred[3] is not None:
    # Full configuration: open the Skype session and enter the
    # interactive download loop.
    sk = Skype(cred[0], cred[1], "tokenFile")
    file_dict = get_msg_list(cred[3])
    while True:
        # 'n' refreshes the file list, a number downloads that entry,
        # 'quit' exits.
        user_input = input('n/id/quit >> ')
        if user_input.isnumeric():
            index = int(user_input)
            if 1 <= index <= len(file_dict):
                file_tuple = file_dict[index]
                # Authenticate the download with the current skype token.
                dw.download_file(
                    file_tuple[0], file_tuple[1],
                    "skype_token {0}".format(sk.conn.tokens["skype"]), cred[2])
            else:
                print('Please Enter valid Id')
        else:
            if user_input == 'quit':
                break
            elif user_input == 'n':
                file_dict = get_msg_list(cred[3])
            else:
                print('Please Enter Valid Option')

elif cred[0] is not None and cred[3] is None:
    # Credentials but no chat id: list contacts instead (continues below).
    sk = Skype(cred[0], cred[1], "tokenFile")
    contact = sk.contacts
    contact_dict = {}
# Hand-rolled argv parsing.  Expected shape:
#   [prog] [-r] <flag> <value> ... -f <site...> <extra...> <dir...>
# where every "<flag> <value>" pair before -f becomes a float option
# (must include -n, -nf and -i).
inputargs = {}
sites = []
ddir = []

# Optional -r flag must be argv[1]; it shifts where option pairs start.
if '-r' in sys.argv:
    i = 2
    r = True
else:
    i = 1
    r = False
print(sys.argv)
# Consume "<flag> <value>" pairs up to the -f marker.
while i < sys.argv.index('-f'):
    inputargs[sys.argv[i]] = float(sys.argv[i + 1])
    i += 2

# xy: position of -f; xye: number of files (-nf), used to slice argv below.
xy = sys.argv.index('-f')
xye = int(inputargs['-nf'])

print(inputargs)
print(sys.argv)
name = []
# The xye URLs follow -f directly; the destination dirs follow them after
# a gap of xye + 1 positions.  Each download gets a synthetic name "FileN".
for zxy in range(xye):
    sites.append(sys.argv[xy + zxy + 1])
    ddir.append(sys.argv[xy + zxy + 2 + xye])
    name.append('File' + str(zxy))

print(inputargs)
# Kick off one download per site with thread count -n and interval -i.
for i in range(len(sites)):
    downloader.download_file(sites[i], ddir[i], name[i], r,
                             int(inputargs['-n']), inputargs['-i'])
示例#18
0
def main():
    """
    Parses command line options, then delegates to various other functions.
    """

    usage_str = """
%prog OPTION [FILENAME | dir: DIRECTORY | repo] | Download a file from Github
%prog push OPTION [FILEPATH | DIRPATH]          | Push a file to Github

Examples:
`grabrc .emacs`  -- Download .emacs from Github.
`grabrc dir:.emacs.d --outfile .irssiconfig` - Download the .emacs.d directory from Github.
`grabrc repo --destdir=/tmp/` -- Download and untar the repository in /tmp/.
`grabrc push /home/user/.vimrc` -- Save ~/.vimrc to Github, overwriting the existing .vimrc.
"""

    parser = OptionParser(usage=usage_str, version="r33")

    download_group = OptionGroup(parser, "Download: All (files, directories, repositories)")
    download_group.add_option("-o", "-O", "--name", "--outfile",
                              dest="outfile", action="store", metavar="NAME",
                              help="Rename the downloaded item to NAME.")

    download_group.add_option("-d", "--destdir",
                              dest="destdir", action="store", metavar="DIR",
                              help="Place the downloaded item in DIR. \
                              Default: The current directory.")

    download_group.add_option("--no-backup",
                              dest="nobackup", action="store_true",
                              help="If the file already exists, don't make a backup. \
                              Default: False. If the item already exists, it will be backed up.")

    dir_group = OptionGroup(parser, "Download: Repositories")
    dir_group.add_option("-k", "--keep-tar",
                         dest="tar", action="store_true",
                         help="Download the repository as a tar.gz file. \
                               Default: Untar the repository.")

    dir_group.add_option("-z", "--keep-zip",
                         dest="zip", action="store_true",
                         help="Download the repository as a .zip.")

    filegroup = OptionGroup(parser, "Download: Files")
    filegroup.add_option("-a", "--append",
                         dest="append", action="store_true",
                         help="If file already exists, append to existing file. \
                         Default: Back up existing file")

    filegroup.add_option("-r", "--replace",
                         dest="replace", action="store_true",
                         help="If the file already exists, replace it")

    filegroup.add_option("-p", "--print",
                         dest="stdout", action="store_true",
                         help="Print the file to the console instead of saving it.")

    savegroup = OptionGroup(parser, "Upload")
    savegroup.add_option("-m", "--message",
                         dest="message",
                         help="Specify a commit message for saving a file to Github.")

    # Validate and parse options, set mode
    map(parser.add_option_group,
        [download_group, filegroup, dir_group])

    (opts, args) = parser.parse_args()
    logging.debug("Options and arguments: %s / %s" % (opts, args))

    # Simple substitute for logging
    def usage_exit(level, reason):
        parser.print_help()
        print "[%s] %s" % (level.upper(), reason)
        sys.exit(1)

    try_msg = "Try either 'grabrc FILE' to download a file from Github \
or 'grabrc push FILEPATH' to upload a file."

    # Validate options: number of arguments
    if len(args) > 2 or len(args) == 0:
        usage_exit("error", "Invalid number of arguments. " + try_msg)

    # Validate options: either "save" or empty
    mode = "download"
    if len(args) == 1:
        arg = args[0]
        if arg == "save":
            usage_exit("error", "Please specify a file to save.")
        elif arg == "repo":
            mode = "repo"
        else:
            download_name = arg
    elif "push" in args:
        mode = "upload"
        upload_filepath = (n for n in args if n != "push").next()
    else:
        usage_exit("error", "Invalid arguments. " + try_msg)

    # Validate options: invalid combinations
    if opts.append and opts.replace:
        util.exit_runtime_error("Both --append and --replace were selected. Please select only one.")
    if opts.zip and opts.tar:
        util.exit_runtime_error("Both --keep-zip and --keep-tar were selected. Please select only one.")

    # Set defaults
    opts.destdir = opts.destdir or os.getcwd()
    opts.destdir = util.sanitize_path(opts.destdir)
    if opts.outfile:
        opts.outfile = util.sanitize_path(opts.outfile)

    # Check config file (~/.grabrc) for Github username
    configpath = "%s/.grabrc" % os.path.expanduser("~")

    if opts.__dict__.get('github'):
        github_acc = opts.github

    # Interactively prompt for username if ~/.grabrc does not exist
    if not os.path.isfile(configpath):
        print """\
        ========================================================
        Welcome! This seems to be your first time starting %s.
        Please enter your Github username.
        %s will search for files in the repository named %s""" \
        % (Const.PROG_NAME, Const.PROG_NAME, Const.REPO_NAME)

        github_acc = raw_input('-- Github account: ')
        cfile = open(configpath, 'w+')
        cfile.write(github_acc)
    else:
        cfile = open(configpath, 'r+')
        github_acc = cfile.readline().strip()
    cfile.close()

    opts.github = github_acc
    logging.debug("Github account: %s" % github_acc)

    # Execute actual script
    DIR_PREFIX = "dir:"
    if mode == "upload":
        uploader.save(upload_filepath, opts)
    elif mode == "download":
        if download_name.startswith(DIR_PREFIX):
            downloader.download_subdirectory(download_name[len(DIR_PREFIX):], opts)
        else:
            downloader.download_file(download_name, opts)
    elif mode == "repo":
        downloader.download_repo_nongit(opts)
示例#19
0
""" 
The url to run this program use this url:  https://raw.githubusercontent.com/MikkelHansen95/dataset/master/movies_metadata.csv
"""
import downloader
import convert_csv
import library.popular_danish_movie as popular_danish_movie
import library.english_action_movie as english_action_movie
import library.plot_reliase_and_runtime as reliase_and_runtime
import library.plot_adult_movies as plot_adult_movies
import library.buzz_words as buzz_words
import library.animated_movies as animated_movies
import library.highest_budget as highest_budget

if __name__ == '__main__':
    # BUG FIX: every analysis statement previously ran at module import time
    # and crashed with NameError, because file_name was only bound under this
    # guard.  The whole pipeline now runs under the guard, and the no-op
    # module-level `global file_name` statement was removed.
    file_name = downloader.download_file()
    data = convert_csv.convert_csv_to_dataframe(file_name)

    print(plot_adult_movies.plotting_adult_and_non_adult_movies(data)) # With plot
    print(animated_movies.find_number_of_animated_movies(data))
    print(highest_budget.find_highest_budget(data))
    print(popular_danish_movie.find_most_popular_danish_movie(data))
    print(english_action_movie.english_action_movie_with_biggest_revenue(data))

    # Plots: the dataframes are built but the actual plotting is still missing.
    reliase_and_runtime.create_plot_realise_and_runtime(data)
    buzz_words.find_buzz_words(data)
import os
import config
import log_util
import mysql_util
import downloader
import time
if __name__ == "__main__":
    # 创建下载目录
    if not os.path.exists(config.DOWNLOAD_PATH):
        os.makedirs(config.DOWNLOAD_PATH)
    sql = "select * from sp_random_download_task_multiVer_copy where dl_status = 0 limit 1"
    dbhandler = mysql_util.MysqlUtil()
    # 每次获取一条没有下载的数据
    cur_selected_item = dbhandler.fetchone(sql)
    while cur_selected_item:
        # 乐观锁
        # 当此更新语句成功影响行后才确认拿到下载链接
        res = dbhandler.update("update sp_random_download_task_multiVer_copy set dl_status = 1 where dl_id = %s and dl_status = 0" ,(cur_selected_item[0],))
        if not res:
            cur_selected_item = dbhandler.fetchone(sql)
            continue
        else:
            print("downloading dl_id = {}".format(cur_selected_item[0]))
            downloader.download_file(cur_selected_item, config.THREAD_NUM)