Example #1
def manage_download(path, url):
    # report_error mangles return values, so we don't use it here
    try:
        download_file(url, path)
        print("  Downloaded, processing...")
        return True
    except Exception as e:
        print("  Couldn't fetch URL", e)
        return False
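Every example on this page calls some project-local download_file helper, and the signatures differ from project to project (URL-first or path-first, extra session, credential, or hash arguments). For orientation, a minimal sketch of the simplest variant, assuming the requests library; this is illustrative, not any of the projects' actual helpers:

import requests

def download_file(url, path, chunk_size=8192):
    """Stream url to path, raising on HTTP errors."""
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(path, "wb") as fp:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fp.write(chunk)
    return path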
Example #2
def dump_video(video_url, video_counter, root, session):
    vreq = session.get(video_url)
    with open(root + "video%d.html" % video_counter, "wb") as vfp:
        vfp.write(vreq.content)

    dom = fromstring(vreq.content)

    player_div = dom.find(".//div[@id='UserVideoPlayerObjectDiv']")
    # slice off the JavaScript wrapper to leave the embedded JSON player config
    obj = demjson.decode(player_div.getnext().text[58:-11])
    file_url = obj['modes'][1]['config']['file']

    download_file(session, file_url, root + "video%d.flv" % video_counter)
Example #3
def fetch_food_menu(date=None):
    # a datetime.today() default would be evaluated only once, at definition
    # time, so take None and compute the default inside the function
    if date is None:
        date = datetime.today()

    # calculate the first day of the week
    monday = (date - timedelta(date.weekday())).date()

    # filename format: 'menusitisis_YYYYMMDD'
    filename = 'menusitisis_%d%02d%02d' % (monday.year, monday.month, monday.day)
    doc_path = dir_name + filename + '.doc'
    html_path = dir_name + filename + '.html'

    # create the folder path, if necessary
    if not path.exists(dir_name):
        makedirs(dir_name)

    # download the doc file
    logger.debug('Trying to fetch "%s"' % doc_path)
    download_file(link + filename + '.doc', doc_path)

    if not path.exists(doc_path):
        return None

    if not _convert_to_html(doc_path, html_path):
        return None

    # read the HTML code from disk
    with open(html_path, 'r') as file_html:
        html = file_html.read()

    # Parse the html code #
    logger.debug('Trying to parse...')
    try:
        food_menu = _parse_html(html)
        for i in range(7):
            date_ = monday + timedelta(days=i)
            food_menu[i]['date'] = _date_to_datetime(date_)

    except Exception as ex:
        logger.error(ex)
        return None

    # Update the database #
    try:
        _update_database(food_menu, monday)
    except OperationFailure as ex:
        logger.error('DB Error: %s' % ex)
        return None
    except Exception as ex:
        logger.error(ex)
        return None

    return food_menu
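The signature originally read fetch_food_menu(date=datetime.today()). Python evaluates default values once, at definition time, so a long-running process would keep computing the menu for the day the module was imported; hence the date=None fix above. A minimal demonstration of the pitfall:

from datetime import datetime
import time

def stale(d=datetime.now()):   # default evaluated once, at import
    return d

def fresh(d=None):             # default computed per call
    return d if d is not None else datetime.now()

first = stale()
time.sleep(1.5)
assert stale() == first        # same timestamp on every call
assert fresh() > first         # advances as expected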
Example #4
def download_model(model_name, dst_dir='./', meta_info=None):
    if meta_info is None:
        meta_info = _default_model_info
    meta_info = dict(meta_info)
    if model_name not in meta_info:
        return (None, 0)
    if not os.path.isdir(dst_dir):
        os.mkdir(dst_dir)
    meta = dict(meta_info[model_name])
    assert 'symbol' in meta, "missing symbol url"
    model_name = os.path.join(dst_dir, model_name)
    download_file(meta['symbol'], model_name+'-symbol.json')
    assert 'params' in meta, "missing parameter file url"
    download_file(meta['params'], model_name+'-0000.params')
    return (model_name, 0)
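A hypothetical call, with an illustrative meta_info mapping; the model name and URLs below are placeholders, not real endpoints:

meta_info = {
    'resnet-18': {
        'symbol': 'https://example.com/models/resnet-18-symbol.json',
        'params': 'https://example.com/models/resnet-18-0000.params',
    },
}
prefix, epoch = download_model('resnet-18', dst_dir='./models', meta_info=meta_info)
# prefix == './models/resnet-18', epoch == 0 -- the usual MXNet checkpoint
# naming, loadable with e.g. mx.model.load_checkpoint(prefix, epoch)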
Example #5
  def mac_install_cmake(self):
    """Check for and install cmake.

    Assumes that if cmake is already installed, then the user has correctly set
    their path variable such that the command "cmake --version" will work.

    Raises:
      FileDownloadError: If the cmake tar fails to download, or is incorrectly
          downloaded.
      ExtractionError: If the cmake tar cannot be properly extracted.
    """
    if find_executable("cmake"):
      logging.info("CMake already installed.")
      return
    cmake_version = util.get_file_name(
        CMAKE_VERSIONS.get(self.version)[0], False)
    location = util.check_dir(self.cmake_path, cmake_version, "bin/cmake")
    if location:
      self.cmake_path = location
      logging.info("CMake found at " + self.cmake_path)
      return

    logging.info("CMake not installed. Downloading now.")
    url, file_hash = CMAKE_VERSIONS.get(self.os_version, (None, None))
    url = urlparse.urljoin(CMAKE_DOWNLOAD_PREFIX, url)
    location = os.path.join(common.BASE_DIR, "cmake.tar.gz")
    location = util.download_file(url, location, "cmake", file_hash)
    if not location:
      raise common.FileDownloadError("https://cmake.org/download/", "Please "
                                     "rerun this script afterwards with the "
                                     "flag\n\t--cmake=/path/to/cmake")
    if not util.extract_tarfile(location, "r:gz", self.cmake_path, "cmake"):
      raise common.ExtractionError(location)
    logging.info("CMake successfully installed.")
Example #6
  def mac_install_cwebp(self):
    """Check for and install cwebp.

    Assumes that if cwebp is already installed, then the user has correctly set
    their path variable such that the command "cwebp -h" will work.

    Raises:
      FileDownloadError: If the cwebp tar fails to download, or is incorrectly
          downloaded.
      ExtractionError: If the cwebp tar cannot be properly extracted.
    """
    if find_executable("cwebp"):
      logging.info("cwebp already installed.")
      return
    location = util.check_dir(self.cwebp_path, CWEBP_VERSION, "cwebp")
    if location:
      self.cwebp_path = location
      logging.info("cwebp found at " + self.cwebp_path)
      return
    logging.info("cwebp not installed. Downloading now.")
    location = os.path.join(common.BASE_DIR, "cwebp.tar.gz")
    location = util.download_file(CWEBP_URL, location, "cwebp", CWEBP_HASH)
    if not location:
      raise common.FileDownloadError("https://developers.google.com/speed/webp/"
                                     "docs/precompiled", "Please rerun this "
                                     "script afterwards with the flag\n"
                                     "\t--cwebp=/path/to/cwebp")
    if not util.extract_tarfile(location, "r:gz", self.cwebp_path, "cwebp"):
      raise common.ExtractionError(location)
    logging.info("cwebp successfully installed.")
Example #7
  def windows_install_python(self):
    """Checks for and installs at least Python 2.7.8.

    Raises:
      FileDownloadError: If the Python installer fails to download, or is
          downloaded incorrectly.
      InstallInterruptError: If the user cancels the wait for installation of
          ImageMagick.
      InstallFailedError: If msiexec fails, or Python cannot be installed.
    """
    if find_executable("python"):
      if check_python_version():
        logging.info("Python already installed.")
        return
      else:
        logging.info("Python version not sufficient. Updating now.")
    else:
      logging.info("Python not installed. Downloading now.")
    url, file_hash = PYTHON_VERSIONS.get(self.version)
    url = PYTHON_BASE_URL + url
    location = os.path.join(common.BASE_DIR, "python.msi")
    location = util.download_file(url, location, "python", file_hash)
    if not location:
      raise common.FileDownloadError("https://www.python.org/downloads/release/"
                                     "python-278/", "Please rerun this script "
                                     "after completing manual installation.\n")
    logging.info("Opening Python installer. For convenience, please select the "
                 "'Add python.exe to Path' option.")
    try:
      # check_call raises CalledProcessError on failure; call() never does
      subprocess.check_call("msiexec /i " + location, shell=True)
    except subprocess.CalledProcessError:
      raise common.InstallFailedError("Python", "https://www.python.org/"
                                      "downloads/release/python-278/", "Please "
                                      "rerun this script after installing "
                                      "Python manually.")
Example #8
def download_binaries():
    "Parse config and download dse binaries (local)"

    # TODO since this is done locally on the cperf tool server, is there any possible concurrency
    # issue .. Or maybe we should simply keep a cache on each host? (Comment to remove)
    filename = os.path.join(dse_cache, dse_tarball)

    dse_url = config['dse_url']
    username = config['dse_username'] if 'dse_username' in config else None
    password = config['dse_password'] if 'dse_password' in config else None
    url = urljoin(dse_url, dse_tarball)

    # Fetch the SHA of the tarball:
    correct_sha = download_file_contents(url+'.sha', username, password).split(" ")[0]
    assert len(correct_sha) == 64, 'Failed to download sha file: {}'.format(correct_sha)

    if os.path.exists(filename):
        print("Already in cache: {}".format(filename))
        real_sha = digest_file(filename)
        if real_sha != correct_sha:
            print("Invalid SHA for '{}'. It will be removed".format(filename))
            os.remove(filename)
        else:
            return

    # Fetch the tarball:
    request = download_file(url, filename, username, password)
    real_sha = digest_file(filename)
    # Verify the SHA of the tarball:
    if real_sha != correct_sha:
        raise AssertionError(
            ('SHA of DSE tarball was not verified. should have been: '
             '{correct_sha} but saw {real_sha}').format(correct_sha=correct_sha, real_sha=real_sha))
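The original assertion here was written assert(len(correct_sha) == 64, '...'); parenthesized like a call, assert tests a two-element tuple, which is always truthy, so the check could never fail (CPython emits a SyntaxWarning for exactly this). The fixed form above separates condition and message. For illustration:

assert (False, "never fires")      # tuple is truthy: passes, with a SyntaxWarning
try:
    assert False, "fires"          # correct form: condition, then message
except AssertionError as e:
    print(e)                       # -> fires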
Example #9
  def windows_install_cwebp(self):
    """Check for and install cwebp in given directory.

    Raises:
      FileDownloadError: If the cwebp zip fails to download, or is downloaded
          incorrectly.
    """
    if find_executable("cwebp"):
      if check_cwebp_version():
        logging.info("cwebp already installed.")
        return
      else:
        logging.info("cwebp version not sufficient. Updating now.")
    else:
      location = util.check_dir(self.cwebp_path,
                                CWEBP_VERSIONS.get(self.version)[0],
                                "\\bin\\cwebp.exe")
      if location:
        logging.info("cwebp already installed.")
        self.cwebp_path = location
        return
    version, file_hash = CWEBP_VERSIONS.get(self.version)
    logging.info("cwebp not installed. Downloading now...")
    url = CWEBP_BASE_URL + version + ".zip"
    location = os.path.join(common.BASE_DIR, "cwebp.zip")
    location = util.download_file(url, location, "cwebp", file_hash)
    if not location:
      raise common.FileDownloadError("https://developers.google.com/speed/webp/"
                                     "docs/precompiled", "Please rerun this "
                                     "script afterwards with the flag\n\t"
                                     "--cwebp=\\path\\to\\cwebp")
    util.extract_zipfile(location, "r", self.cwebp_path, "cwebp")
    logging.info("cwebp successfully installed.")
Example #10
  def windows_install_cmake(self):
    """Check for and install cmake.

    Raises:
      FileDownloadError: If the CMake zip fails to download, or is downloaded
          incorrectly.
    """
    if find_executable("cmake"):
      if check_cmake_version():
        logging.info("CMake already installed.")
        return
      else:
        logging.info("CMake version not sufficient. Updating now.")
    else:
      location = util.check_dir(self.cmake_path, CMAKE_VERSION,
                                os.path.join("bin", "cmake.exe"))
      if location:
        logging.info("CMake already installed.")
        self.cmake_path = location
        return
      else:
        logging.info("CMake not installed. Downloading now...")
    location = os.path.join(common.BASE_DIR, "cmake.zip")
    location = util.download_file(CMAKE_URL, location, "cmake", CMAKE_HASH)
    if not location:
      raise common.FileDownloadError("https://cmake.org/download/", "Please "
                                     "rerun this script afterwards with the "
                                     "flag\n\t--cmake=\\path\\to\\cmake")
    util.extract_zipfile(location, "r", self.cmake_path, "cmake")
    logging.info("cmake successfully installed.")
Example #11
  def windows_fix_directx(self):
    """Attempt to fix problems DirectX may be having with Visual Studio.

    DirectX comes pre-installed on Windows 7 and up, but having Visual C++ 2010
    or higher may give an "S1023" error due to it being newer than the latest
    version of DirectX, June 2010 DirectX SDK. This can be fixed by
    reinstalling DirectX once Visual C++ has been established.

    Raises:
      FileDownloadError: If the Visual Studio installer fails to download, or
          is downloaded incorrectly.
    """
    logging.info("Attempting to fix problems with DirectX...")
    try:
      # check_call raises CalledProcessError on failure; call() never does
      subprocess.check_call("MsiExec.exe /passive /X{F0C3E5D1-1ADE-321E-8167-"
                            "68EF0DE699A5}", shell=True)
      subprocess.check_call("MsiExec.exe /passive /X{1D8E6291-B0D5-35EC-8441-"
                            "6616F567A0F7}", shell=True)
    except subprocess.CalledProcessError:
      logging.warning("MsiExec.exe failed. Could not resolve conflicts with "
                      "DirectX and Visual Studio.")
      return
    location = os.path.join(common.BASE_DIR, "directx.exe")
    location = util.download_file(DIRECTX_URL, location, "DirectX",
                                  DIRECTX_HASH)
    if not location:
      raise common.FileDownloadError("http://www.microsoft.com/en-us/download/"
                                     "details.aspx?id=6812", "Please rerun "
                                     "this script after completing manual "
                                     "installation.")
    subprocess.call("start cmd /c " + location, shell=True)
    logging.info("DirectX successfully reinstalled.")
Example #12
  def mac_install_ant(self):
    """Check for and install Apache Ant.

    Raises:
      FileDownloadError: If the ant tar fails to download, or is incorrectly
          downloaded.
      ExtractionError: If the ant tar cannot be properly extracted.
    """
    if find_executable("ant"):
      logging.info("Apache Ant already installed.")
      return
    location = util.check_dir(self.ant_path, ANT_VERSION, "bin/ant")
    if location:
      self.ant_path = location
      logging.info("Apache Ant already installed.")
      return
    logging.info("Apache Ant not installed. Installing now.")
    location = os.path.join(common.BASE_DIR, "ant.tar.gz")
    location = util.download_file(ANT_URL, location, "Ant", ANT_HASH)
    if not location:
      raise common.FileDownloadError("https://www.apache.org/dist/ant/"
                                     "binaries/", "Please rerun this script "
                                     "again afterwards.")
    if not util.extract_tarfile(location, "r:gz", self.ant_path, "Ant"):
      raise common.ExtractionError(location)
    logging.info("Apache Ant successfully installed.")
Example #13
def download_binaries():
    "Parse config and download dse binaries (local)"

    # TODO since this is done locally on the cperf tool server, is there any possible concurrency
    # issue .. Or maybe we should simply keep a cache on each host? (Comment to remove)
    filename = os.path.join(dse_cache, dse_tarball)

    dse_url = config['dse_url']
    username = config['dse_username'] if 'dse_username' in config else None
    password = config['dse_password'] if 'dse_password' in config else None
    url = urljoin(dse_url, dse_tarball)

    # Fetch the SHA of the tarball: download_file_contents returns the request.text of the url.
    # the sha file has the format '874c11f7634974fb41006d30199b55b59fd124db ?./dse-5.0.0-bin.tar.gz'
    # so we split on the space and then check that the sha hexadecimal is 40 characters
    correct_sha = download_file_contents(url+'.sha', username, password).split(" ")[0]
    assert(len(correct_sha) == 40), 'Failed to download sha file: {}'.format(correct_sha)

    if os.path.exists(filename):
        logger.info("Already in cache: {}".format(filename))
        real_sha = digest_file(filename)
        if real_sha != correct_sha:
            logger.info("Invalid SHA for '{}'. It will be removed".format(filename))
            os.remove(filename)
        else:
            return

    # Fetch the tarball:
    request = download_file(url, filename, username, password)
    real_sha = digest_file(filename)
    # Verify the SHA of the tarball:
    if real_sha != correct_sha:
        raise AssertionError(
            ('SHA of DSE tarball was not verified. should have been: '
             '{correct_sha} but saw {real_sha}').format(correct_sha=correct_sha, real_sha=real_sha))
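This variant expects a 40-character digest, i.e. SHA-1. The digest_file helper isn't included in the snippet; a minimal sketch under that assumption:

import hashlib

def digest_file(filename, block_size=65536):
    """Return the SHA-1 hex digest (40 hex chars) of filename."""
    sha = hashlib.sha1()
    with open(filename, 'rb') as f:
        for block in iter(lambda: f.read(block_size), b''):
            sha.update(block)
    return sha.hexdigest()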
Example #14
  def windows_install_imagemagick(self):
    """Check for and install ImageMagick.

    Raises:
      FileDownloadError: If the ImageMagick installer fails to download, or is
          downloaded incorrectly.
      InstallInterruptError: If the user cancels the wait for installation of
          ImageMagick.
    """
    if find_executable("convert"):
      logging.info("ImageMagick is already installed.")
      return
    logging.info("ImageMagick not installed. Downloading now...")
    url, file_hash = IMAGEMAGICK_VERSIONS.get(self.version)
    url = IMAGEMAGICK_BASE_URL + url
    location = os.path.join(common.BASE_DIR, "imagemagick.exe")
    location = util.download_file(url, location, "imagemagick", file_hash)
    if not location:
      raise common.FileDownloadError("http://www.imagemagick.org/script/binary-"
                                     "releases.php", "Please rerun this script "
                                     "after completing manual installation.\n")
    subprocess.call("start cmd /c " + location, shell=True)
    if not util.wait_for_installation("convert"):
      raise common.InstallInterruptError("ImageMagick")
    logging.info("ImageMagick successfully installed.")
Example #15
  def android_download_ndk(self, directory):
    """Checks OS version and downloads the appropriate Android NDK.

    Args:
      directory: String indication of location to unpack NDK
    Raises:
      FileDownloadError: If the NDK bin or exe fails to download.
      InstallInterruptError: If the wait for the NDK installation is
          cancelled.
    """
    if self.system == common.LINUX:
      os_version = subprocess.check_output("uname -m", shell=True)
      if os_version.strip() == "x86_64":
        url, file_hash = NDK_VERSIONS.get(common.LINUX_64)
      else:
        url, file_hash = NDK_VERSIONS.get(common.LINUX_32)
    elif self.system == common.WINDOWS:
      os_version = platform.architecture()[0]
      if os_version == "64bit":
        url, file_hash = NDK_VERSIONS.get(common.WINDOWS_64)
      else:
        url, file_hash = NDK_VERSIONS.get(common.WINDOWS_32)
    else:  # self.system = common.MAC
      url, file_hash = NDK_VERSIONS.get(self.system)
    filetype = util.get_file_type(url)
    url = NDK_DOWNLOAD_PREFIX + url
    ndk_location = os.path.join(directory, "ndk." + filetype)
    ndk_location = util.download_file(url, ndk_location, "Android NDK",
                                      file_hash)
    if not ndk_location:
      raise common.FileDownloadError("http://developer.android.com/ndk/"
                                     "downloads/index.html", "Please rerun "
                                     "this script afterwards with the flag\n"
                                     "\t--android_ndk=/path/to/android_ndk")

    if filetype == "bin":
      # Allow execution by all parties.
      os.chmod(ndk_location, 0755)
      current_dir = os.getcwd()
      os.chdir(common.BASE_DIR)
      os.system(ndk_location)
      os.chdir(current_dir)
      os.remove(ndk_location)
    elif filetype == "exe":
      os.chdir(self.ndk_path)
      subprocess.call("start cmd /c " + ndk_location, shell=True)
      # toolchain-licenses\COPYING is one of the last things to be extracted.
      if not util.wait_for_installation("COPYING", search=True,
                                        basedir=self.ndk_path):
        raise common.InstallInterruptError("Android NDK")
      os.chdir(current_dir)
    else:
      raise common.UnknownFileTypeError(filetype, "Please manually extract "
                                        "Android NDK and rerun this script "
                                        "afterwards with the flag\n\t"
                                        "--android_ndk=/path/to/android_ndk")
Example #16
def download_lab(course):
    print("  -------------  Start downloading " + course +
          "'s Lab  -------------  ")
    url = "https://webcms3.cse.unsw.edu.au/" + course + "/18s1"
    r = client.get(url, verify=False)
    soup = BeautifulSoup(r.text, "lxml")
    ### Lab Activities, Labs
    sider_bar = soup.find('div', id='sidebar')
    lab = sider_bar.find('a', string=re.compile('Lab'))
    if not lab:
        print(course + " may not have Lab")
        return
    location = lab['href']

    url_lec = root + location
    r = client.get(url_lec, verify=False)
    soup = BeautifulSoup(r.text, "lxml")
    blocks = soup.find_all('div', 'panel panel-primary')
    dict[course]["lab"] = {}

    for block in blocks:
        week_str = block.h4.text.strip()
        small = block.h4.small.text.strip()
        week_str = week_str.replace(small, "")
        week_str = week_str.replace("active", "")
        week_str = re.sub(r'\n', "", week_str)
        week_str = " ".join(week_str.split())
        week_str = week_str.strip()
        dict[course]["lab"][week_str] = {}

        path = os.path.join(data_path, course, week_str, "lab")
        if not os.path.exists(path):
            os.makedirs(path)

        items = block.find_all('li', 'list-group-item')
        for item in items:
            name = item.find('a').text.strip()
            if len(name) <= 0:
                continue
            name = " ".join(name.split())

            pdf = item.div.find('a', title="Download")
            if pdf:
                pdf_url = root + pdf.get('href')
                path = os.path.join(data_path, course, week_str, "lab", name)
                path.replace("\"", "§")
                succ = util.download_file(pdf_url, path)
                name = name.replace(".", "&")
                dict[course]["lab"][week_str][name] = pdf_url
    print("  -------------  Lab download complete. :^ )  -------------  ")
Example #17
    def resolve(self, URL, id, context):
        logger.debug("Fetching %s ..." % URL)
        #determine cache path
        url = urlparse.urlparse(URL)
        # Handle relative paths for network locations
        if url.netloc:
            self.last_url = url
        else:
            if not self.last_url:
                raise ValueError("Invalid URL provided for DTD: %s" % URL)
            url = urlparse.urlparse(urlparse.urljoin(self.last_url.geturl(), URL))

        local_base_directory = os.path.join(self.cache, url.netloc)
        local_file = local_base_directory + url.path

        #cache if necessary
        if not os.path.exists(local_file):
            if not os.path.exists(os.path.split(local_file)[0]):
                os.makedirs(os.path.split(local_file)[0])
            download_file(url.geturl(), local_file)

        #resolve the cached file
        return self.resolve_file(open(local_file), context, base_url=URL)
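The resolve signature and the resolve_file call match lxml's custom-resolver protocol (lxml.etree.Resolver). A hypothetical way to wire the resolver into a parser, assuming the class is named CachingResolver and takes its cache directory as a constructor argument:

from lxml import etree

parser = etree.XMLParser(load_dtd=True, no_network=False)
parser.resolvers.add(CachingResolver(cache="/tmp/dtd-cache"))  # hypothetical name/ctor
tree = etree.parse("document.xml", parser)  # DTD fetches now hit the local cache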
Example #18
def scrape_videos(user_id, root, session):
    print(" - videos")

    page_counter = 1
    video_counter = 1
    should_continue = True
    
    while should_continue:
        print("   - page %d" % page_counter)
        
        req = session.get(VIDEO_OVERVIEW_URL % (user_id, page_counter))
        dom = fromstring(req.content)
        folder = root + "videos/"
        os.makedirs(folder, exist_ok=True)
        fp = open(folder + "gallery-page%d.html" % page_counter, 'wb')
        fp.write(req.content)
        fp.close()

        # we're doing a depth first search
        thumbs = dom.xpath(".//div[@class='galleryThumb']")
        for thumb in thumbs:
            print("       - video %d" % video_counter)
            video_url = "http://www.arto.com" + thumb.getchildren()[0].get('href')
            video_thumb = thumb.getchildren()[0][0].get('src')

            download_file(session, video_thumb, folder+"video%d.jpg"%video_counter)

            dump_video(video_url, video_counter, folder, session)
            
            video_counter += 1

        e = dom.xpath('.//a[text()="Næste"]')  # "Næste" is Danish for "Next"
        page_counter += 1
        
        if not e:
            should_continue = False
Example #19
def prepare_data():
    data_name = os.path.join('data', 'friendster-300K.npz')
    t_name = os.path.join('data', 'friendster-300K-T.npz')
    x_name = os.path.join('data', 'friendster-300K-x.npy')
    b_name = os.path.join('data', 'friendster-300K-b.npy')
    util.download_file(
        'https://mxnet-experiment.s3.amazonaws.com/friendster-dataset/friendster-300K.npz',
        data_name)
    util.download_file(
        'https://mxnet-experiment.s3.amazonaws.com/friendster-dataset/friendster-300K-T.npz',
        t_name)
    util.download_file(
        'https://mxnet-experiment.s3.amazonaws.com/friendster-dataset/friendster-300K-x.npy',
        x_name)
    util.download_file(
        'https://mxnet-experiment.s3.amazonaws.com/friendster-dataset/friendster-300K-b.npy',
        b_name)
    return sp.sparse.load_npz(data_name), sp.sparse.load_npz(t_name), np.load(
        x_name), np.load(b_name)
Example #20
def __download_model():
    url_weight = "https://pjreddie.com/media/files/yolov3.weights"
    url_cfg = "https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg?raw=true"
    url_names = "https://github.com/pjreddie/darknet/blob/master/data/coco.names?raw=true"

    fileAlreadyExists = os.path.isfile(PATH_TO_WEIGHTS)
    if not fileAlreadyExists:
        if not os.path.exists(os.path.join(PATH_TO_MODELS, MODEL_NAME)):
            os.makedirs(os.path.join(PATH_TO_MODELS, MODEL_NAME))
        print('Downloading frozen inference graph (.weight, .cfg, .classes): ')
        download_file(url_weight, PATH_TO_WEIGHTS)
        download_file(url_cfg, PATH_TO_CFG)
        download_file(url_names, PATH_TO_CLASSES)
Example #21
def main():
    # Excel file of Miyagi Prefecture data (official)
    url = "https://www.pref.miyagi.jp/uploaded/attachment/826977.xlsx"
    file_name = download_file(url)

    wb = load_workbook(file_name, data_only=True)
    ws = wb["日別集計(HP掲載)"]  # sheet: "daily totals (posted on website)"

    # collect the data
    today = datetime.date.today()
    d = []
    max_row = ws.max_row
    for i in range(2, max_row):
        dt = ws.cell(i, 1).value  # date

        # stop at the first row whose date cell is empty
        if dt is None:
            break
        # stop once we reach rows for dates after today
        date = dt.date()
        if date > today:
            break

        date_isoformat = date.isoformat()
        count = ws.cell(i, 7).value  # total
        # if today's count is 0, treat it as not yet recorded and stop
        if date == today and count == 0:
            break

        print(date, count)
        d.append({
            "date": date_isoformat,
            "count": count
        })

    # write out as JSON
    data = {
        "data": d
    }
    print(data)
    os.makedirs("data", exist_ok=True)
    with open("data/miyagi_data.json", "w") as f:
        json.dump(data, f, indent=4)
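The same scan can be written with openpyxl's iter_rows, which avoids per-cell indexing; a sketch of the loop under the same column layout (date in column 1, total in column 7):

for row in ws.iter_rows(min_row=2, values_only=True):
    dt, count = row[0], row[6]
    if dt is None or dt.date() > today:
        break
    if dt.date() == today and count == 0:
        break
    d.append({"date": dt.date().isoformat(), "count": count})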
Example #22
File: mac.py Project: niu2x/gxm
    def mac_install_macports(self):
        """Check for and install MacPorts.

    Raises:
      FileDownloadError: If the MacPorts package fails to download, or is
          incorrectly downloaded.
      UnknownFileTypeError: If the type of the downloaded package does not match
          any of the supported types.
    """
        if os.path.isfile(MACPORTS_LOCATION):
            logging.info("MacPorts already installed.")
            return
        else:
            logging.info("MacPorts not installed. Downloading now.")
        url, file_hash = MACPORTS_VERSIONS.get(self.os_version)
        url = MACPORTS_DOWNLOAD_PREFIX + url
        suffix = util.get_file_type(url)
        location = os.path.join(common.BASE_DIR, "macports." + suffix)
        location = util.download_file(url, location, "macports", file_hash)
        if not location:
            raise common.FileDownloadError(
                "https://guide.macports.org/chunked/"
                "installing.macports.html", "Please rerun "
                "this script again afterwards.")
        logging.info(
            "Installing Mac Ports. Sudo may prompt you for your password.")
        if suffix == "pkg":
            try:
                # check_call raises CalledProcessError on failure; call() never does
                subprocess.check_call("sudo installer -pkg " + location +
                                      " -target /",
                                      shell=True)
            except subprocess.CalledProcessError:
                raise common.PermissionDeniedError(
                    "installer", "Please enter your "
                    "password to install MacPorts")
        elif suffix == "dmg":
            subprocess.call("hdiutil attach " + location, shell=True)
        else:
            raise common.UnknownFileTypeError(
                suffix, "Please manually install "
                "MacPorts, or run this script again "
                "with the flag\n\t--no_macports")
        self.bash_profile_changed = True  # MacPorts installation will probably edit the bash profile
Example #23
 def run(self):
     queueLock.acquire()
     print('-' * 10, 'Start threadID: %d ' % self.threadID, '-' * 10, '\n')
     queueLock.release()
     if not workQueue.empty():
         queueLock.acquire()
         data = self.q.get()
         downloadpath = os.path.join(self.dirpath, data['RDSInstanse'])
         queueLock.release()
         for download in data['bakdownloadurldict'].items():
             objectpath = download_file(downloadpath, str(download[1]),
                                        str(download[0]))
             putobjectpath = os.path.join(
                 str(self.newdate),
                 os.path.join(str(data['RDSInstanse']), str(download[0])))
             print(putobjectpath)
             PutObjectFromFile(self.bucket, putobjectpath, objectpath)
             print('put %s to oss successful' % putobjectpath)
     else:
         print('-' * 10, 'Start threadID: %d ' % self.threadID, '-' * 10, '\n')
Example #24
def load_cifar100(data_path, label_mode):
    """ Download cifar100 if necessary and load the images and labels for training and test sets.

    Args:
        data_path: (str) path to the directory where CIFAR100 is, or where to download it to.
        label_mode: (str) type of label; one of `fine` or `coarse`.

    Returns:
        train_imgs, train_labels, test_imgs, test_labels.
    """

    cifar_url = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
    cifar_local_folder = 'cifar-100-python'

    # Check if dataset exists and download it if does not
    name = cifar_url.split('/')[-1]
    file_path = util.download_file(data_path, name, cifar_url)
    tarfile.open(file_path, 'r:gz').extractall(data_path)
    input_dir = os.path.join(data_path, cifar_local_folder)

    # Extract train and valid ##################################################################
    d = unpickle(os.path.join(input_dir, 'train'))
    train_imgs = d['data']
    train_labels = np.array(d[f'{label_mode}_labels'], dtype=np.uint8)

    train_imgs = train_imgs.reshape(TRAIN_EX, CIFAR_SHAPE[0], CIFAR_SHAPE[1],
                                    CIFAR_SHAPE[2])
    # Transpose images to shape = [TRAIN_EX, height, width, channels]
    train_imgs = np.transpose(train_imgs, (0, 2, 3, 1))

    # Extract test  ############################################################################
    d = unpickle(os.path.join(input_dir, 'test'))
    test_imgs = d['data']
    test_labels = np.array(d[f'{label_mode}_labels'], dtype=np.uint8)

    test_imgs = test_imgs.reshape(TEST_EX, CIFAR_SHAPE[0], CIFAR_SHAPE[1],
                                  CIFAR_SHAPE[2])
    # Transpose images to shape = [TEST_EX, height, width, channels]
    test_imgs = np.transpose(test_imgs, (0, 2, 3, 1))

    return train_imgs, train_labels, test_imgs, test_labels
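unpickle is not defined in this snippet. The CIFAR archives are pickled by Python 2, so a common definition decodes with latin1, which yields the str keys ('data', 'fine_labels', 'coarse_labels') used above; this is an assumption about the helper, not its actual source:

import pickle

def unpickle(path):
    """Load a CIFAR batch that was pickled by Python 2."""
    with open(path, 'rb') as fo:
        return pickle.load(fo, encoding='latin1')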
Example #25
File: android.py Project: niu2x/gxm
    def android_download_sdk(self, directory):
        """Download Android SDK and unpack into specified directory.

    Args:
      directory: String indication of location to unpack SDK to
    Raises:
      FileDownloadError: SDK tar or zip fails to download
      UnknownFileTypeError: If the file downloaded is neither a tar or a zip,
          and cannot be extracted.
    """
        url, file_hash = SDK_VERSIONS.get(self.system)
        suffix = util.get_file_type(url)
        sdk_location = os.path.join(directory, "sdk." + suffix)
        sdk_location = util.download_file(url, sdk_location, "Android SDK",
                                          file_hash)
        if not sdk_location:
            raise common.FileDownloadError(
                "http://developer.android.com/sdk/index."
                "html#", "Please rerun this script "
                "afterwards with the flag\n"
                "\t--android_sdk=/path/to/android_sdk")
        if suffix == "tgz":
            util.extract_tarfile(sdk_location, "r", directory, "Android SDK")
        elif suffix == "zip":
            util.extract_zipfile(sdk_location, "r", directory, "Android SDK")
        else:
            raise common.UnknownFileTypeError(
                suffix, "Please manually extract "
                "Android SDK and rerun this script "
                "afterwards with the flag\n"
                "\t--android_sdk=/path/to/android_sdk")
        if self.system == common.MAC:
            # Sometimes, permissions aren't set correctly on tools/android on OSX.
            # Change permissions to allow execution by user
            android = os.path.join(directory, SDK_NAMES.get(self.system),
                                   "tools", "android")
            curr_permissions = os.stat(android)
            os.chmod(android, curr_permissions.st_mode | stat.S_IXUSR)
        # Update self.sdk_path to now include the SDK name
        self.sdk_path = os.path.join(self.sdk_path, SDK_NAMES.get(self.system))
Example #26
def download_binaries():
    "Parse config and download dse binaries (local)"

    # TODO since this is done locally on the cperf tool server, is there any possible concurrency
    # issue .. Or maybe we should simply keep a cache on each host? (Comment to remove)
    filename = os.path.join(dse_cache_local, dse_tarball)

    dse_url = config['dse_url']
    username = config['dse_username'] if 'dse_username' in config else None
    password = config['dse_password'] if 'dse_password' in config else None
    url = urljoin(dse_url, dse_tarball)

    # Fetch the SHA of the tarball: download_file_contents returns the request.text of the url.
    # the sha file has the format '874c11f7634974fb41006d30199b55b59fd124db ?./dse-5.0.0-bin.tar.gz'
    # so we split on the space and then check that the sha hexadecimal is 40 characters
    correct_sha = download_file_contents(url + '.sha', username,
                                         password).split(" ")[0]
    assert (len(correct_sha) == 40
            ), 'Failed to download sha file: {}'.format(correct_sha)

    if os.path.exists(filename):
        logger.info("Already in cache: {}".format(filename))
        real_sha = digest_file(filename)
        if real_sha != correct_sha:
            logger.info(
                "Invalid SHA for '{}'. It will be removed".format(filename))
            os.remove(filename)
        else:
            return

    # Fetch the tarball:
    request = download_file(url, filename, username, password)
    real_sha = digest_file(filename)
    # Verify the SHA of the tarball:
    if real_sha != correct_sha:
        raise AssertionError(
            ('SHA of DSE tarball was not verified. should have been: '
             '{correct_sha} but saw {real_sha}').format(
                 correct_sha=correct_sha, real_sha=real_sha))
Example #27
  def android_download_sdk(self, directory):
    """Download Android SDK and unpack into specified directory.

    Args:
      directory: String indication of location to unpack SDK to
    Raises:
      FileDownloadError: SDK tar or zip fails to download
      UnknownFileTypeError: If the file downloaded is neither a tar or a zip,
          and cannot be extracted.
    """
    url, file_hash = SDK_VERSIONS.get(self.system)
    suffix = util.get_file_type(url)
    sdk_location = os.path.join(directory, "sdk." + suffix)
    sdk_location = util.download_file(url, sdk_location, "Android SDK",
                                      file_hash)
    if not sdk_location:
      raise common.FileDownloadError("http://developer.android.com/sdk/index."
                                     "html#", "Please rerun this script "
                                     "afterwards with the flag\n"
                                     "\t--android_sdk=/path/to/android_sdk")
    if suffix == "tgz":
      util.extract_tarfile(sdk_location, "r", directory, "Android SDK")
    elif suffix == "zip":
      util.extract_zipfile(sdk_location, "r", directory, "Android SDK")
    else:
      raise common.UnknownFileTypeError(suffix, "Please manually extract "
                                        "Android SDK and rerun this script "
                                        "afterwards with the flag\n"
                                        "\t--android_sdk=/path/to/android_sdk")
    if self.system == common.MAC:
      # Sometimes, permissions aren't set correctly on tools/android on OSX.
      # Change permissions to allow execution by user
      android = os.path.join(directory, SDK_NAMES.get(self.system), "tools",
                             "android")
      curr_permissions = os.stat(android)
      os.chmod(android, curr_permissions.st_mode | stat.S_IXUSR)
    # Update self.sdk_path to now include the SDK name
    self.sdk_path = os.path.join(self.sdk_path, SDK_NAMES.get(self.system))
Example #28
  def mac_install_macports(self):
    """Check for and install MacPorts.

    Raises:
      FileDownloadError: If the MacPorts package fails to download, or is
          incorrectly downloaded.
      UnknownFileTypeError: If the type of the downloaded package does not match
          any of the supported types.
    """
    if os.path.isfile(MACPORTS_LOCATION):
      logging.info("MacPorts already installed.")
      return
    else:
      logging.info("MacPorts not installed. Downloading now.")
    url, file_hash = MACPORTS_VERSIONS.get(self.os_version)
    url = MACPORTS_DOWNLOAD_PREFIX + url
    suffix = util.get_file_type(url)
    location = os.path.join(common.BASE_DIR, "macports." + suffix)
    location = util.download_file(url, location, "macports", file_hash)
    if not location:
      raise common.FileDownloadError("https://guide.macports.org/chunked/"
                                     "installing.macports.html", "Please rerun "
                                     "this script again afterwards.")
    logging.info("Installing Mac Ports. Sudo may prompt you for your password.")
    if suffix == "pkg":
      try:
        # check_call raises CalledProcessError on failure; call() never does
        subprocess.check_call("sudo installer -pkg " + location + " -target /",
                              shell=True)
      except subprocess.CalledProcessError:
        raise common.PermissionDeniedError("installer", "Please enter your "
                                           "password to install MacPorts")
    elif suffix == "dmg":
      subprocess.call("hdiutil attach " + location, shell=True)
    else:
      raise common.UnknownFileTypeError(suffix, "Please manually install "
                                        "MacPorts, or run this script again "
                                        "with the flag\n\t--no_macports")
    self.bash_profile_changed = True  # MacPorts installation will probably edit the bash profile
Example #29
  def windows_setup_visual_studio(self):
    """Check for compatible versions of Visual Studio and Visual C++.

    If no compatible version of Visual Studio is detected, download default
    version. If a compatible version is detected, check if a compatible
    version of the C++ compiler has been installed.

    Raises:
      FileDownloadError: If the Visual Studio installer fails to download, or
          is downloaded incorrectly.
    """
    for line in self.programs.splitlines():
      if VS_NAME_PREFIX in line:
        for name in get_all_vs():
          if line.strip() == name:
            self.vs_version = VS_COMPATIBLE_VERSIONS.get(name.split(" ")[-1])
            logging.info("Visual Studio already installed.")
            self.windows_check_compiler()
            return
    logging.info("Visual Studio not installed. Installing " + VS_DEFAULT_NAME +
                 " now...")
    location = os.path.join(common.BASE_DIR, "vs_community.exe")
    location = util.download_file(VS_DEFAULT_URL, location,
                                  "Visual Studio Installer", VS_DEFAULT_HASH)
    if not location:
      raise common.FileDownloadError("https://www.visualstudio.com/en-us/"
                                     "downloads/download-visual-studio-vs.aspx",
                                     "Please rerun this script after "
                                     "completing manual installation.")
    logging.info("Now lauching Visual Stusio Installer.\n*** Please ensure you "
                 "select \"Visual C++\" ***\nYour computer will "
                 "likely need to be restarted. If so, click 'Restart Now' when "
                 "prompted and rerun this script after reboot.\nIf no restart "
                 "is required, click 'Finish' and rerun script.")
    subprocess.call("cmd /k " + location, shell=True)
    # cmd /k will stop the script, but just in case, exit
    sys.exit()
Example #30
  def update_java(self):
    """Update Java Runtime Environment.

    There's a bug in the Java installer that sees Yosemite and El Capitan
    (10.10 and 10.11) as '10.1', and hence the android tool won't run. The
    official Apple package, which is installed in this function, doesn't have
    that bug.

    Raises:
      InstallInterruptError: If the wait for installing Java update was
          cancelled.
    """
    if self.os_version < OSX_10_10_YOSEMITE:
      return
    logging.info("Java update required by Android.")
    location = os.path.join(common.BASE_DIR, "java.dmg")
    location = util.download_file(JAVA_UPDATE_URL, location, "java",
                                  JAVA_UPDATE_HASH)
    if not location:
      logging.warning("Please visit https://support.apple.com/kb/DL1572 for "
                      "download link and extraction instructions.\nPlease "
                      "rerun this script afterwards to complete setup.")
      return  # location is None; nothing to attach below
    logging.info("Finder will open. Double click on \"JavaForOSX.pgk\" to "
                 "continue installation")
    subprocess.call("hdiutil attach " + location, shell=True)
Example #31
def download_binaries():
    "Parse config and download dse binaries (local)"

    # TODO since this is done locally on the cperf tool server, is there any possible concurrency
    # issue .. Or maybe we should simply keep a cache on each host? (Comment to remove)
    filename = os.path.join(dse_cache, dse_tarball)

    dse_url = config['dse_url']
    username = config['dse_username'] if 'dse_username' in config else None
    password = config['dse_password'] if 'dse_password' in config else None
    url = urljoin(dse_url, dse_tarball)

    # Fetch the SHA of the tarball:
    correct_sha = download_file_contents(url + '.sha', username,
                                         password).split(" ")[0]
    assert len(correct_sha) == 64, \
        'Failed to download sha file: {}'.format(correct_sha)

    if os.path.exists(filename):
        print("Already in cache: {}".format(filename))
        real_sha = digest_file(filename)
        if real_sha != correct_sha:
            print("Invalid SHA for '{}'. It will be removed".format(filename))
            os.remove(filename)
        else:
            return

    # Fetch the tarball:
    request = download_file(url, filename, username, password)
    real_sha = digest_file(filename)
    # Verify the SHA of the tarball:
    if real_sha != correct_sha:
        raise AssertionError(
            ('SHA of DSE tarball was not verified. should have been: '
             '{correct_sha} but saw {real_sha}').format(
                 correct_sha=correct_sha, real_sha=real_sha))
Example #32
File: mac.py Project: niu2x/gxm
    def mac_install_cmake(self):
        """Check for and install cmake.

    Assumes that if cmake is already installed, then the user has correctly set
    their path variable such that the command "cmake --version" will work.

    Raises:
      FileDownloadError: If the cmake tar fails to download, or is incorrectly
          downloaded.
      ExtractionError: If the cmake tar cannot be properly extracted.
    """
        if find_executable("cmake"):
            logging.info("CMake already installed.")
            return
        cmake_version = util.get_file_name(
            CMAKE_VERSIONS.get(self.version)[0], False)
        location = util.check_dir(self.cmake_path, cmake_version, "bin/cmake")
        if location:
            self.cmake_path = location
            logging.info("CMake found at " + self.cmake_path)
            return

        logging.info("CMake not installed. Downloading now.")
        url, file_hash = CMAKE_VERSIONS.get(self.os_version, (None, None))
        url = urlparse.urljoin(CMAKE_DOWNLOAD_PREFIX, url)
        location = os.path.join(common.BASE_DIR, "cmake.tar.gz")
        location = util.download_file(url, location, "cmake", file_hash)
        if not location:
            raise common.FileDownloadError(
                "https://cmake.org/download/", "Please "
                "rerun this script afterwards with the "
                "flag\n\t--cmake=/path/to/cmake")
        if not util.extract_tarfile(location, "r:gz", self.cmake_path,
                                    "cmake"):
            raise common.ExtractionError(location)
        logging.info("CMake successfully installed.")
Example #33
def process_tweebo():
    """Read, convert and save the Tweebo corpus"""
    download_file(TWEEBO_DAILY547_DL, "Data/TweeboDaily547.conll")
    download_file(TWEEBO_OCT27_DL, "Data/TweeboOct27.conll")

    def interpret_conll(path):
        """
        Read a CONLL file line-by-line and export the tags
        :param path: The path of the file
        :return: A list of Tag objects.
        """
        logging.info("Reading %s...", path)
        ret = []
        # the Tweebo tagset; tagmap below maps each raw tag to a universal POS tag
        tweebo = ['N', 'O', '^', 'S', 'Z', 'V', 'L',
                  'M', 'A', 'R', '!', 'D', 'P', '&',
                  'T', 'X', 'Y', '#', '@', '~', 'U',
                  'E', '$', ',', 'G']
        tagmap = {
            'N': u'NOUN',
            'O': u'PRON',
            '^': u'NOUN',
            'S': u'X',
            'Z': u'NOUN',
            'V': u'VERB',
            'L': u'PRON',
            'M': u'NOUN',
            'A': u'ADJ',
            'R': u'ADV',
            '!': u'.',
            'D': u'DET',
            'P': u'CONJ',
            '&': u'CONJ',
            'T': u'PRT',
            'X': u'DET',
            'Y': u'DET',
            '#': u'X',
            '@': u'NOUN',
            '~': u'X',
            'U': u'X',
            'E': u'.',
            '$': u'NUM',
            ',': u'.',
            'G': u'X'
        }
        with open(path, 'r') as fp:
            for line in fp:
                line = line.strip()
                if len(line) == 0:
                    continue
                line = line.split()
                word, raw = line
                t = Tag(word, tagmap[raw])
                ret.append(t)
        return ret

    d547 = interpret_conll("Data/TweeboDaily547.conll")
    o24 = interpret_conll("Data/TweeboOct27.conll")

    with open('Data/TweeboDaily547.pkl', 'wb') as fout:
        logging.info("Saving daily...")
        pickle.dump(d547, fout, pickle.HIGHEST_PROTOCOL)

    with open('Data/TweeboOct27.pkl', 'wb') as fout:
        logging.info("Saving Oct...")
        pickle.dump(o24, fout, pickle.HIGHEST_PROTOCOL)
Example #34
    # excerpt: body of the per-file loop; url, parts, and the counters are defined above
    fname = parts[-1]

    f, e = os.path.splitext(fname)
    lasfile = None
    if e.lower() == '.laz':
        lasfile = f + '.las'
        if os.path.exists(lasfile):
            already_in_stock += 1
            print("Already have %s" % lasfile)
            continue

    # Fetch the file
    count += 1
    print("Downloading %d/%d %s.." % (count, total, fname), end="")
    try:
        if download_file(url, fname):
            success += 1
    except Exception as e:
        print("..failed, %s" % e)
        fail += 1
    print()

    if lasfile:
        # Uncompress the file
        print("Unpacking %s.." % fname, end="")
        try:
            args = [laszip, fname]
            p = subprocess.check_output(args)
            unpacked = True
        except Exception as e:
            print("..failed, %s" % e)
Example #35
#!/usr/bin/env python

from util import download_file, parse_vulnerable_softwares, send_email
from xmlExtractor import XMLReader

if __name__ == "__main__":
    download_file()
    xmlReader = XMLReader()
    xmlReader.xml_parser()

    vulnerable = parse_vulnerable_softwares(xmlReader.get_cves())

    if len(vulnerable) > 0:
        msg = "{}".format("\n".join(vulnerable[::-1]))
        print(msg)
        send_email(msg)
Example #36
 def reply_pic(self, pic_url, ext):
     log.info("Received a picture, URL: " + pic_url)
     filename = "%s_%s.%s" % (time.time(), self.conversation_id, ext)
     util.download_file(pic_url, filename)
     self.replier.pic(filename)
     os.remove(filename)
Example #37
class Club(Best11):
    """
    Contains all info pertaining to an individual club
    (e.g. Noworry About MJ), given its id.
    """
    def __init__(self, club_id=None, club=None, manager=None):
        """
        Parameters:
            club_id (int, or str of digits)
        """
        super().__init__()

        if not any((club_id, club, manager)):
            raise Exception("You must provide either club_id, club or manager")
        elif club_id:
            # The club_id has been given. Verify it is numeric...
            if not isinstance(club_id, int):
                # if a digit string, convert to int
                if not re.match(r"^\d{1,4}$", club_id):
                    raise ValueError(
                        "Please enter the club_id in number form, "
                        "or use alternative constructors to initialise "
                        "using club name or manager."
                    )
                club_id = int(club_id)
        elif club:
            # elif passed the club name, get the corresponding club_id for that club
            club_id = self.club_id_from_club(club)
        elif manager:
            # elif passed the club manager, get the corresponding club_id for that manager

            # if set to user, get username from session
            if manager == 'user': manager = self.session.username

            club_id = self.club_id_from_manager(manager)

        self.club_id = club_id
        self.params = {'id': self.club_id}
        self.soup_dict = self.__get_soup_dict()

    def __repr__(self):
        # TODO
        pass

    def __str__(self):
        # TODO
        pass

    # --- Soup ---

    def __get_soup_dict(self):
        """ Returns a dict of multi-use soup elements.
        e.g. the club_info_table is called by multiple properties. """
        request = self.session.request("GET",
                                       "vizualizare_club.php?",
                                       params=self.params)
        soup = make_soup(request)

        # Set instance var for full soup
        self.soup = soup

        # Common soup elements
        club_info = soup.find_all('table')[3].find_all('tr')[1]
        return {
            'club_info': club_info,
            'club_info_links': club_info.find_all('a'),
            'equipment': soup.find_all('table')[5]
        }

    # --- Avatar ---

    @property
    def avatar(self):
        """ Returns the link to a club's avatar.
        r-type: str """
        request = self.session.request("GET",
                                       "vizualizare_club.php?",
                                       params=self.params)
        soup = make_soup(request)

        # Grab avatar link from soup and replace spaces to make working link
        avatar = soup.find_all('table')[1].find_all('tr')[2].find('img').get(
            'src').replace(' ', '%20')

        # If avatar is the default img
        if '/standard.jpg' in avatar:
            return False

        full_link = f"{self.session.MAIN_URL}{avatar}"
        return full_link

    def download_avatar(self):
        """ Downloads club's avatar to current directory.
        If club's avatar is default, returns False. 
        r-type: str """

        if not (avatar := self.avatar):
            return False
        try:
            util.download_file(avatar)
        except Exception as e:
            raise Exception("Could not download avatar!") from e
Example #38
    json.dump(collection, f)

# collect card images for cards in collection
image_folder = os.path.join("public", "images")
os.makedirs(image_folder, exist_ok=True)  # create if missing; no error when it already exists
image_extension = ".jpg"
# determine missing images
image_filelist = set(glob.glob(os.path.join(image_folder, "*")))
missing_images = {x for x in collection if image_filename(x, image_folder, image_extension) not in image_filelist}
# download missing card images with rate limiter
no_uri = 0
failed = set()
for c_id in tqdm.tqdm(missing_images, desc = "downloading missing images"):
    card = card_index[c_id]
    if 'image_uris' in card:
        try:
            uri = card['image_uris']['normal']
            download_file(uri, headers = None, filename = image_filename(c_id, image_folder, image_extension))
            time.sleep(0.1) # rate limiter
        except Exception:
            failed.add(c_id)
    else:
        no_uri += 1

pprint(f"no image uri available for {no_uri} cards")
pprint(f"download failed for cards: {failed}")

make_prices_overview(collection, card_index)
Example #39
    print(u"Matched jobs found")

else:
    print(u"No package jobs found, aborting")
    sys.exit(1)

if dryrun:
    print(u"Dry Run only, no files will be downloaded")

for job in jobnames:
    file_paths = u.fetch_job_file_paths(job, jobnames)
    match = jobnames[job]
    for filename, url in file_paths:
        print(u"Downloading file {0}".format(filename))
        path = u.get_final_path(job, match, filename)
        u.download_file(url, path)
        # Hash file, if it has not changed sha1 hash for this job, we unlink it so it won't be updated by the rest of the script
        hash = u.hash_file(path)
        if u.same_hash(filename, hash, conn):
            print(u"==> File not changed since last run, skipping")
            os.remove(path)
            continue
        elif not dryrun:  # as long as we are not a dry run, update the hash
            if not u.update_hash(filename, hash, conn):
                print(u"==> Could not save hash, deleting file")
                os.remove(path)
                continue
        else:
            # always remove file in dryrun
            os.remove(path)
Example #40
        already_in_stock = 0
        print("Files to download: %d" % total)

        for (folder, url) in everything:
            outputfolder = os.path.join(base, "Lidar", folder)
            parts = url.split('/')
            fname = parts[-1]

            if not os.path.exists(outputfolder):
                os.makedirs(outputfolder)

            os.chdir(outputfolder)
            count += 1

            if os.path.exists(os.path.join(outputfolder, fname)):
                print("Already have \"%s\" / \"%s\"" % folder, fname)
                already_in_stock += 1
                continue

            print("Downloading %d/%d %s.." % (count, total, fname), end="")
            try:
                success += download_file(url, fname)
            except Exception as e:
                print(".. failed, %s" % e)
                fail += 1
            print()

    print("Downloaded %d, failed to download %d, already had %d" %
          (success, fail, already_in_stock))
    exit(0)
Example #41
batch_size = 32
num_classes = 10
epochs = 20
hidden_units = 100

learning_rate = 1e-6
clip_norm = 1.0

# the data, split between train and test sets

import pickle

from util import download_file

s3_url = 'https://s3.amazonaws.com/neural-networking-book/ch02/notMNIST_3.5.pickle?versionId=j53VUhZj_FXe9iFSN0O.KLedt08.DGy4'

pickle_file = download_file(s3_url, 'notMNIST_3.5.pickle')
#pickle_file = './data/notMNIST.pickle'
image_size = 28
num_of_labels = 10
with open('./' + pickle_file, 'rb') as f:
    save = pickle.load(f)
    training_dataset = save['train_dataset']
    training_labels = save['train_labels']
    validation_dataset = save['valid_dataset']
    validation_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
    print('Training set', training_dataset.shape, training_labels.shape)
    print('Validation set', validation_dataset.shape, validation_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)
Exemplo n.º 42
0
  def file_report_download(self, report, url_params=None):
    url_params = url_params or {}  # avoid a mutable default argument
    token = self.token
    params = util.merge_params(self.params, url_params)
    url = "%s/file/report/download" % self.connection_string
    return util.download_file(url, params, {'token': token, 'report': report})
Exemplo n.º 43
0
import os
import tarfile

import util

data_path = './data'  # assumed from the './data/jpg' check below; not shown in the excerpt
trained_path = './trained'
base_url = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/'

if not os.path.exists(data_path):
    os.mkdir(data_path)

if not os.path.isdir(trained_path):
    os.mkdir(trained_path)

flowers_archive_path = os.path.join(data_path, '102flowers.tgz')
img_label_path = os.path.join(data_path, 'imagelabels.mat')
setid_path = os.path.join(data_path, 'setid.mat')

if not os.path.isfile(flowers_archive_path):
    print('Downloading images...')
    util.download_file(base_url + '102flowers.tgz')
else:
    print("Image data already exists\n")

if not os.path.isdir('./data/jpg'):
    print("Unzipping the image files...")
    tarfile.open(flowers_archive_path).extractall(path=data_path)

if not os.path.isfile(img_label_path):
    print("Downloading image labels...")
    util.download_file(base_url + 'imagelabels.mat')
else:
    print("Image labels already exist\n")

if not os.path.isfile(setid_path):
    print("Downloading train/test/valid splits...")
    util.download_file(base_url + 'setid.mat')
Exemplo n.º 44
0
def download_lecture_notes(course):
    print("  -------------  Start downloading " + course +
          "'s Lecture  -------------  ")
    url = "https://webcms3.cse.unsw.edu.au/" + course + "/19T1"
    r = client.get(url, verify=False)
    if r.status_code != 200:
        return
    soup = BeautifulSoup(r.text, "lxml")
    sidebar = soup.find('div', id='sidebar')
    lec = sidebar.find('a', string="Lectures")
    if not lec:
        lec = sidebar.find('a', string="Lectures  ")
    location = lec['href']

    url_lec = root + location
    r = client.get(url_lec, verify=False)
    soup = BeautifulSoup(r.text, "lxml")
    blocks = soup.find_all('div', 'panel panel-primary')
    # NB: 'dict' is a module-level mapping in the original script (it shadows the builtin).
    dict[course]["lec"] = {}

    for block in blocks:
        week_str = block.h4.text.strip()
        small = block.h4.small.text.strip()
        week_str = week_str.replace(small, "")
        week_str = week_str.replace("active", "")
        week_str = re.sub(r'\n', "", week_str)
        week_str = " ".join(week_str.split())
        week_str = week_str.strip()
        dict[course]["lec"][week_str] = {}

        path = os.path.join(course, week_str)
        if not os.path.exists(path):
            os.makedirs(path)

        items = block.find_all('li', 'list-group-item')
        for item in items:
            name = item.find('a').text.strip()
            if len(name) <= 0:
                continue
            name = " ".join(name.split())

            pdf = item.div.find('a', title="Download")
            pdf_url = root
            if pdf:
                pdf_url = root + pdf.get('href')
            if pdf_url == root:
                pdf_url = item.div.a.get('href')

            if pdf_url != root:
                name = name.replace("/", " ")
                path = os.path.join(course, week_str, name)
                util.download_file(pdf_url, path)
                name = name.replace(".", "&")
                dict[course]["lec"][week_str][name] = pdf_url
            else:
                print("Cannot find lecture pdf")

    print("  -------------  Lecture download complete. :^ )  -------------  ")
Exemplo n.º 45
0
class Club(Spider):
    """ Contains all info pertaining to an individual club
    (e.g. Noworry About MJ), given its id. """

    def __init__(self, sesh, club_id):
        super().__init__(sesh)

        # Verify that club_id is numeric
        if not isinstance(club_id, int):
            # if it is a numeric string, convert it to int
            # (assumes 're' is imported at module level)
            if not re.match(r"^\d{1,4}$", club_id):
                raise ValueError("""Please enter the club_id in number form,\
                or use alternative constructors to initialise using club name or manager.""")
            club_id = int(club_id)

        self.club_id = club_id

        # The same params are used so frequently across the class
        # that I just made it an instance var
        self.params = {'id': self.club_id}

        self.soup_dict = self.__get_soup_dict()

    def __repr__(self):
        class_name = self.get_class_name()
        return f'''{class_name} (\
            \n\tClub_id: {self.club_id}\
            \n\tClub_name: {self.club_name}\
            \n\tStatus: {self.status}\
            \n\t)'''

    def __str__(self):
        class_name = self.get_class_name()
        return f"{class_name} object ({self.club_name} [{self.club_id}])"

    @classmethod
    def fromName(cls, session, club_name):
        """ -Alternative Constructor-
        :param club_name :type str
        Returns an instance that corresponds to that club_name.
        If it cannot be found, an Exception is raised.
        """
        club_id = cls.get_club_id_from_name(session, club_name)
        if not club_id:
            raise Exception(f"Could not get id for club: {club_name}")
        return cls(session, club_id)

    @classmethod
    def fromManager(cls, session, manager):
        """ -Alternative Constructor-
        :param manager :type str
        Returns an instance that corresponds to that manager's club.
        If it cannot be found, an Exception is raised.
        """
        club_id = cls.get_club_id_from_manager(session, manager)
        if not club_id:
            raise Exception(f"Could not get id from manager: {manager}")
        return cls(session, club_id)

    # --- Soup ---

    def __get_soup_dict(self):
        """ Returns a dict of multi-use soup elements.
        e.g. the club_info_table is called by multiple properties. """
        request = req_get(self.sesh, "vizualizare_club.php?", params=self.params)
        soup = make_soup(request)

        # Set instance var for full soup
        self.soup = soup

        # Common soup elements
        club_info = soup.find_all('table')[3].find_all('tr')[1]
        return {
            'club_info': club_info,
            'club_info_links': club_info.find_all('a'),
            'equipment': soup.find_all('table')[5]
        }

    # --- Avatar ---

    @property
    def avatar(self):
        """ Returns the link to a club's avatar. """
        request = req_get(self.sesh, "vizualizare_club.php?", params=self.params)
        soup = make_soup(request)

        # Grab avatar link from soup and replace spaces to make working link
        avatar = soup.find_all('table')[1].find_all('tr')[2].find('img').get('src').replace(' ', '%20')

        # If avatar is the default img
        if '/standard.jpg' in avatar:
            return False

        full_link = MAIN_URL + avatar
        return full_link

    def download_avatar(self):
        """ Downloads club's avatar to current directory.
        If club's avatar is default, returns False. """

        if not (avatar := self.avatar):
            return False
        try:
            util.download_file(avatar)
        except Exception as e:
            raise Exception("Could not download avatar!") from e
Exemplo n.º 46
0
from util import download_file
from util import exists

FILE_NAME = 'forge-1.7.10-10.13.4.1558-1.7.10-installer.jar'
if not exists(FILE_NAME):
    print('downloading server...')
    download_file('https://files.minecraftforge.net/maven/net/minecraftforge/forge/1.7.10-10.13.4.1558-1.7.10/forge-1.7.10-10.13.4.1558-1.7.10-installer.jar', FILE_NAME)
    print('downloading server finished')
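The exists helper imported above is presumably a thin wrapper over the standard library; a minimal sketch under that assumption:

import os.path

def exists(path):
    # True once the installer jar has already been downloaded.
    return os.path.isfile(path)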
Exemplo n.º 47
0
    def get_artifact(self,
                     artifact,
                     local_path=None,
                     only_newer=True,
                     background=False):

        key = artifact.get('key')
        bucket = artifact.get('bucket')

        if key is None:
            assert not artifact['mutable']
            assert artifact.get('url') is not None or \
                artifact.get('qualified') is not None

            remote_path = artifact.get('url')
            if remote_path is None:
                remote_path = artifact.get('qualified')

            key = hashlib.sha256(remote_path.encode()).hexdigest()
            local_path = fs_tracker.get_blob_cache(key)
            if os.path.exists(local_path):
                self.logger.info(
                    ('Immutable artifact exists at local_path {},' +
                     ' skipping the download').format(local_path))
                return local_path

            if artifact.get('url') is not None:
                download_file(remote_path, local_path, self.logger)
            else:
                if remote_path.startswith('dockerhub://') or \
                   remote_path.startswith('shub://'):
                    self.logger.info(
                        ('Qualified {} points to a shub or dockerhub,' +
                         ' skipping the download').format(remote_path))
                    return remote_path

                download_file_from_qualified(remote_path, local_path,
                                             self.logger)

            self.logger.debug(
                'Downloaded file {} from external source {}'.format(
                    local_path, remote_path))
            return local_path

        if local_path is None:
            if 'local' in artifact.keys() and \
                    os.path.exists(artifact['local']):
                local_path = artifact['local']
            else:
                if artifact['mutable']:
                    local_path = fs_tracker.get_artifact_cache(key)
                else:
                    local_path = fs_tracker.get_blob_cache(key)
                    if os.path.exists(local_path):
                        self.logger.info(
                            ('Immutable artifact exists at local_path {},' +
                             ' skipping the download').format(local_path))
                        return local_path

        local_path = re.sub(r'/\Z', '', local_path)  # strip any trailing slash
        local_basepath = os.path.dirname(local_path)

        self.logger.info(
            "Downloading dir {} to local path {} from storage...".format(
                key, local_path))

        if only_newer and os.path.exists(local_path):
            self.logger.debug(
                'Comparing date of the artifact in storage with local')
            storage_time = self._get_file_timestamp(key)
            local_time = os.path.getmtime(local_path)
            if storage_time is None:
                self.logger.info(
                    "Unable to get storage timestamp, storage is either " +
                    "corrupted or has not finished uploading")
                return local_path

            if local_time > storage_time - self.timestamp_shift:
                self.logger.info(
                    "Local path is younger than stored, skipping the download")
                return local_path

        tar_filename = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        self.logger.debug("tar_filename = {} ".format(tar_filename))

        def finish_download():
            try:
                self._download_file(key, tar_filename)
            except BaseException as e:
                self.logger.debug(e)

            if os.path.exists(tar_filename):
                # first, figure out if the tar file has a base path of .
                # or not
                self.logger.info("Untarring {}".format(tar_filename))
                listtar, _ = subprocess.Popen(['tar', '-tf', tar_filename],
                                              stdout=subprocess.PIPE,
                                              stderr=subprocess.PIPE,
                                              close_fds=True).communicate()
                listtar = listtar.strip().split(b'\n')
                listtar = [s.decode('utf-8') for s in listtar]

                self.logger.info('List of files in the tar: ' + str(listtar))
                if listtar[0].startswith('./'):
                    # Files are archived into tar from .; adjust path
                    # accordingly
                    basepath = local_path
                else:
                    basepath = local_basepath

                tarcmd = ('mkdir -p {} && ' +
                          'tar -xf {} -C {} --keep-newer-files') \
                    .format(basepath, tar_filename, basepath)

                self.logger.debug('Tar cmd = {}'.format(tarcmd))

                tarp = subprocess.Popen(['/bin/bash', '-c', tarcmd],
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT,
                                        close_fds=True)

                tarout, tarerr = tarp.communicate()
                if tarp.returncode != 0:
                    self.logger.info('tar had a non-zero return code!')
                    self.logger.info('tar cmd = ' + tarcmd)
                    self.logger.info('tar output: \n ' + str(tarout))

                if len(listtar) == 1:
                    actual_path = os.path.join(basepath, listtar[0])
                    self.logger.info('Renaming {} into {}'.format(
                        actual_path, local_path))
                    retry(lambda: os.rename(actual_path, local_path),
                          no_retries=5,
                          sleep_time=1,
                          exception_class=OSError,
                          logger=self.logger)

                os.remove(tar_filename)
            else:
                self.logger.warning(
                    'file {} download failed'.format(tar_filename))

        if background:
            t = Thread(target=finish_download)
            t.start()
            return (local_path, t)
        else:
            finish_download()
            return local_path
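The extraction above shells out to tar. As a portable alternative, a hedged sketch using the standard-library tarfile module (there is no direct equivalent of --keep-newer-files, so this is a sketch rather than a drop-in replacement):

import os
import tarfile

def extract_archive(tar_filename, basepath):
    # Create the target directory, record member names (so the caller can
    # reproduce the single-file rename logic above), then extract everything.
    os.makedirs(basepath, exist_ok=True)
    with tarfile.open(tar_filename) as tar:
        members = tar.getnames()
        tar.extractall(path=basepath)
    return members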
Exemplo n.º 48
0
  def report_format_download(self, file, url_params=None):
    url_params = url_params or {}  # avoid a mutable default argument
    token = self.token
    params = util.merge_params(self.params, url_params)
    url = "%s/report/format/download" % self.connection_string
    return util.download_file(url, params, {'token': token, 'file': file})
Exemplo n.º 49
0
#!/usr/bin/env python

from util import download_file, parse_vulnerable_softwares, send_email
from xmlExtractor import XMLReader

if __name__ == "__main__":
    download_file()
    xmlReader = XMLReader()
    xmlReader.xml_parser()

    vulnerable = parse_vulnerable_softwares(xmlReader.get_cves())

    if len(vulnerable) > 0:
        msg = "{}".format("\n".join(vulnerable[::-1]))
        print msg
        send_email(msg)
Exemplo n.º 50
0
def prepare_data():
    data_name = os.path.join('data', 'enwiki8.npy')
    util.download_file('https://mxnet-experiment.s3.amazonaws.com/enwiki-dataset/enwiki8.npy', data_name)
    with open(data_name, 'rb') as f:
        return np.load(f)
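Assuming util.download_file skips the fetch when data/enwiki8.npy is already present, a typical call site would be:

corpus = prepare_data()  # downloads on the first run, loads from disk afterwards
print(corpus.shape)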
Exemplo n.º 51
0
def process_tweebo():
    """Read, convert and save the Tweebo corpus"""
    download_file(TWEEBO_DAILY547_DL, "Data/TweeboDaily547.conll")
    download_file(TWEEBO_OCT27_DL, "Data/TweeboOct27.conll")

    def interpret_conll(path):
        """
        Read a CONLL file line-by-line and export the tags
        :param path: The path of the file
        :return: A list of Tag objects.
        """
        logging.info("Reading %s...", path)
        ret = []
        # Full Tweebo tagset, mapped to coarse universal tags below:
        # N O ^ S Z V L M A R ! D P & T X Y # @ ~ U E $ , G
        tagmap = {
            'N': u'NOUN',
            'O': u'PRON',
            '^': u'NOUN',
            'S': u'X',
            'Z': u'NOUN',
            'V': u'VERB',
            'L': u'PRON',
            'M': u'NOUN',
            'A': u'ADJ',
            'R': u'ADV',
            '!': u'.',
            'D': u'DET',
            'P': u'CONJ',
            '&': u'CONJ',
            'T': u'PRT',
            'X': u'DET',
            'Y': u'DET',
            '#': u'X',
            '@': u'NOUN',
            '~': u'X',
            'U': u'X',
            'E': u'.',
            '$': u'NUM',
            ',': u'.',
            'G': u'X'
        }
        with open(path, 'r') as fp:
            for line in fp:
                line = line.strip()
                if len(line) == 0:
                    continue
                line = line.split()
                word, raw = line
                t = Tag(word, tagmap[raw])
                ret.append(t)
        return ret

    d547 = interpret_conll("Data/TweeboDaily547.conll")
    o27 = interpret_conll("Data/TweeboOct27.conll")

    # Pickle in binary mode; text mode breaks with the binary pickle protocol.
    with open('Data/TweeboDaily547.pkl', 'wb') as fout:
        logging.info("Saving daily...")
        pickle.dump(d547, fout, pickle.HIGHEST_PROTOCOL)

    with open('Data/TweeboOct27.pkl', 'wb') as fout:
        logging.info("Saving Oct...")
        pickle.dump(o27, fout, pickle.HIGHEST_PROTOCOL)
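The Tag class used above is not shown in this excerpt; a hedged minimal stand-in, assuming it only carries a word and its coarse tag:

from collections import namedtuple

# Hypothetical stand-in for the project's Tag class.
Tag = namedtuple('Tag', ['word', 'pos'])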