Example #1
 def test_remote_download_urls_extract(self):
     executor.delete_dir(REMOTE_TARGET, TEST_DATA.creds)
     wget.download(REMOTE_TARGET, self.urls, creds=TEST_DATA.creds, extract=True)
     act_hash = hasher.get_md5(REMOTE_TARGET, TEST_DATA.creds)
     assert act_hash == TEST_DATA.download_md5_extracted, "remote download failed. Expected: {}, Actual: {}".format(
         TEST_DATA.download_md5_extracted, act_hash
     )
Example #2
 def test_local_download_urls_extract(self):
     shutil.rmtree(LOCAL_TARGET)
     wget.download(LOCAL_TARGET, TEST_DATA.zip_url, filenames='x.zip', extract=True)
     assert len(glob('{}/*.xml'.format(LOCAL_TARGET))) > 0, 'Failed to download/extract the url.'
     shutil.rmtree(LOCAL_TARGET)
Example #3
 def test_local_download_urls(self):
     shutil.rmtree(LOCAL_TARGET)
     wget.download(LOCAL_TARGET, self.urls)
     act_hash = hasher.get_md5(LOCAL_TARGET)
     assert act_hash == TEST_DATA.download_md5, "local download failed. Expected: {}, Actual: {}".format(
         TEST_DATA.download_md5, act_hash
     )
Example #4
 def test_remote_download_urls(self):
     executor.delete_dir(REMOTE_TARGET, TEST_DATA.creds)
     wget.download(REMOTE_TARGET, self.urls, creds=TEST_DATA.creds)
     act_hash = hasher.get_md5(REMOTE_TARGET, TEST_DATA.creds)
     assert act_hash == TEST_DATA.download_md5, 'remote download failed. Expected: {}, Actual: {}'.format(
         TEST_DATA.download_md5, act_hash
     )
Example #5
 def test_download_with_key(self):
     executor.delete_dir(REMOTE_TARGET, TEST_DATA.creds)
     wget.download(REMOTE_TARGET, self.urls, creds=TEST_DATA.creds)
     rsync.download(REMOTE_TARGET, LOCAL_TARGET, creds=TEST_DATA.creds)
     act_hash = hasher.get_md5(LOCAL_TARGET)
     assert act_hash == TEST_DATA.download_md5, "download failed. Expected: {}, Actual: {}".format(
         TEST_DATA.download_md5, act_hash
     )
Example #7
def main(lon, lat):
    '''First argument: longitude of the landslide location in decimal degrees
    (E positive, W negative); second argument: latitude (N positive, S negative),
    in decimal format (not degrees/minutes).'''
    nlat, wlon, slat, elon = ten_km_square(lat, lon)
    #print("(NLat:{:.4f},WLon:{:.4f}),(SLat:{:.4f},ELon:{:.4f});".format(nlat, wlon, slat, elon))
    region = (
        '[{:.4f},{:.4f}], [{:.4f},{:.4f}], [{:.4f},{:.4f}], [{:.4f},{:.4f}]'.
        format(wlon, nlat, elon, nlat, wlon, slat, elon, slat))
    rectangle = ('[{:.4f},{:.4f},{:.4f},{:.4f}]'.format(
        wlon, slat, elon, nlat))
    lister = region.split('/')
    finallist = [ast.literal_eval(i) for i in lister]  # parse the string into coordinate tuples
    final = [list(value) for value in finallist]  # one ring of [lon, lat] corner pairs
    print(final)
    import ee
    import ee.mapclient
    ee.Initialize()
    start = ee.Date('2017-01-01')
    finish = ee.Date('2017-03-20')

    # use the parsed corners as the polygon boundary of the 10 km square area
    rectangle = ee.Geometry.Polygon(final)
    collection = ee.ImageCollection('COPERNICUS/S2')

    filteredCollection = collection.filterBounds(rectangle).filterDate(
        start, finish)

    first = filteredCollection.first()
    path = first.getDownloadURL({'region': final, 'scale': 10})
    print(path)
    urls = [path]
    # save the archive to a local directory of your choice
    wget.download('/home/path/to/save', urls[0], filenames='x.zip', extract=True)
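The helper ten_km_square is defined outside this excerpt. A rough stand-in, consistent with how its result is unpacked above (north latitude, west longitude, south latitude, east longitude), could look like the sketch below; the 5 km half-side and the 111 km-per-degree conversion are assumptions, not the original implementation.

import math

def ten_km_square(lat, lon):
    # Hypothetical helper: bounding box of roughly 10 km per side centred on (lat, lon).
    half_side_km = 5.0
    dlat = half_side_km / 111.0  # ~111 km per degree of latitude
    dlon = half_side_km / (111.0 * math.cos(math.radians(lat)))  # longitude degrees shrink with latitude
    return lat + dlat, lon - dlon, lat - dlat, lon + dlon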
Example #8
def get_pdfs(listofurls):
    if len(listofurls) < 1:
        return False
    os.mkdir('/tmp/havenpdfs', 0755)
    numberdownload = 0
    try:
        # wget.download fetches the whole list in one call
        wget.download('/tmp/havenpdfs', listofurls, extract=False)
        numberdownload = len(listofurls)
    except Exception:
        print numberdownload
        print "Problem..."

    return numberdownload
Example #9
    if not(os.path.exists(glove_dir)):
        os.makedirs(glove_dir)

    with open(osp.join(glove_dir,name ), "wb") as code:
        code.write(f.read())
'''

if __name__ == "__main__":
    vector_size = [50, 150, 300]
    vector_names_glove = [
        "glove_wiki_50.txt", "glove_wiki_150.txt", "glove_wiki_300.txt"
    ]
    vector_names_word2vec_50 = ["w2v_wiki_50.txt"]
    print "Starting download"
    wget.download(glove_dir,
                  urls=glove_urls,
                  filenames=vector_names_glove,
                  parallelism=3)

    #  print "Downloaded", vector_names_glove
    #wget.download(word2vec_dir, urls=w2v_50, filenames=w2v_names_50, parallelism=3)
    #print "Downloaded", w2v_names_50
    #wget.download(word2vec_dir, urls=w2v_150, filenames=w2v_names_150, parallelism=3)
    # print "Downloaded", w2v_names_150
    #wget.download(word2vec_dir, urls=w2v_300, filenames=w2v_names_300, parallelism=3)
    # print "Downloaded", w2v_names_300
    # wget.download(glove_dir, urls=[glove_urls[0]],filenames=[vector_names[0]],parallelism=3)
    #  wget.download(glove_dir, urls=[glove_urls[1]],filenames=[vector_names[1]],parallelism=3)
    # wget.download(glove_dir, urls=[glove_urls[2]],filenames=[vector_names[2]],parallelism=3)
    '''

    This takes some time. The files are quite big.
Example #11
mesDrugs = mesResults['drug']
mesEvents = mesDrugs['event']
mesPartitions = mesEvents['partitions']

os.chdir('data')
for partition in mesPartitions:
    url = partition['file']
    print(url)
    file_name = url.split('/')[-1]
    dir_name = url.split('/')[-2]
    if not os.path.isfile(os.path.join(dir_name, file_name[0:-4])):
        if not os.path.isdir(dir_name):
            os.makedirs(dir_name)

        os.chdir(dir_name)
        monfichier = wget.download(os.getcwd(), url, filenames=file_name)
        monzip = zipfile.ZipFile(file_name, 'r')
        monzip.extractall()
        os.remove(file_name)
        os.chdir('..')

os.chdir('..')

#I save the fields needed to answer question 1 from the document provided.
#I planned to make use of safetyreportid, safetyreportversion and companynumb, but I ran out of time;
#it needs to be done in a more finalised version.
#There are several reaction (reactionmeddrapt) fields per record, so we create one separate output line for each (see the sketch after this snippet).

all_files = []
os.chdir('data')
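As a rough illustration of the flattening described in the comments above (one output row per reaction), here is a minimal sketch. The field names follow the openFDA drug-event schema; the file names and the CSV layout are assumptions, not part of the original script.

import csv
import json

# Hypothetical flattening step: one CSV row per reactionmeddrapt entry of each safety report.
with open('partition.json') as src, open('reactions.csv', 'w', newline='') as dst:
    writer = csv.writer(dst)
    writer.writerow(['safetyreportid', 'safetyreportversion', 'companynumb', 'reactionmeddrapt'])
    for record in json.load(src)['results']:
        for reaction in record.get('patient', {}).get('reaction', []):
            writer.writerow([record.get('safetyreportid'),
                             record.get('safetyreportversion'),
                             record.get('companynumb'),
                             reaction.get('reactionmeddrapt')])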
Example #12
def main3(imgs):
    wget.download('.', imgs)
Example #13
import urllib
import re
from parallel_sync import wget
from os.path import abspath, dirname, join
#
# parse html
#
link = "http://ronnywang-newsdiff.s3-website-ap-northeast-1.amazonaws.com/2016/"
fp = urllib.urlopen(link)
myHTML = fp.read()

matches = re.findall('<a href="201602([^\"]+)">', myHTML, re.DOTALL)
#print(matches)

# 
# download
#
prefix = 'http://ronnywang-newsdiff.s3-website-ap-northeast-1.amazonaws.com/2016/201602'
downList = [prefix + x for x in matches]
#print(downList)
targetPath = join(dirname(abspath(__file__)),'extract')
wget.download(targetPath, downList, extract=True)
Example #14
    for line in status:
        orders = int(line)
    status.close()

    if data['lender'] > orders:
        status = open('status.txt', 'w')
        status.write(str(data['lender']))
        status.close()
        media_url = data['path'].split('/')
        path = ''
        for i in range(media_url.index('media'), len(media_url)):
            path = os.path.join(path, media_url[i])
        path_details = os.path.join(path, 'details.txt')

        mk_dir = os.path.join('/home/flytech/Documents/Projekty/WebMediaRoot', data['name'])
        if not os.path.exists(mk_dir):
            os.mkdir(mk_dir)
        download_target = [os.path.join('http://127.0.0.1:8000/', path_details)]
        wget.download(mk_dir, download_target)

        details = open(os.path.join(mk_dir, 'details.txt'), 'r')
        photos = []
        path_photo = os.path.join('http://127.0.0.1:8000/', path)
        for line in details:
            photos.append(os.path.join(path_photo, line.split(', ')[0]))

        wget.download(mk_dir, photos)

    time.sleep(1)
Example #15
    def parse_items(self, response):
        # Create an item
        item = NetCDFScraperItem()

        # Populate it
        item["url"] = response.url
        item["date"] = str(datetime.datetime.utcnow())
        item["status"] = "UNKWN"

        if response.url.endswith(".nc4") or response.url.endswith(
                ".nc") or response.url.endswith("dods"):
            stored_item = None
            # Try to check if this url has been visited using MongoDB
            client = MongoClient(self.mongodb_url)
            user = self.configSectionMap("mongodb")['user']
            password = self.configSectionMap("mongodb")['password']
            print "user:"******" password:"******"url": response.url})
            else:
                print "No MongoDb authentication"
            client.close()

            if stored_item is not None:
                # For now a very trivial behaviour;
                # it should be smarter
                item["status"] = "ASTRD"
                return item

            downloaded = False
            tempdir = ""
            filename = ""

            # Check if it is served by an OPeNDAP server
            try:
                # The netCDF file is hosted by an OPeNDAP server,
                # so there is no need to download it
                rootgrp = Dataset(response.url)
                rootgrp.close()
                filename = response.url
            except:
                # Unfortunately the netCDF file has to be downloaded
                tempdir = tempfile._get_default_tempdir() + "/" + next(
                    tempfile._get_candidate_names())
                path = urlparse.urlsplit(response.url).path
                filename = tempdir + "/" + posixpath.basename(path)
                wget.download(tempdir, response.url)
                downloaded = True

            # Get the feature
            netCDF2JSON = NetCDF2JSON()
            feature = netCDF2JSON.get(filename, response.url)

            # Remove the downloaded file and directory if needed
            if downloaded is True:
                os.remove(filename)
                os.rmdir(tempdir)

            # Check if the feature is valid
            if feature is not None:
                #print json.dumps(feature,None,"\t")

                # Try to save the item in mongodb
                client = MongoClient(self.mongodb_url)
                if client.netcdf.authenticate(user, password) is True:
                    db = client.netcdf
                    items = db.items
                    item_id = items.insert(feature)
                    item["status"] = "NEWFT"
                else:
                    print "No MongoDb authentication"
                client.close()

        # Return the item
        return item
Example #16
def download_urls():
    """ downloads some files """
    urls = ['http://www.nationalgeographic.com/dc/exposure/homepage/photoconfiguration/image/70759_photo_nxqzsecnr7nwui2pbv33cboxp3vu2hmpyjyavf6lo6pvvsfavj3q_850x478.jpg',\
            'http://www.nationalgeographic.com/dc/exposure/homepage/photoconfiguration/image/70867_photo_g2j2wmgshw2nhigkyrhbstxkylvu2hmpyjyavf6lo6pvvsfavj3q_850x478.jpg']
    wget.download('/tmp/images', urls, creds=env)
Example #17
bucket = 'colab-pea'
filename = './10MB'
path = 'test.zip'
#s3.upload_file(filename, bucket, path)

urls = [
    'https://newsmaze.net/storage/new_proof_disk/Pea/Director1.7z',
    'https://newsmaze.net/storage/new_proof_disk/Pea/Director2.7z',
    'https://newsmaze.net/storage/new_proof_disk/Pea/Director3.7z',
    'https://newsmaze.net/storage/new_proof_disk/Pea/Director4.7z',
    'https://newsmaze.net/storage/new_proof_disk/Pea/Director5.7z',
    'https://newsmaze.net/storage/new_proof_disk/Pea/ELGBlockchanin.7z',
    'https://newsmaze.net/storage/new_proof_disk/Pea/EnergyProject.7z'
]
# wget.download('/tmp', urls)
# or a single file:
wget.download(os.path.join(os.getcwd(), 'tmp'), urls, tries=10)

for url in urls:
    try:
        filename = re.search(r'\w*\.\w*$', url).group(0)
        path = os.path.join(os.getcwd(), 'tmp', filename)
        #path = f'./tmp/{filename}'
        #wget.download(os.path.join(os.getcwd(),'tmp'), url, filenames=filename)
        print(f'uploading {filename} to s3')
        s3.upload_file(path, bucket, filename)
        #os.remove(path)

    except Exception:
        print(f'error uploading {url}: {sys.exc_info()}')
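The s3 client used above is created outside this excerpt. A minimal setup, assuming boto3 with credentials configured through the usual AWS mechanisms (environment variables or ~/.aws/credentials), might look like this:

import boto3

# Hypothetical client setup; the original script defines `s3` before this excerpt.
s3 = boto3.client('s3')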
Example #19
from parallel_sync import wget
import os

target_path = os.getcwd()

url = "http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2"

wget.download(target_path, urls=url)