def test_media_io_base_download(self):
    # Two-chunk HTTP sequence: bytes 0-2, then 3-4, of a 5-byte payload.
    self.request.http = HttpMockSequence([
        ({'status': '200', 'content-range': '0-2/5'}, '123'),
        ({'status': '200', 'content-range': '3-4/5'}, '45'),
    ])
    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)
    # Constructor records the stream, chunk size, and request URI,
    # with no progress yet and total size still unknown.
    self.assertEqual(self.fd, download._fd)
    self.assertEqual(3, download._chunksize)
    self.assertEqual(0, download._progress)
    self.assertEqual(None, download._total_size)
    self.assertEqual(False, download._done)
    self.assertEqual(self.request.uri, download._uri)

    # First chunk: 3 bytes arrive, total size learned, not finished.
    status, done = download.next_chunk()
    self.assertEqual(self.fd.getvalue(), '123')
    self.assertEqual(False, done)
    self.assertEqual(3, download._progress)
    self.assertEqual(5, download._total_size)
    self.assertEqual(3, status.resumable_progress)

    # Second chunk completes the 5-byte body.
    status, done = download.next_chunk()
    self.assertEqual(self.fd.getvalue(), '12345')
    self.assertEqual(True, done)
    self.assertEqual(5, download._progress)
    self.assertEqual(5, download._total_size)
def test_media_io_base_download(self):
    """Download a 5-byte payload in two 3-byte chunks and verify state."""
    chunks = [
        ({"status": "200", "content-range": "0-2/5"}, "123"),
        ({"status": "200", "content-range": "3-4/5"}, "45"),
    ]
    self.request.http = HttpMockSequence(chunks)
    download = MediaIoBaseDownload(fh=self.fh, request=self.request, chunksize=3)

    # Freshly constructed: nothing downloaded, size unknown.
    self.assertEqual(self.fh, download.fh_)
    self.assertEqual(3, download.chunksize_)
    self.assertEqual(0, download.progress_)
    self.assertIsNone(download.total_size_)
    self.assertFalse(download.done_)
    self.assertEqual(self.request.uri, download.uri_)

    # First chunk delivers three bytes and reveals the total size.
    status, done = download.next_chunk()
    self.assertFalse(done)
    self.assertEqual("123", self.fh.getvalue())
    self.assertEqual(3, download.progress_)
    self.assertEqual(5, download.total_size_)
    self.assertEqual(3, status.resumable_progress)

    # Second chunk finishes the payload.
    status, done = download.next_chunk()
    self.assertTrue(done)
    self.assertEqual("12345", self.fh.getvalue())
    self.assertEqual(5, download.progress_)
    self.assertEqual(5, download.total_size_)
def pull_file(service, file_id, src_path, dst_path):
    """Download a Drive file's bytes to a local file.

    Args:
        service: authorized Drive API service object.
        file_id: ID of the Drive file to download.
        src_path: remote path, used only in the completion log line.
        dst_path: local path the content is written to.
    """
    request = service.files().get_media(fileId=file_id)
    # BUG FIX: media content is raw bytes — open in binary mode ('wb').
    # Text mode ('w') corrupts binary payloads and fails on Python 3.
    with open(dst_path, 'wb') as f:
        downloader = MediaIoBaseDownload(f, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            if not done:
                logging.info('downloading... {:.2f}%'.format(
                    status.progress() * 100))
    logging.info('{} <- {}'.format(dst_path, src_path))
def test_media_io_base_download_retries_5xx(self):
    # Server errors (5xx) before each successful chunk should be retried
    # with exponential backoff rather than surfaced to the caller.
    self.request.http = HttpMockSequence([
        ({'status': '500'}, ''),
        ({'status': '500'}, ''),
        ({'status': '500'}, ''),
        ({'status': '200', 'content-range': '0-2/5'}, '123'),
        ({'status': '503'}, ''),
        ({'status': '503'}, ''),
        ({'status': '503'}, ''),
        ({'status': '200', 'content-range': '3-4/5'}, '45'),
    ])
    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)
    # Constructor state: no progress yet, size unknown.
    self.assertEqual(self.fd, download._fd)
    self.assertEqual(3, download._chunksize)
    self.assertEqual(0, download._progress)
    self.assertEqual(None, download._total_size)
    self.assertEqual(False, download._done)
    self.assertEqual(self.request.uri, download._uri)

    # Set time.sleep and random.random stubs.
    sleeptimes = []
    download._sleep = lambda x: sleeptimes.append(x)
    download._rand = lambda: 10

    status, done = download.next_chunk(num_retries=3)

    # Check for exponential backoff using the rand function above:
    # sleeps double each retry (10*2, 10*4, 10*8).
    self.assertEqual([20, 40, 80], sleeptimes)

    # First chunk eventually succeeded despite the three 500s.
    self.assertEqual(self.fd.getvalue(), '123')
    self.assertEqual(False, done)
    self.assertEqual(3, download._progress)
    self.assertEqual(5, download._total_size)
    self.assertEqual(3, status.resumable_progress)

    # Reset time.sleep stub.
    del sleeptimes[0:len(sleeptimes)]

    status, done = download.next_chunk(num_retries=3)

    # Check for exponential backoff using the rand function above.
    self.assertEqual([20, 40, 80], sleeptimes)

    # Second chunk completes the payload despite the three 503s.
    self.assertEqual(self.fd.getvalue(), '12345')
    self.assertEqual(True, done)
    self.assertEqual(5, download._progress)
    self.assertEqual(5, download._total_size)
def get(service):
    """Prompt for an object name, download it from the bucket, and decrypt it.

    NOTE(review): Python 2 code (raw_input / print statements).
    """
    try:
        file_name = raw_input('Which filename to download from cloud?\n')
        # Fetch and display the object's metadata first.
        req = service.objects().get(bucket=_BUCKET_NAME, object=file_name,)
        resp = req.execute()
        print json.dumps(resp, indent=2)
        # Stream the payload into an in-memory buffer, 1 MiB at a time.
        req = service.objects().get_media(bucket=_BUCKET_NAME, object=file_name,)
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, req, chunksize=1024*1024)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            if status:
                print 'Download %d%%.' % int(status.progress() * 100)
        print 'Download Complete!'
        # Look up the per-file decryption key in the local CSV key store
        # (rows are filename,key pairs — presumably; verify against writer).
        reader = csv.reader(open('dict_pw.csv', 'rb'))
        newD = dict(x for x in reader)
        key = newD[file_name]
        print key
        decrypt_file(fh.getvalue(), key, file_name)
    except client.AccessTokenRefreshError:
        print ("Error in the credentials")
def processObject(client, itemname):
    """Fetch JSON object `itemname` from the errors bucket and write it to a local file.

    Args:
        client: authorized Cloud Storage service object.
        itemname: name of the object inside `errorsBucket`.
    """
    try:
        # Get Payload Data
        req = client.objects().get_media(bucket=errorsBucket, object=itemname)
        # Probe whether the object exists by executing the request once.
        fileExists = True
        try:
            req.execute()
        except Exception:
            # BUG FIX: narrowed from a bare `except:` so KeyboardInterrupt /
            # SystemExit are no longer swallowed; any API failure still
            # means "object missing" as before.
            fileExists = False
        # continue only when the object exists
        if fileExists:
            # The BytesIO object may be replaced with any io.Base instance.
            fh = io.BytesIO()
            downloader = MediaIoBaseDownload(fh, req, chunksize=1024 * 1024)
            done = False
            while not done:
                status, done = downloader.next_chunk()
            jsonFile = fh.getvalue()
            # NOTE(review): fh holds bytes but the target is opened in text
            # mode — fine on Python 2, would need 'wb' on Python 3. Confirm
            # the runtime before changing.
            jf = open("../" + errorsBucket + "/" + itemname.encode(), 'w')
            jf.write(jsonFile)
            jf.close()
    except oauth2_client.AccessTokenRefreshError:
        # Credential failures are deliberately ignored (best-effort job).
        pass
def download(argv):
    """Download gs://<bucket>/<object> (argv[1]) to a local file (argv[2]).

    NOTE(review): Python 2 code. Retry bookkeeping (progressless_iters,
    `error`) is set up here but the handling for it is not visible in this
    excerpt — confirm the tail of the function elsewhere.
    """
    bucket_name, object_name = argv[1][5:].split('/', 1)
    filename = argv[2]
    assert bucket_name and object_name
    service = get_authenticated_service(RO_SCOPE)
    print 'Building download request...'
    # NOTE(review): text-mode handle; binary objects would need 'wb'.
    f = file(filename, 'w')
    request = service.objects().get_media(bucket=bucket_name,
                                          object=object_name)
    media = MediaIoBaseDownload(f, request, chunksize=CHUNKSIZE)
    print 'Downloading bucket: %s object: %s to file: %s' % (bucket_name, object_name, filename)
    progressless_iters = 0
    done = False
    while not done:
        error = None
        try:
            progress, done = media.next_chunk()
            if progress:
                print_with_carriage_return(
                    'Download %d%%.' % int(progress.progress() * 100))
        except HttpError, err:
            error = err
            # Client errors (4xx) are not retryable — re-raise immediately.
            if err.resp.status < 500:
                raise
        except RETRYABLE_ERRORS, err:
            error = err
def processObject(client, itemname):
    """Fetch JSON object `itemname` from the jsons bucket and hand it to doDownloadJob.

    Args:
        client: authorized Cloud Storage service object.
        itemname: name of the object inside `comm.jsonsDir`.
    """
    try:
        # Get Payload Data
        req = client.objects().get_media(bucket=comm.jsonsDir, object=itemname)
        # Probe whether the object exists by executing the request once.
        fileExists = True
        try:
            req.execute()
        except Exception:
            # BUG FIX: narrowed from a bare `except:` so KeyboardInterrupt /
            # SystemExit are no longer swallowed; any API failure still
            # means "object missing" as before.
            fileExists = False
        # continue only when the object exists
        if fileExists:
            # The BytesIO object may be replaced with any io.Base instance.
            fh = io.BytesIO()
            downloader = MediaIoBaseDownload(fh, req, chunksize=1024 * 1024)
            done = False
            while not done:
                status, done = downloader.next_chunk()
            # Load the accessed json-object into a dictionary and process it.
            jsonToDict = json.loads(fh.getvalue())
            doDownloadJob(jsonToDict, itemname)
    except oauth2_client.AccessTokenRefreshError:
        # Store the error message into the respective errors bucket.
        comm.printException(comm.pathToSaveDownloadErrors,
                            errString="False credentials")
        pass
def retrieve_content(
        self, bucket, path, transform=None, generation=None, **kwargs):
    """Retrieves the content at the specified path.

    Args:
      bucket: [string] The bucket to retrieve front.
      path: [string] The path to the content to retrieve from the bucket.
      generation: [long] Specifies version of object (or None for current).
      transform: [callable(string)] transform the downloaded bytes into
         something else (e.g. a JSON object). If None then the identity.

    Returns:
      transformed object.
    """
    self.logger.info('Retrieving path=%s from bucket=%s [generation=%s]',
                     path, bucket, generation)

    # Build the media-download request for the object's payload.
    request = self.service.objects().get_media(
        bucket=bucket, object=path, generation=generation, **kwargs)

    # Pull the payload down one megabyte at a time.
    buf = io.BytesIO()
    downloader = MediaIoBaseDownload(buf, request, chunksize=1024 * 1024)
    finished = False
    while not finished:
        progress, finished = downloader.next_chunk()
        if progress:
            self.logger.debug('Download %d%%', int(progress.progress() * 100))

    payload = buf.getvalue()
    if transform is None:
        return payload
    return transform(payload)
def download_file(self, file_id, file_name):
    """Download a Drive file's content to the local filesystem.

    Args:
      file_id: ID of the Drive file that will downloaded.
      file_name: used as name for to write content in.
    """
    request = self._service.files().get_media(fileId=file_id)
    # BUG FIX: media bytes must be written in binary mode; the original
    # 'w+' text mode corrupts binary content (and fails on Python 3).
    # `with` also guarantees the handle is closed on every path.
    with open(file_name, 'wb') as fd:
        media_request = MediaIoBaseDownload(fd, request)
        while True:
            try:
                download_progress, done = media_request.next_chunk()
            except Exception:
                # Narrowed from a bare `except:`; still best-effort.
                print('An error occurred')
                return
            if download_progress:
                print('Download Progress: %d%%' %
                      int(download_progress.progress() * 100))
            if done:
                print('Download Complete')
                return
def get(self, remote_path, local_path, report_to=None):
    """Download `remote_path` from cloud storage into directory `local_path`.

    Calls report_to(resumable_progress, total_size) every time progress
    advances by at least self.report_interval percent. Returns the full
    local path of the downloaded file.
    """
    LOG.debug("Downloading %s from cloud storage (local path: %s)",
              remote_path, local_path)
    bucket, name = self._parse_url(remote_path)
    local_path = os.path.join(local_path, os.path.basename(remote_path))
    request = self.cloudstorage.objects().get_media(bucket=bucket, object=name)
    f = open(local_path, "w")
    try:
        media = MediaIoBaseDownload(f, request, chunksize=self.chunk_size)
        last_progress = 0
        finished = False
        while not finished:
            status, finished = media.next_chunk()
            if not status:
                continue
            percentage = int(status.progress() * 100)
            if percentage - last_progress >= self.report_interval:
                if report_to:
                    report_to(status.resumable_progress, status.total_size)
                last_progress = percentage
    finally:
        f.close()
    LOG.debug("Finished downloading %s", os.path.basename(local_path))
    return local_path
def scan_item_contents(item):
    """Download a private, non-folder Drive item and scan its content for matches."""
    # BUG FIX: the key was misspelled 'mimType', so the folder check never
    # matched and folders were processed like files.
    if item.get('mimeType') == self.FOLDER_MIME:
        return
    shared = item.get('shared', None)
    if shared is None or shared:
        return  # only process files known to be private
    file_id = item.get('id', None)
    name = item.get('name', None)
    available = True
    if file_id and name:
        f_path = create_temp_name(temp_dir, name)
        logger.warning("Processing file {}...{}".format(name, f_path))
        f = open(f_path, 'wb')
        try:
            request = self.client.files().get_media(fileId=file_id)
            downloader = MediaIoBaseDownload(f, request)
            done = False
            while done is False:
                status, done = downloader.next_chunk()
        except Exception as ex:
            logger.error("Unable to download file {}. {}".format(name, ex))
            available = False
        f.close()
        if available:
            try:
                matches.extend(search_content(f_path, expressions))
            except Exception as ex:
                logger.error("Unable to parse content in file {}. {}".format(name, ex))
        try:
            os.remove(f_path)
        except Exception as ex:
            logger.error("Unable to clean up temprary file {}. {}".format(f_path, ex))
def main():
    """Shows basic usage of the Google Drive API.

    Exports a Google Doc as plain text, processes the local copy, then
    uploads the result back to the same Drive file.
    """
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)
    file_id = '18r3cUWKbMaWVYtNKLJjxZFHB2m7y1QJdkSPlrU197PA'  # For the test doc
    request = service.files().export_media(fileId=file_id, mimeType='text/plain')
    # BUG FIX: the original also called request.execute() here, which
    # downloaded the entire document a second time and discarded it; the
    # downloader below is the only consumer the request needs.
    fh = open(FILE_LOC, 'wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %.2f%%." % (status.progress() * 100.0))
    fh.close()
    # Now read doc.txt to get information
    find_date()
    # Upload the file back
    update_request = service.files().update(fileId=file_id, media_body=FILE_LOC)
    update_request.execute()
def download_file(service, filename):
    """Find a Drive file by title and download it to a local file of the same name.

    NOTE(review): Python 2 code. If no entry matches `filename`, `tmp` is
    never assigned and the get_media call below raises NameError — worth
    guarding with an early return.
    """
    result = []
    result = retrieve_all_files(service)
    # Linear scan for the first file whose title matches.
    for i in result:
        if i['title'] == filename:
            tmp = i['id']
            break
    f = open(filename, 'wb')
    request = service.files().get_media(fileId=tmp)
    media_request = MediaIoBaseDownload(f, request)
    while True:
        try:
            download_progress, done = media_request.next_chunk()
        except errors.HttpError, error:
            print 'An error occurred: %s' % error
            return
        #if download_progress:
        #    print 'Download Progress: %d%%' % int(download_progress.progress() * 100)
        if done:
            print 'Google download complete'
            return
def File(gs_path, chunk_size=CHUNK_SIZE): """Download a file from the cloud, and return a file that's readable. Args: gs_path: Fully qualified gfs path, eg, 'gfs://bucket/path/to/FILE'. chunk_size: The chunk_size to download, defaults to CHUNK_SIZE. Returns: An IO stream to be read. """ bucket_name, object_name = gs_path[5:].split('/', 1) logging.info('Downloading file: %s/%s', bucket_name, object_name) credentials = GoogleCredentials.get_application_default() service = discovery.build('storage', 'v1', credentials=credentials) request = service.objects().get_media(bucket=bucket_name, object=object_name) output = StringIO.StringIO() media = MediaIoBaseDownload(output, request, chunksize=chunk_size) done = False while not done: try: _, done = media.next_chunk() except HttpError, err: if err.resp.status < 500: raise except RETRYABLE_ERRORS, err: pass
def download(self, folder_service_callback):
    """Download the newest backup file from Drive and return its local path."""
    upload_folder = self.helper.get_fileid_by_name(helper.UPLOADFOLDER)
    # Resolve the most recent file's title and its download request.
    info = self.helper.get_newest_file_down_info(upload_folder)
    print('Downloading latest backup ...')
    filename = folder_service_callback.getTempFolder() + info['title']
    fh = io.FileIO(filename, 'wb')
    downloader = MediaIoBaseDownload(fh, info['request'])
    progressless_iters = 0
    finished = False
    while not finished:
        error = None
        try:
            status, finished = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100), end="\r")
        except HttpError as err:
            # Client errors (4xx) are fatal; server errors are retried.
            if err.resp.status < 500:
                raise
            error = err
        except self.RETRYABLE_ERRORS as err:
            error = err
        if error:
            progressless_iters += 1
            self.handle_progressless_iter(error, progressless_iters)
        else:
            progressless_iters = 0
    return filename
def main(argv):
    """Fetch a JSON object from GCS using a GCE metadata-server access token.

    Args:
        argv: dict with 'bucket' and 'object' keys naming the target.
    """
    _BUCKET_NAME = argv["bucket"]
    _FILE1_NAME = argv["object"]
    http = httplib2.Http()
    # Ask the GCE metadata server for a service-account access token.
    token_uri = '%s/%s/token' % (METADATA_SERVER, SERVICE_ACCOUNT)
    resp, content = http.request(token_uri, method='GET', body=None,
                                 headers={'Metadata-Flavor': 'Google'})
    if resp.status == 200:
        d = json.loads(content)
        access_token = d['access_token']  # Save the access token
        credentials = oauth2_client.AccessTokenCredentials(
            access_token, 'my-user-agent/1.0')
        client = api_discovery.build('storage', _API_VERSION,
                                     http=credentials.authorize(http))
        try:
            # Get Metadata — probe whether the object exists at all.
            req = client.objects().get(
                bucket=_BUCKET_NAME, object=_FILE1_NAME)  # optional
            fileExists = True
            try:
                resp = req.execute()
            except HttpError:
                fileExists = False
                print (str(fileExists))
            except:
                # NOTE(review): bare except also hides unrelated failures
                # (including KeyboardInterrupt) — consider narrowing.
                fileExists = False
                print (str(fileExists))
            if (fileExists):
                # Get Payload Data
                req = client.objects().get_media(
                    bucket=_BUCKET_NAME, object=_FILE1_NAME)  # optional
                # The BytesIO object may be replaced with any io.Base instance.
                fh = io.BytesIO()
                downloader = MediaIoBaseDownload(fh, req, chunksize=1024*1024)
                done = False
                while not done:
                    status, done = downloader.next_chunk()
                returnValue = json.loads(fh.getvalue())  # return value
                ''''''
                # Debug output: report which JSON container type was decoded.
                print ("STR ")
                print (type(returnValue) is str)
                print ("DICT ")
                print (type(returnValue) is dict)
                print ("LIST ")
                print (type(returnValue) is list)
        except oauth2_client.AccessTokenRefreshError:
            print ("False credentials")
    else:
        print (str(False) + str(resp.status))
def download_file_as(service, file_id, media_type, file_name):
    """Export Drive file `file_id` as `media_type` and save it to `file_name`."""
    request = service.files().export_media(fileId=file_id, mimeType=media_type)
    handle = io.FileIO(file_name, mode='wb')
    downloader = MediaIoBaseDownload(handle, request)
    finished = False
    while not finished:
        status, finished = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
def download_file(self, file_name, file_id):
    """Download Drive file `file_id` into local file `file_name`."""
    request = self._service.files().get_media(fileId=file_id)
    handle = io.FileIO(file_name, 'wb')
    downloader = MediaIoBaseDownload(handle, request)
    finished = False
    while not finished:
        status, finished = downloader.next_chunk()
        logger.debug("Download %d%%." % int(status.progress() * 100))
def export_pdf(): credentials = get_credentials() print "Authorizing..." http = credentials.authorize(httplib2shim.Http()) service = discovery.build('drive', 'v3', http=http) # files = service.files().list().execute() # for f in files['files']: # print f['name'] print "Exporting..." request = service.files().export_media(fileId=FILE_ID, mimeType='application/pdf') fh = io.FileIO(FILENAME, 'wb') downloader = MediaIoBaseDownload(fh, request) downloader.next_chunk()
def test_media_io_base_download_handle_4xx(self):
    """A 4xx response raises HttpError, and the download can then resume."""
    self.request.http = HttpMockSequence([({"status": "400"}, "")])
    download = MediaIoBaseDownload(fh=self.fh, request=self.request, chunksize=3)
    with self.assertRaises(HttpError):
        download.next_chunk()
    # Even after raising an exception we can pick up where we left off.
    self.request.http = HttpMockSequence(
        [({"status": "200", "content-range": "0-2/5"}, "123")]
    )
    status, done = download.next_chunk()
    self.assertEqual("123", self.fh.getvalue())
def gdrive_get_file(drive, file_id):
    """Download a Drive file's bytes into an in-memory buffer.

    Returns:
        io.BytesIO holding the content. The stream is positioned at the
        end; callers reading sequentially should seek(0) first.
    """
    request = drive.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        # BUG FIX: str.format does not process %-escapes, so '%%' was
        # printed literally ("50%%."); a single '%' is correct here.
        print("Download {}%.".format(int(status.progress() * 100)))
    return fh
def run(self, bucket: str, obj: str, local_path: str):
    """Download object `obj` from `bucket` into directory `local_path`.

    Returns the full path of the downloaded file.
    """
    media_request = self.api.objects.get_media(bucket=bucket, object=obj)
    target = os.path.join(local_path, os.path.basename(obj))
    with FileIO(target, "wb") as sink:
        downloader = MediaIoBaseDownload(sink, media_request,
                                         chunksize=1024 * 1024)
        done = False
        while not done:
            _, done = downloader.next_chunk()
    return target
def download(self, filename, bucket_name, object_name):
    """Download a storage object to local `filename`, printing progress."""
    with open(filename, 'wb') as sink:
        request = self.cloud.objects().get_media(
            bucket=bucket_name, object=object_name)
        downloader = MediaIoBaseDownload(sink, request)
        finished = False
        while not finished:
            status, finished = downloader.next_chunk()
            print("Download {}%.".format(int(status.progress() * 100)))
def download_file(service, file_id, file_name):
    """Export Drive file `file_id` as CSV into OUT_PATH/file_name."""
    request = service.files().export_media(fileId=file_id, mimeType='text/csv')
    handle = io.FileIO(os.path.join(OUT_PATH, file_name), 'wb')
    downloader = MediaIoBaseDownload(handle, request)
    finished = False
    while not finished:
        status, finished = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
def download(self, filename, object_id):
    """Download Drive object `object_id` into `filename`, logging progress."""
    logging.debug("Starting download of object %s to %s" % (object_id, filename))
    with open(filename, "wb") as sink:
        request = self.service.files().get_media(fileId=object_id)
        downloader = MediaIoBaseDownload(sink, request, chunksize=CHUNKSIZE)
        finished = False
        while not finished:
            status, finished = downloader.next_chunk()
            logging.info("Download %d%%." % int(status.progress() * 100))
    logging.info("Object %s has been downloaded as %s" % (object_id, filename))
def _download_file(self, item, filename):
    """Downloads item to filename, exported as a PDF.

    Args:
        item: Drive file resource dict; only its 'id' is used.
        filename: local path the exported PDF is written to.
    """
    file_id = item['id']  # renamed from `id`, which shadowed the builtin
    request = self.service.files().export_media(fileId=file_id,
                                                mimeType='application/pdf')
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
    # BUG FIX: the original `open(filename, 'wb').write(...)` never closed
    # the handle; `with` guarantees it is flushed and closed.
    with open(filename, 'wb') as out:
        out.write(fh.getvalue())
def Download(self, LocalPath="", progressCallback=None):
    """Download this file's media to LocalPath (defaults to the file's name).

    progressCallback, if given, receives the percent complete after each
    chunk.
    """
    LocalPath = LocalPath or self.name
    request = self.__ds.files().get_media(fileId=self.__id)
    with io.open(LocalPath, "wb") as fh:
        downloader = MediaIoBaseDownload(fh, request)
        finished = False
        while not finished:
            status, finished = downloader.next_chunk()
            if progressCallback:
                progressCallback(status.progress() * 100)
def pullCreditFromDrive(self):
    """Export the credit sheet from Drive as CSV into CREDIT_FILENAME."""
    filePrefix = CREDIT_FILENAME.rsplit(".", 1)[0]
    creditFile = self.findSheet(filePrefix)
    # BUG FIX: the None check must come before dereferencing creditFile —
    # the original called creditFile.get('id') first and raised
    # AttributeError whenever the sheet was missing.
    if creditFile is None:
        return
    self.creditFileId = creditFile.get('id')
    request = self.driveService.files().export_media(
        fileId=self.creditFileId, mimeType='text/csv')
    fh = io.FileIO(CREDIT_FILENAME, 'wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
def get_file(service, file_id, file_name, location, exported):
    """Fetch a Drive file into `location`; export it as PDF when `exported`."""
    if exported:
        request = service.files().export_media(fileId=file_id,
                                               mimeType='application/pdf')
        file_name += '.pdf'
    else:
        request = service.files().get_media(fileId=file_id)
    target = os.path.join(location, file_name)
    print('downloading -' + str(target))
    handle = io.FileIO(target, mode='wb')
    downloader = MediaIoBaseDownload(handle, request)
    finished = False
    while not finished:
        status, finished = downloader.next_chunk()
def _download_file(self, drive_file):
    """Download one Drive file to its destination path, retrying up to 10 times.

    Progress is folded into the transfer-wide status counters; the byte
    count is recomputed from the pre-file baseline on every chunk so a
    retried file does not double-count.
    """
    destination_path = self._create_destination_path(drive_file)
    # Baselines: status totals *excluding* this file, so progress can be
    # recomputed idempotently across retries.
    downloaded_bytes_without_file = self.status().downloaded_bytes
    downloaded_files_without_file = self.status().downloaded_files
    file_size = int(drive_file['size'])
    last_exception = None
    max_retries = 10
    for times in range(0, max_retries):
        if not self.running():
            return  # transfer was cancelled externally
        logger.debug('Downloading {0} to {1}. retry={2}'.format(
            drive_file, destination_path, times))
        try:
            # Reopening 'wb' on each retry restarts the file from scratch.
            with open(destination_path, 'wb') as destination_file:
                request = retry(lambda: self.drive.files().get_media(
                    fileId=drive_file['id']))
                downloader = MediaIoBaseDownload(fd=destination_file,
                                                 request=request,
                                                 chunksize=10 * 1024 * 1024)
                while self.running():
                    logger.debug(
                        'Downloading chunk of {0} to {1}. retry={2}'.
                        format(drive_file, destination_path, times))
                    status, done = retry(lambda: downloader.next_chunk())
                    # Recompute from the baseline rather than accumulating.
                    self._update_status(downloaded_bytes=int(
                        downloaded_bytes_without_file +
                        file_size * status.progress()))
                    if done:
                        # Force bytes to disk before declaring success.
                        destination_file.flush()
                        os.fsync(destination_file.fileno())
                        self._update_status(
                            downloaded_files=downloaded_files_without_file + 1)
                        return
        except HttpError as e:
            e.message = e._get_reason()
            last_exception = re_raisable()
            logger.warn(
                'Failed to download {0} to {1}. retry={2}, error={3}'.
                format(drive_file, destination_path, times,
                       last_exception.message))
    # All retries exhausted: propagate the last failure to the caller.
    self.reject(last_exception)
def download_files(service, files_list):
    """Download each Drive file in files_list and copy it into a backup dir.

    NOTE(review): Python 2 code (print statements).
    """
    today = datetime.now()
    # Create BACKUP_PATH/<YYYYMMDD> for today's run.
    copy_dest = os.path.join(BACKUP_PATH, today.strftime('%Y%m%d'))
    os.mkdir(copy_dest)
    for fid in files_list:
        file_meta = service.files().get(fileId=fid).execute()
        request = service.files().get_media(fileId=fid)
        download_path = os.path.join(ROOT_PATH, file_meta['title'])
        # NOTE(review): copy_path uses BACKUP_PATH, not copy_dest — the
        # dated directory created above is never written to; confirm intent.
        copy_path = os.path.join(BACKUP_PATH, file_meta['title'])
        print "download_path:", download_path
        print "copy_path:", copy_path
        print "File Size:", file_meta['fileSize']
        fh = io.FileIO(download_path, 'wb')
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            print "Download %d%%." % int(status.progress() * 100)
        shutil.copyfile(download_path, copy_path)
def download_and_delete(self):
    """Download this Drive file to a unique temp path, then delete the Drive copy.

    Returns:
        TempLocalFile pointing at the downloaded content.
    """
    download_location = os.path.join(settings.MEDIA_ROOT, "temp_local")
    download_location = os.path.join(
        download_location, str(uuid.uuid4()) + self.extension)
    drive_service = self.owned_by.googlecreds.getDrive()
    request = drive_service.files().get_media(fileId=self.gid)
    # BUG FIX: use a context manager so the handle is closed even when
    # next_chunk() raises; the original leaked it on the error path.
    with open(download_location, "wb") as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            print("Download {}%".format(int(status.progress() * 100)))
    local = TempLocalFile(name=self.name, file=download_location,
                          uploaded_by=self.uploaded_by)
    self.delete()
    return local
def download_file(service, file_id, location, file_name):
    """Download a Drive file into `location + file_name`.

    Returns:
        False when the download fails (any partial target is removed);
        None on success.
    """
    request = service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    # download io stream:
    while done is False:
        try:
            status, done = downloader.next_chunk()
        except Exception:
            # Narrowed from a bare `except:`. BUG FIX: the target file is
            # only written after the loop, so os.remove() on it raised
            # FileNotFoundError here — guard with an existence check.
            fh.close()
            if os.path.exists(location + file_name):
                os.remove(location + file_name)
            return False
        print(f'\rDownload {int(status.progress() * 100)}%.')
    # write file from io stream:
    with io.open(location + file_name, 'wb') as f:
        fh.seek(0)
        f.write(fh.read())
def downloadFile(creds, files):
    """Export each given Drive spreadsheet as .xlsx into tmp/."""
    service = build('drive', 'v3', credentials=creds)
    if not files:
        print('No files found.')
        return
    print('Files:')
    for entry in files:
        print(u'{0} ({1})'.format(entry['name'], entry['id']), entry['mimeType'])
        request = service.files().export_media(
            fileId=entry['id'],
            mimeType='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
        handle = io.FileIO('tmp/' + entry['name'] + '.xlsx', 'wb')
        downloader = MediaIoBaseDownload(handle, request)
        finished = False
        while not finished:
            status, finished = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100))
def downloadFile(credentials, file_id):
    """Download Drive file `file_id` to '<file_id>.jpg' in the working directory."""
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)
    # File-listing call kept for parity with the original (result unused).
    results = service.files().list(
        pageSize=10, fields="nextPageToken, files(id, name)").execute()
    items = results.get('files', [])
    # Download the file into the current directory.
    request = service.files().get_media(fileId=file_id)
    handle = io.FileIO(file_id + '.jpg', 'wb')
    downloader = MediaIoBaseDownload(handle, request)
    print(downloader)
    finished = False
    while not finished:
        status, finished = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
def FetchFile():
    """Shows basic usage of the Drive v3 API.

    Lists up to 10 files, downloads a hard-coded .xlsx from Drive, unzips
    it in memory, and parses the shared-strings file plus the first
    worksheet into a spreadsheet structure.

    Returns:
        (True, spreadsheet) on success, (False, None) when parsing fails.
    """
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))
    # Call the Drive v3 API
    results = service.files().list(
        pageSize=10, fields="nextPageToken, files(id, name)").execute()
    items = results.get('files', [])
    if not items:
        print('No files found.')
    else:
        print('Files:')
        for item in items:
            print('({1}) {0}'.format(item['name'], item['id']))
    fileid = "1pcW5wkpwrhhAH2brE6yofQcjilgF7seZ"
    results = service.files().get_media(fileId=fileid)
    fh = io.BytesIO()  #io.FileIO("download.xlsx", "wb")
    downloader = MediaIoBaseDownload(fh, results)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
    # An .xlsx file is a zip archive; read its XML members directly.
    z = zipfile.ZipFile(fh)
    for n in z.namelist():
        print(n)
    # Shared strings must be parsed first; the sheet references them.
    success, varlist = ParseVariableFile(
        xmlformatter(z.open("xl/sharedStrings.xml").read()))
    if not success:
        return False, None
    success, spreadsheet = ParseSpreadsheet(
        varlist, xmlformatter(z.open("xl/worksheets/sheet1.xml").read()))
    if not success:
        return False, None
    return True, spreadsheet
def exportFiles():
    """Export every Drive doc in exportList as plain text into destinationFolder."""
    # set up google drive api usage
    # If modifying these scopes, delete the file token.pickle.
    SCOPES = ['https://www.googleapis.com/auth/drive']
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and
    # is created automatically when the authorization flow completes for the
    # first time.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server()
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)
    service = build('drive', 'v3', credentials=creds)
    print("DESTINATION FOLDER: ", destinationFolder)
    for export, exportName in zip(exportList, exportListOfNames):
        # Now download the files (export as plaintext)
        print('Fetching File ID: ', exportName, ".txt...")
        file_id = export
        request = service.files().export_media(fileId=file_id,
                                               mimeType='text/plain')
        print(export)
        print("Placing Files: ", os.getcwd())
        fh = io.FileIO((destinationFolder + "/" + exportName + ".txt"), 'wb')
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            # BUG FIX: the percentage was passed as a second print argument,
            # so "%d%%" was never formatted; use %-interpolation instead.
            print("Download %d%%." % int(status.progress() * 100))
def main():
    """Refresh volunteerapps.xlsx from the Drive spreadsheet export."""
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))
    request = service.files().export_media(
        fileId='***',
        mimeType=
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
    handle = io.FileIO('volunteerapps.xlsx', 'wb')
    downloader = MediaIoBaseDownload(handle, request)
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()
    print('Volunteer log updated.')
    print('checking for new applicants...')
def writeFromGDrive(file_id, file_name, dest, md5):
    """Download a Drive file into dest/file_name and verify its MD5 checksum."""
    request = SERVICE.files().get_media(fileId=file_id)
    target = dest + '/' + file_name
    fh = io.FileIO(target, 'wb')
    done = False
    downloader = MediaIoBaseDownload(fh, request)
    while done is False:
        try:
            status, done = downloader.next_chunk()
        except Exception:
            # BUG FIX: the original handler referenced undefined names
            # `location` and `filename` (NameError) and then kept looping
            # on a closed handle; clean up the partial file and stop.
            fh.close()
            if os.path.exists(target):
                os.remove(target)
            print('ERROR downloading file: ' + file_name)
            return
        print(f'\rDownload {int(status.progress() * 100)}%.', end='')
    print('\n', flush=True)
    # BUG FIX: close the handle before hashing so all bytes are on disk.
    fh.close()
    dest_md5 = md5_check.md5_file(os.path.join(target))
    if dest_md5 == md5:
        print('md5 checks out for ' + file_name)
    else:
        print('md5 error- something corrupted for ' + file_name)
def download_from_bucket_to_local(gcs_service, bucket, gcs_path, local_path):
    """Download a GCS object to local_path, creating parent dirs as needed."""
    parent = os.path.dirname(local_path)
    if not os.path.exists(parent):
        try:
            os.makedirs(parent)
        except OSError as exc:
            # Guard against race condition: another process may create the
            # directory between the existence check and makedirs().
            if exc.errno != errno.EEXIST:
                raise
    with open(local_path, 'wb') as f:
        request = gcs_service.objects().get_media(bucket=bucket,
                                                  object=gcs_path)
        media = MediaIoBaseDownload(f, request)
        finished = False
        while not finished:
            # The file is small, so we print the raw status object rather
            # than formatting a progress percentage.
            status, finished = media.next_chunk()
            print(status)
def download_file(file_id, path):
    """Download Drive file `file_id` into directory `path`, named after the file."""
    info = get_file_info(file_id)
    target = os.path.join(path, info['name'])
    request = service().files().get_media(fileId=file_id)
    handle = io.FileIO(target, 'wb')
    downloader = MediaIoBaseDownload(handle, request, chunksize=2048 * 512)
    finished = False
    while not finished:
        status, finished = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
def download(file_id, mime_type, access_token, SUPPORTED_MIME_TYPES, exception=None):
    """Download/Export file from google drive

    params:
        file_id: file id from google drive
        mime_type: file mime types from google drive
        access_token: access token provided by google drive
        SUPPORTED_MIME_TYPES: which types of file to download
        exception: optional exception class raised for unsupported types
    """
    credentials = get_credentials(access_token)
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)
    if mime_type in SUPPORTED_MIME_TYPES:
        # Natively supported: download the raw bytes directly.
        request = service.files().get_media(fileId=file_id)
    else:
        # Google-native documents must be exported to a supported format.
        export_mime_type = GOOLE_DRIVE_EXPORT_MAP.get(mime_type)
        if not (export_mime_type and export_mime_type in SUPPORTED_MIME_TYPES):
            if exception:
                raise exception('Unsupported Mime Type: ' + mime_type)
            return
        request = service.files().export_media(fileId=file_id,
                                               mimeType=export_mime_type)
    outfp = tempfile.TemporaryFile("wb+")
    downloader = MediaIoBaseDownload(outfp, request)
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()
    return outfp
def export_to_file(down_file, gdrive_file_type, httpauth, service, path, counter, log_file):
    """Export a Google-native Drive file to a local file and record its hashes.

    Args:
        down_file: Drive file metadata dict (must have 'id', 'title', 'mimeType').
        gdrive_file_type: map of mimeType -> (extension, export mimeType);
            an extension of 'None' marks an unsupported type.
        httpauth: unused here; kept for interface compatibility with callers.
        service: Drive API service instance.
        path: destination root; files are written under `path`/_google/.
        counter: string prefix used in log messages.
        log_file: log destination passed through to log_and_print().

    Returns:
        True on success; None when the type is unsupported or the export fails.
    """
    value = gdrive_file_type[down_file['mimeType']]
    if value[0] != 'None':
        name = sanitize_name(down_file['title'])
        file_path = path + "/_google/" + name + value[0]
        # To prevent duplicate file names being saved.
        if os.path.exists(file_path):
            file_path, name = get_new_file_name(file_path)
            name = name.split(".")[0]
            log_and_print(log_file, counter + " File named '" + down_file['title'] + "' already exists. Saving as '" + name + "' instead.")
            log_and_print(log_file, counter + " Downloading '" + name + "' as '" + name + value[0] + "'...")
        else:
            log_and_print(log_file, counter + " Downloading '" + name + "' as '" + name + value[0] + "'...")
        fh = None
        try:
            request = service.files().export_media(fileId=down_file['id'], mimeType=value[1])
            fh = io.FileIO(file_path, mode='wb')
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                # Print status of download (mainly for larger files).
                print("%d%%\r" % int(status.progress() * 100), end="", flush=True)
            fh.close()
            log_and_print(log_file, counter + " Hashing '" + name + value[0] + "'...")
            with open(path + "/_google/_hashes.txt", "a") as hashes_file:
                hashes_file.write(name + value[0] + "\n")
                hashes_file.write("--MD5: " + hash_file(file_path, "md5") + "\n")
                hashes_file.write("--SHA1: " + hash_file(file_path, "sha1") + "\n")
                hashes_file.write("--SHA256: " + hash_file(file_path, "sha256") + "\n")
            return True
        except Exception:
            # Narrowed from a bare `except:` so Ctrl-C / SystemExit still
            # propagate.  fh may never have been opened if export_media or
            # FileIO itself raised, so guard the close (the original would
            # have hit a NameError here in that case).
            if fh is not None:
                fh.close()
            try:
                if os.path.exists(file_path):
                    os.remove(file_path)
            except Exception:
                log_and_print(log_file, counter + " Failed to download '" + name + "'. The user most likely doesn't have permissions to export this file.")
                log_and_print(log_file, counter + " Please manually remove '" + name + "' as it is an incomplete download.")
            else:
                log_and_print(log_file, counter + " Failed to download '" + name + "'. The user most likely doesn't have permissions to export this file.")
    else:
        log_and_print(log_file, counter + " Skipping '" + down_file['title'] + "' because it is an unsupported MIME type.")
def download_file_from_gdrive(file_path, drive_file, service):
    """Downloads file from Google Drive.

    If the file is a Google Doc's type, it is exported with the
    corresponding non-Google mimetype instead of downloaded directly.

    Args:
        file_path: Directory string, where file will be saved.
        drive_file: File information object (dictionary), including its
            name, ID and mimeType.
        service: Google Drive service instance.
    """
    file_id = drive_file['id']
    file_name = drive_file['name']
    if drive_file['mimeType'] in GOOGLE_MIME_TYPES.keys():
        # Google-native file: ensure the local name carries the export
        # extension (element [1] of the mapping).
        if file_name.endswith(
                GOOGLE_MIME_TYPES[drive_file['mimeType']][1]):
            file_name = drive_file['name']
        else:
            file_name = '{}{}'.format(
                drive_file['name'],
                GOOGLE_MIME_TYPES[drive_file['mimeType']][1])
            # NOTE(review): the Drive-side rename appears to run only when
            # the extension had to be appended — confirm this is intended.
            service.files().update(fileId=file_id, body={
                'name': file_name
            }).execute()
        # export() (not export_media) returns the exported bytes directly.
        request = service.files().export(
            fileId=file_id,
            mimeType=(GOOGLE_MIME_TYPES[drive_file['mimeType']]
                      )[0]).execute()
        with io.FileIO(os.path.join(file_path, file_name), 'wb') as file_write:
            file_write.write(request)
    else:
        # Regular binary file: stream it down chunk by chunk.
        request = service.files().get_media(fileId=file_id)
        file_io = io.FileIO(
            os.path.join(file_path, drive_file['name']), 'wb')
        downloader = MediaIoBaseDownload(file_io, request)
        done = False
        while done is False:
            _, done = downloader.next_chunk()
def do_ocr(input_file, temp_folder):
    """OCR an image by uploading it to Drive as a Google Doc, then exporting text.

    Args:
        input_file: path of the image file to OCR.
        temp_folder: directory where the extracted "<basename>.txt" is written.
    """
    # Upload the file to Google Drive, asking Drive to convert it into a
    # Google Doc (the conversion runs OCR on image content).
    folder_id = new_folder_id
    mime = 'application/vnd.google-apps.document'
    file_metadata = {
        'name': input_file,
        'mimeType': mime,
        'parents': [folder_id]
    }
    print(file_metadata)
    media = MediaFileUpload(input_file, mimetype=mime, chunksize=256 * 1024,
                            resumable=True)
    print(media)
    Imgfile = service.files().create(body=file_metadata, media_body=media,
                                     fields='id')
    response = None
    while response is None:
        status, response = Imgfile.next_chunk()
        time.sleep(1)
        if status:
            print("Uploaded %d%%." % int(status.progress() * 100))
    print("Upload of {} is complete.".format(input_file))

    basename = "".join(os.path.basename(input_file).split(".")[:-1])
    filename = temp_folder + "/" + basename + ".txt"
    # Give Drive time to finish the OCR conversion before exporting.
    time.sleep(10)

    # Download the OCR result from Google Drive in txt format.
    getTxt = service.files().export_media(fileId=response['id'],
                                          mimeType='text/plain')
    # `with` fixes the original's leak: the FileIO handle was never closed.
    with io.FileIO(filename, 'wb') as fh:
        downloader = MediaIoBaseDownload(fh, getTxt)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100))
def get_file(id, username):
    """Download a Drive file, try decrypting it with each of the user's group
    keys, and render the main page with the outcome.

    Args:
        id: Drive file id to fetch.
        username: account whose group keys are tried for decryption.
    """
    if 'credentials' not in flask.session:
        return flask.redirect('authorize')
    # Load credentials from the session.
    credentials = google.oauth2.credentials.Credentials(**flask.session['credentials'])
    drive = googleapiclient.discovery.build(API_SERVICE_NAME, API_VERSION, credentials=credentials)
    try:
        file = drive.files().get(fileId=id).execute()
        name = file['name']
        request = drive.files().get_media(fileId=id)
        path = os.path.join(app.config['DOWNLOAD_FOLDER'], name)
        # `with` closes the handle; the original leaked it, so the bytes
        # might not be flushed before decrypt() read the file back.
        with open(path, 'wb+') as f:
            downloader = MediaIoBaseDownload(f, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
        print("Downloaded file... decrypting")
        user_id = User.query.filter_by(username=username).first().id
        group_entries = GroupEntry.query.filter_by(user_id=user_id).all()
        outfile = False
        # Try every group the user belongs to until one key decrypts the file.
        for entry in group_entries:
            print("group_id:", entry.group_id)
            group_id = Group.query.filter_by(id=entry.group_id).first().id
            key = entry.key
            outfile = decrypt(key, path, name, group_id)
            if not outfile == False:
                break
        message = ""
        color = ""
        print(outfile)
        if outfile == False:
            message = "The file could not be decrypted as you do not have access to the group that encrypted it."
            color = "red"
        else:
            message = "The file has been decrypted and is located in your downloads folder."
            color = "green"
        return render_template('main.html', username=username, message=message, color=color)
    except Exception as error:
        # Top-level request boundary: report and render a friendly error page.
        print('An error occurred: ', error)
        return render_template('main.html', username=username, message="Error decrypting file", color="red")
def test_media_io_base_download_handle_redirects(self):
    """A content-location header should update the download's target URI."""
    responses = [
        ({'status': '200',
          'content-location': 'https://secure.example.net/lion'}, ''),
        ({'status': '200', 'content-range': '0-2/5'}, 'abc'),
    ]
    self.request.http = HttpMockSequence(responses)
    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)

    status, done = download.next_chunk()

    self.assertEqual('https://secure.example.net/lion', download._uri)
def download_file_id(service, file_id, dest):
    """
    Download a file to a destination directory using its ID

    Parameters
    ----------
    service : googleapiclient.discovery.Resource
        Google API resource for GDrive v3
    file_id : str
        ID of file on Google Drive
    dest : str or pathlib.Path
        Destination filename

    Returns
    -------
    pathlib.Path
        Filename written to

    Raises
    ------
    FileExistsError
        Raised if file exists in destination but not allowed to overwrite,
    ValueError
        Raised if the file given does not exist in Google Drive
    """
    # Make sure the destination directory exists before writing.
    dest = Path(dest)
    if not dest.parent.exists():
        dest.parent.mkdir(parents=True, exist_ok=True)

    # Stream the media into a host/pid-unique temp file; the context
    # manager renames it into place once the download completes.
    request = service.files().get_media(fileId=file_id)
    suffix = f'.tmp.{socket.gethostname()}{os.getpid()}'
    with renamed_upon_completion(dest, suffix=suffix) as tmp:
        with open(str(tmp), 'wb') as dst:
            downloader = MediaIoBaseDownload(dst, request)
            finished = False
            # TODO: would be nice to have _some_ logging indicator of dl speed
            while not finished:
                _, finished = downloader.next_chunk()
    return dest
def Download(fileId, path): #check if file is folder results = service.files().get(fileId=fileId, fields="mimeType,trashed").execute() if results['mimeType'] == MIME_FOLDER: listing = getList(None, fileId) listing = itrerateFolder(listing) forFirstTime(listing, getFullPath(fileId) + "/") return True if results['trashed']: print "File is in trash. No need to download" return True #If it is a file then download it results = service.files().get_media(fileId=fileId) #Get file media fh = io.BytesIO() #Stream to write fetched data downloader = MediaIoBaseDownload(fh, results) done = False while done is False: status, done = downloader.next_chunk() #Set path if path: pathToFile = DOWNLOAD_PATH_PREFIX + path else: pathToFile = DOWNLOAD_PATH_PREFIX + getFullPath(fileId) #Create dirs if not os.path.exists(pathToFile[:pathToFile.rfind("/")]): os.makedirs(pathToFile[:pathToFile.rfind("/")]) #Save file try: with open(pathToFile, 'w') as saveFile: saveFile.write(fh.getvalue()) print "File downloaded:", pathToFile except IOError as e: print "Error occured:", e return True
def retrieve_content(self, context, bucket, path, transform=None, generation=None, **kwargs):
    """Retrieves the content at the specified path.

    Args:
      bucket: [string] The bucket to retrieve front.
      path: [string] The path to the content to retrieve from the bucket.
      generation: [long] Specifies version of object (or None for current).
      transform: [callable(string)] transform the downloaded bytes into
        something else (e.g. a JSON object). If None then the identity.

    Returns:
      transformed object.
    """
    self.logger.info('Retrieving path=%s from bucket=%s [generation=%s]',
                     path, bucket, generation)

    # Resolve any context expressions in the parameters first.
    bucket = context.eval(bucket)
    path = context.eval(path)
    generation = context.eval(generation)

    request = self.service.objects().get_media(
        bucket=bucket, object=path, generation=generation, **kwargs)

    # Pull the payload down in 1 MiB chunks into an in-memory buffer.
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request, chunksize=1024 * 1024)
    finished = False
    while not finished:
        progress, finished = downloader.next_chunk()
        if progress:
            self.logger.debug('Download %d%%', int(progress.progress() * 100))

    raw = buffer.getvalue()
    if transform is None:
        return raw
    return transform(raw)
def main():
    """Look up a Drive file by name (sys.argv[1]) and save it as plain text.

    When several files match, the first one returned is used.
    """
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)

    items = getId(service, sys.argv[1])
    if not items:
        print('No files found')
        exit()
    else:
        print('files:')
        for item in items:
            print('{0} ({1})'.format(item['name'], item['id']))
    if len(items) > 1:
        print('First file selected')
    item = items[0]

    # Export the document as plain text into an in-memory buffer.
    request = service.files().export_media(fileId=item['id'],
                                           mimeType='text/plain')
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))

    fileText = fh.getvalue()
    # `with` guarantees the output file is closed; the original used manual
    # open/close and left a dangling unterminated triple-quote after it.
    with open(item['name'] + '.txt', 'wb') as file:
        file.write(bytes(fileText))
def view_file(file_id):
    """Stream a Drive file back to the client with its original name and type."""
    drive_api = build_drive_api_v3()

    metadata = drive_api.get(fields="name,mimeType", fileId=file_id).execute()

    # Download the whole file into an in-memory buffer.
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, drive_api.get_media(fileId=file_id))
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()

    # Rewind so send_file streams from the beginning.
    buffer.seek(0)
    return flask.send_file(
        buffer,
        attachment_filename=metadata['name'],
        mimetype=metadata['mimeType']
    )
def dl_file(file_id, file_name):
    """Downloads a non-Google app file from Google Drive.

    Args:
        file_id: ID of the file to download.
        file_name: name of the file to download.
    """
    drive_service = initialize_drive_service()
    request = drive_service.files().get_media(fileId=file_id)
    log_file().info("Source Compiler.log", "Downloading " + file_name + " (ID: " + file_id + ").")
    print("Downloading " + file_name + " (ID: " + file_id + ").")
    # Context manager fixes the original's leak: the FileIO handle was
    # never closed, even on success.
    with io.FileIO(file_name, 'wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            log_file().info("Source Compiler.log", "Download %d%%." % int(status.progress() * 100))
            print("Download %d%%." % int(status.progress() * 100))
def download(self, drive_file_id=""): """ Download files from Google Drive in /tmp/. """ filename = "/tmp/:id.tmp".replace(":id", drive_file_id) credentials = self.get_credentials() http = credentials.authorize(httplib2.Http()) drive_service = discovery.build('drive', 'v3', http=http) request = drive_service.files().get_media(fileId=drive_file_id) fh_fd = io.FileIO(filename, 'wb') downloader = MediaIoBaseDownload(fh_fd, request) done = False try: while not done: _, done = downloader.next_chunk(num_retries=3) fh_fd.flush() os.fsync(fh_fd) return filename except HttpError: rollbar.report_message('Error: Unable to download the file', 'error') return None
def download_images(self, drive_file_id=""):
    """
    Download an image from Google Drive into the app's images directory.

    (The destination is the documentator images folder, not /tmp/ as the
    original docstring claimed.)
    """
    hard_path = "/usr/src/app/app/documentator/images/"
    filename = hard_path + ":id.png".replace(":id", drive_file_id)
    credentials = self.get_credentials()
    http = credentials.authorize(httplib2.Http())
    file_id = drive_file_id
    drive_service = discovery.build('drive', 'v3', http=http)
    request = drive_service.files().get_media(fileId=file_id)
    fh_fd = io.FileIO(filename, 'wb')
    downloader = MediaIoBaseDownload(fh_fd, request)
    done = False
    try:
        while not done:
            _, done = downloader.next_chunk(num_retries=3)
        # Make sure the bytes actually hit the disk.
        fh_fd.flush()
        os.fsync(fh_fd)
    except HttpError:
        rollbar.report_message('Error: Unable to download the image', 'error')
    finally:
        # The original never closed the handle, leaking one file
        # descriptor per image.
        fh_fd.close()
def downloader(csv_location, file_name):
    """Export the Google Form responses sheet as CSV and save it locally."""
    file_id = '1gFwGMC0_0uzPK62fLsjumVGZXeSTmw1eyE69gtgrLzQ'
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        # No valid cached token: run the OAuth flow interactively.
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    drive_service = build('drive', 'v3', http=creds.authorize(Http()))

    request = drive_service.files().export_media(fileId=file_id, mimeType='text/csv')
    buffer = io.BytesIO()
    media = MediaIoBaseDownload(buffer, request)
    finished = False
    while not finished:
        progress, finished = media.next_chunk()
        print("Download %d%%." % int(progress.progress() * 100))

    # Rewind and copy the buffer out to the destination file.
    buffer.seek(0)
    with io.open(csv_location + file_name, 'wb') as f:
        f.write(buffer.read())
def drive_download(sa_secrets_file, file_id, file_name):
    """Download a Drive file using service-account credentials.

    Args:
        sa_secrets_file: path to the service-account JSON key file.
        file_id: Drive id of the file to fetch.
        file_name: local path the content is written to.

    Returns:
        The final progress percentage (100.0 for a completed download).

    Raises:
        Any googleapiclient/IO error propagates unchanged.  (The original
        wrapped everything in `try: ... except: raise`, which is a no-op
        and has been removed.)
    """
    credentials = service_account.Credentials.from_service_account_file(
        sa_secrets_file, scopes=DRIVE_SCOPES)
    drive_service = build('drive', 'v3', credentials=credentials)
    request = drive_service.files().get_media(fileId=file_id)
    # `with` closes the handle even when next_chunk() raises; the original
    # only closed it on the success path.
    with io.FileIO(file_name, "wb") as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
    return status.progress() * 100
def download_file(self, file_id, file_name, full_path=None):
    """Download a Drive file to `file_name`, optionally under `full_path`.

    Returns the local path written, or None when a Drive connection could
    not be established.
    """
    # Lazily (re)connect; `is None` replaces the original's `== None`.
    if self.service is None:
        self.service = self.connect()
    if self.service is None:
        Logger.info('DriveClient: Connecting to drive failed')
        return None
    request = self.service.files().get_media(fileId=file_id)
    fn = file_name
    if full_path is not None:
        fn = os.path.join(full_path, file_name)
    # Context manager fixes the original's descriptor leak: the FileIO
    # handle was never closed.
    with io.FileIO(fn, mode='wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            Logger.info("DriveClient: Download {}".format(
                int(status.progress() * 100)))
    return fn
def media_download(request, chunksize): data = BytesIO() media = MediaIoBaseDownload(data, request, chunksize=chunksize) retries = 0 done = False while not done: error = None try: progress, done = media.next_chunk() if progress: print 'Download %d%%' % int(progress.progress() * 100) data.seek(0) yield data data.seek(0) data.truncate(0) except HttpError, err: error = err if err.resp.status < 500: raise except (httplib2.HttpLib2Error, IOError), err: error = err