def download_from_google_drive(shareable_url: str,
                               file_name: str,
                               log: logging.Logger,
                               download_path: str = downloads) -> Tuple:
    """Downloads file from the shareable url.

    Downloads file from the shareable url and saves it in the downloads
    folder.

    Args:
        shareable_url: Url of the file.
        file_name: Filename for the downloaded file.
        log: Logger object for logging the status.
        download_path: Path (default: ./downloads/) for saving file.

    Returns:
        Tuple of the download status and the downloaded file path or an
        error/warning message.

    Raises:
        ResponseError: If any unexpected response/error occurs.
        ResponseNotChunked: If the response is not sending correct `chunks`.

    Notes:
        This function is capable of downloading files from Google Drive
        only if these files are shareable using the 'Anyone with the link'
        link sharing option.
    """
    # You can find the reference code here:
    # https://stackoverflow.com/a/39225272
    try:
        file_id = shareable_url.split('https://drive.google.com/open?id=')[1]
        session = requests.Session()
        response = session.get(dev.DRIVE_DOWNLOAD_URL,
                               params={'id': file_id},
                               stream=True)
        token = fetch_confirm_token(response)
        if token:
            response = session.get(dev.DRIVE_DOWNLOAD_URL,
                                   params={'id': file_id, 'confirm': token},
                                   stream=True)
        # Write file to the disk.
        with open(os.path.join(download_path, f'{file_name}.mp4'),
                  'wb') as file:
            for chunk in response.iter_content(dev.CHUNK_SIZE):
                if chunk:
                    file.write(chunk)
        log.info(f'File "{file_name}.mp4" downloaded from Google Drive.')
        if fz(os.path.join(download_path, f'{file_name}.mp4')).endswith('KB'):
            log.error('Unusable file downloaded since file size is in KBs.')
            return None, '[w] Unusable file downloaded.'
        return True, os.path.join(download_path, f'{file_name}.mp4')
    except (RequestError, RequestException):
        log.error('File download from Google Drive failed because of poor '
                  'network connectivity.')
        return None, '[e] Error while downloading file'
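# `fetch_confirm_token` is assumed to follow the helper from the referenced
# Stack Overflow answer (https://stackoverflow.com/a/39225272): for large
# files Google Drive sets a 'download_warning' cookie whose value has to be
# echoed back via the `confirm` query parameter. A minimal sketch under that
# assumption is below; the actual helper may live elsewhere in this module.


def _fetch_confirm_token_sketch(response: requests.Response):
    # Scan the response cookies for the large-file download warning token.
    # Returns the token string, or None when no confirmation is needed.
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value
    return None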
def access_file_update(access_key: str,
                       secret_key: str,
                       s3_url: str,
                       file_name: str,
                       log: logging.Logger,
                       bucket_name: str = None) -> Tuple:
    """Access file from S3 bucket.

    Access and download file from S3 bucket.

    Args:
        access_key: AWS access key.
        secret_key: AWS secret key.
        s3_url: Public url for the file.
        file_name: Filename for the downloaded file.
        log: Logger object for logging the status.
        bucket_name: Bucket to search and download from.

    Returns:
        Tuple of the download status and the downloaded file path or an
        error/warning message.

    Notes:
        This function ensures the file exists on the S3 bucket and then
        downloads the same. If the file doesn't exist on S3, it'll return
        None.
    """
    try:
        s3 = boto3.client('s3',
                          aws_access_key_id=access_key,
                          aws_secret_access_key=secret_key)
    except (ClientError, NoCredentialsError):
        log.error('Wrong credentials used to access the AWS account.')
        return None, '[e] Error while downloading file'
    else:
        [*status, bucket, file] = check_file(access_key, secret_key, s3_url,
                                             log, bucket_name)
        if status[0]:
            s3.download_file(bucket, file,
                             os.path.join(downloads, f'{file_name}.mp4'))
            log.info(
                f'File "{file_name}.mp4" downloaded from Amazon S3 storage.')
            if fz(os.path.join(downloads, f'{file_name}.mp4')).endswith('KB'):
                log.error(
                    'Unusable file downloaded since file size is in KBs.')
                return None, '[w] Unusable file downloaded.'
            return True, os.path.join(downloads, f'{file_name}.mp4')
        else:
            log.error(
                'File download from Amazon S3 failed because of poor network '
                'connectivity.')
            return None, '[e] Error while downloading file'
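# A hedged usage sketch for `access_file_update`. The credentials, URL and
# filename below are hypothetical placeholders, not values from this project;
# the function returns a (status, path-or-message) tuple as documented above.
#
#   log = logging.getLogger(__name__)
#   status, result = access_file_update(
#       'AKIA...EXAMPLE', 'wJal...EXAMPLE',
#       'https://example-bucket.s3.amazonaws.com/videos/cam01.mp4',
#       'cam01', log)
#   if status:
#       print(f'Downloaded to {result}')
#   else:
#       print(result)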
def download_from_azure(account_name: str,
                        account_key: str,
                        container_name: str,
                        blob_name: str,
                        file_name: str,
                        log: logging.Logger,
                        download_path: str = downloads) -> Tuple:
    """Download file from Microsoft Azure.

    Download file from Microsoft Azure and store it in the downloads folder.

    Args:
        account_name: Azure account name.
        account_key: Azure account key.
        container_name: Container from which blob needs to be downloaded.
        blob_name: Blob to download from Microsoft Azure.
        file_name: Filename for the downloaded file.
        log: Logger object for logging the status.
        download_path: Path (default: ./downloads/) for saving file.

    Returns:
        Tuple of the download status and the downloaded file path or an
        error/warning message.
    """
    # You can find the reference code here:
    # https://pypi.org/project/azure-storage-blob/
    try:
        connection_string = generate_connection_string(account_name,
                                                       account_key)
        blob = BlobClient.from_connection_string(
            conn_str=connection_string,
            container_name=container_name,
            blob_name=blob_name)
        with open(os.path.join(download_path, f'{file_name}.mp4'),
                  'wb') as file:
            data = blob.download_blob()
            data.readinto(file)
        log.info(f'File "{file_name}.mp4" downloaded from Microsoft Azure.')
        if fz(os.path.join(download_path, f'{file_name}.mp4')).endswith('KB'):
            log.error('Unusable file downloaded since file size is in KBs.')
            return None, '[w] Unusable file downloaded.'
        return True, os.path.join(download_path, f'{file_name}.mp4')
    except Exception:
        log.error('File download from Microsoft Azure failed because of poor '
                  'network connectivity.')
        return None, '[e] Error while downloading file'
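# `generate_connection_string` is assumed to assemble the standard Azure
# Storage connection string from the account name and key; a minimal sketch
# under that assumption is below. The real helper may differ, e.g. by
# supporting a custom protocol or endpoint suffix.


def _generate_connection_string_sketch(account_name: str,
                                       account_key: str) -> str:
    # Standard Azure Storage connection string layout.
    return (f'DefaultEndpointsProtocol=https;AccountName={account_name};'
            f'AccountKey={account_key};EndpointSuffix=core.windows.net')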
def download_using_ftp(username: str,
                       password: str,
                       public_address: str,
                       remote_file: str,
                       file_name: str,
                       log: logging.Logger,
                       download_path: str = downloads) -> Tuple:
    """Download/fetch/transfer file from a remote machine using scp (OpenSSH).

    Fetch file from the remote machine and store it in the downloads folder.

    Args:
        username: Username of the remote machine.
        password: Password of the remote machine.
        public_address: Remote server IP address.
        remote_file: Remote file to be downloaded/transferred.
        file_name: Filename for the downloaded file.
        log: Logger object for logging the status.
        download_path: Path (default: ./downloads/) for saving file.

    Returns:
        Tuple of the download status and the downloaded file path or an
        error/warning message.
    """
    # You can find the reference code here:
    # https://stackoverflow.com/a/56850195
    try:
        os.system(f'sshpass -p {password} scp -o StrictHostKeyChecking=no '
                  f'{username}@{public_address}:{remote_file} {download_path}')
        log.info(f'File "{file_name}.mp4" transferred successfully')
        if fz(os.path.join(download_path, f'{file_name}.mp4')).endswith('KB'):
            log.error('Unusable file transferred since file size is in KBs.')
            return None, '[w] Unusable file transferred.'
        return True, os.path.join(download_path, f'{file_name}.mp4')
    except OSError:
        log.error('File transfer via scp failed because of poor network '
                  'connectivity.')
        return None, '[e] Error while transferring file'
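# Note: interpolating user-supplied values into an `os.system` command string
# risks shell injection, and the return code of `scp` is silently ignored. A
# hedged alternative sketch using `subprocess.run` with an argument list (no
# shell involved) is shown below; `subprocess` would need to be imported at
# module level, and `sshpass -p` still exposes the password in the process
# list.
#
#   import subprocess
#
#   subprocess.run(['sshpass', '-p', password, 'scp',
#                   '-o', 'StrictHostKeyChecking=no',
#                   f'{username}@{public_address}:{remote_file}',
#                   download_path],
#                  check=True)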
def batch_download_from_azure(account_name: str,
                              account_key: str,
                              container_name: str,
                              access_from: str,
                              access_to: str,
                              log: logging.Logger,
                              timestamp_format: str = '%Y-%m-%d %H:%M:%S',
                              download_path: str = downloads) -> List:
    """Download multiple files from Microsoft Azure.

    Download multiple files from an Azure Blob container for a particular
    timeframe.

    Args:
        account_name: Azure account name.
        account_key: Azure account key.
        container_name: Container from which blobs need to be downloaded.
        access_from: Datetime from when to start fetching files.
        access_to: Datetime till when to fetch files.
        log: Logger object for logging the status.
        timestamp_format: Timestamp format (default: %Y-%m-%d %H:%M:%S)
        download_path: Path (default: ./downloads/) for saving files.

    Returns:
        List of the directories which host the downloaded files.
    """
    _glob = []
    # You can find the reference code here:
    # https://pypi.org/project/azure-storage-blob/
    try:
        connection_string = generate_connection_string(account_name,
                                                       account_key)
        container = ContainerClient.from_connection_string(
            connection_string, container_name=container_name)
        limit_from = datetime.strptime(
            access_from, timestamp_format).replace(tzinfo=pytz.UTC)
        limit_till = datetime.strptime(
            access_to, timestamp_format).replace(tzinfo=pytz.UTC)
        container_dir = os.path.join(download_path, container_name)
        concate_dir = []
        files_with_timestamp = {}
        blobs_list = container.list_blobs()
        unsup_list = container.list_blobs()
        unsupported = [
            idx.name for idx in unsup_list
            if not idx.name.endswith(video_file_extensions)
        ]
        unsupported = list(
            set(map(lambda x: os.path.splitext(x)[1], unsupported)))
        unsupported = [idx for idx in unsupported if idx != '']
        if len(unsupported) > 1:
            log.info(f'Unsupported video formats like "{unsupported[0]}", '
                     f'"{unsupported[1]}", etc. will be skipped.')
        elif unsupported:
            log.info(f'Files ending with "{unsupported[0]}" will be skipped.')
        for blob in blobs_list:
            if blob.name.endswith(video_file_extensions):
                files_with_timestamp[blob.name] = blob.creation_time
        sorted_files = sorted(files_with_timestamp.items(),
                              key=lambda xa: xa[1])
        for file, timestamp in sorted_files:
            if limit_from < timestamp < limit_till:
                blob_style_dir = os.path.join(container_dir,
                                              os.path.dirname(file))
                concate_dir.append(blob_style_dir)
                if not os.path.isdir(blob_style_dir):
                    os.makedirs(blob_style_dir)
                download_from_azure(account_name, account_key, container_name,
                                    file, os.path.basename(file[:-4]), log,
                                    blob_style_dir)
                _glob.append(
                    os.path.join(blob_style_dir, os.path.basename(file)))
        if len(concate_dir) > 0:
            sizes = [fz(s_idx) for s_idx in _glob]
            temp = [(n, s) for n, s in zip(_glob, sizes)]
            with open(os.path.join(container_dir, f'{container_name}.csv'),
                      'a', encoding=dev.DEF_CHARSET) as csv_file:
                log.info('Logging downloaded files into a CSV file.')
                _file = csv.writer(csv_file, quoting=csv.QUOTE_MINIMAL)
                _file.writerow(['Files', 'Size on disk'])
                _file.writerows(temp)
            return list(set(concate_dir))
        else:
            return []
    except Exception as e:
        log.exception(e)
        log.error('File download from Microsoft Azure failed because of poor '
                  'network connectivity.')
        return []
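# A hedged usage sketch for `batch_download_from_azure`. The account details
# and container name below are hypothetical; the timeframe strings must match
# `timestamp_format` (default '%Y-%m-%d %H:%M:%S').
#
#   log = logging.getLogger(__name__)
#   dirs = batch_download_from_azure('mystorageaccount', 'base64key==',
#                                    'recordings',
#                                    '2020-06-01 00:00:00',
#                                    '2020-06-02 00:00:00', log)
#   # `dirs` lists the local directories that received blobs, or [] on error.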
def access_limited_files(access_key: str,
                         secret_key: str,
                         bucket_name: str,
                         access_from: str,
                         access_to: str,
                         log: logging.Logger,
                         timestamp_format: str = '%Y-%m-%d %H:%M:%S') -> List:
    """Access files from S3 bucket for particular timeframe.

    Access and download files from an S3 bucket for a particular timeframe.

    Args:
        access_key: AWS access key.
        secret_key: AWS secret key.
        bucket_name: Bucket to search and download from.
        access_from: Datetime from when to start fetching files.
        access_to: Datetime till when to fetch files.
        log: Logger object for logging the status.
        timestamp_format: Timestamp format (default: %Y-%m-%d %H:%M:%S)

    Returns:
        List of the directories which host the downloaded files.
    """
    _glob = []
    try:
        s3 = boto3.client('s3',
                          aws_access_key_id=access_key,
                          aws_secret_access_key=secret_key)
    except (ClientError, NoCredentialsError):
        log.error('Wrong credentials used to access the AWS account.')
        return []
    else:
        limit_from = datetime.strptime(
            access_from, timestamp_format).replace(tzinfo=pytz.UTC)
        limit_till = datetime.strptime(
            access_to, timestamp_format).replace(tzinfo=pytz.UTC)
        bucket_dir = os.path.join(downloads, bucket_name)
        concate_dir = []
        files_with_timestamp = {}
        all_files = s3.list_objects_v2(Bucket=bucket_name)
        unsupported = [
            idx['Key'] for idx in all_files['Contents']
            if not idx['Key'].endswith(video_file_extensions)
        ]
        unsupported = list(
            set(map(lambda x: os.path.splitext(x)[1], unsupported)))
        unsupported = [idx for idx in unsupported if idx != '']
        if len(unsupported) > 1:
            log.info(f'Unsupported video formats like "{unsupported[0]}", '
                     f'"{unsupported[1]}", etc. will be skipped.')
        elif unsupported:
            log.info(f'Files ending with "{unsupported[0]}" will be skipped.')
        for files in all_files['Contents']:
            if files['Key'].endswith(video_file_extensions):
                files_with_timestamp[files['Key']] = files['LastModified']
        sorted_files = sorted(files_with_timestamp.items(),
                              key=lambda xa: xa[1])
        for file, timestamp in sorted_files:
            if limit_from < timestamp < limit_till:
                s3_style_dir = os.path.join(bucket_dir, os.path.dirname(file))
                concate_dir.append(s3_style_dir)
                if not os.path.isdir(s3_style_dir):
                    os.makedirs(s3_style_dir)
                s3.download_file(
                    bucket_name, file,
                    os.path.join(s3_style_dir, os.path.basename(file)))
                log.info(f'File "{file}" downloaded from Amazon S3.')
                _glob.append(os.path.join(s3_style_dir,
                                          os.path.basename(file)))
        if len(concate_dir) > 0:
            sizes = [fz(s_idx) for s_idx in _glob]
            temp = [(n, s) for n, s in zip(_glob, sizes)]
            with open(os.path.join(bucket_dir, f'{bucket_name}.csv'),
                      'a', encoding=dev.DEF_CHARSET) as csv_file:
                log.info('Logging downloaded files into a CSV file.')
                _file = csv.writer(csv_file, quoting=csv.QUOTE_MINIMAL)
                _file.writerow(['Files', 'Size on disk'])
                _file.writerows(temp)
            return list(set(concate_dir))
        else:
            return []
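# A hedged usage sketch for `access_limited_files`, analogous to the Azure
# batch helper above. The credentials and bucket name are hypothetical; the
# returned directories can be walked to reach the downloaded videos.
#
#   log = logging.getLogger(__name__)
#   dirs = access_limited_files('AKIA...EXAMPLE', 'wJal...EXAMPLE',
#                               'cctv-feeds',
#                               '2020-06-01 00:00:00',
#                               '2020-06-02 00:00:00', log)
#   for directory in dirs:
#       for video in os.listdir(directory):
#           print(os.path.join(directory, video))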