def download_from_google_drive(shareable_url: str,
                               file_name: str,
                               log: logging.Logger,
                               download_path: str = downloads) -> Tuple:
    """Downloads file from the shareable url.

  Downloads file from shareable url and saves it in downloads folder.

  Args:
    shareable_url: Url of the file.
    file_name: Filename for the downloaded file.
    log: Logger object for logging the status.
    download_path: Path (default: ./downloads/) for saving file.

  Returns:
    Boolean value if the file is downloaded or not.

  Raises:
    ResponseError: If any unexpected response/error occurs.
    ResponseNotChunked: If the response is not sending correct `chunks`.

  Notes:
    This function is capable of downloading files from Google Drive iff
    these files are shareable using 'Anyone with the link' link sharing
    option.
  """
    # You can find the reference code here:
    # https://stackoverflow.com/a/39225272
    saved_file = os.path.join(download_path, f'{file_name}.mp4')
    try:
        # Everything after 'open?id=' in the shareable link is the file id.
        drive_file_id = shareable_url.split(
            'https://drive.google.com/open?id=')[1]
        session = requests.Session()
        response = session.get(dev.DRIVE_DOWNLOAD_URL,
                               params={'id': drive_file_id},
                               stream=True)
        confirm_token = fetch_confirm_token(response)
        if confirm_token:
            # Large files require a second request carrying the token.
            response = session.get(dev.DRIVE_DOWNLOAD_URL,
                                   params={
                                       'id': drive_file_id,
                                       'confirm': confirm_token
                                   },
                                   stream=True)
        # Stream the response to disk chunk by chunk.
        with open(saved_file, 'wb') as file:
            for chunk in response.iter_content(dev.CHUNK_SIZE):
                if chunk:
                    file.write(chunk)
        log.info(f'File "{file_name}.mp4" downloaded from Google Drive.')
        # A KB-sized download signals an unusable (likely error-page) file.
        if fz(saved_file).endswith('KB'):
            log.error('Unusable file downloaded since file size is in KBs.')
            return None, '[w] Unusable file downloaded.'
        return True, saved_file
    except (RequestError, RequestException):
        log.error(
            'File download from Google Drive failed because of poor network '
            'connectivity.')
        return None, '[e] Error while downloading file'
# Example #2
def access_file_update(access_key: str,
                       secret_key: str,
                       s3_url: str,
                       file_name: str,
                       log: logging.Logger,
                       bucket_name: str = None) -> Tuple:
    """Access file from S3 bucket.

  Access and download file from S3 bucket.

  Args:
    access_key: AWS access key.
    secret_key: AWS secret key.
    s3_url: Public url for the file.
    file_name: Filename for the downloaded file.
    log: Logger object for logging the status.
    bucket_name: Bucket to search and download from.

  Returns:
    Tuple of a status flag and either the downloaded file path or a
    status message.

  Notes:
    This function ensures the file exists on the S3 bucket and then
    downloads the same. If the file doesn't exist on S3, it'll return
    None.
  """
    try:
        s3 = boto3.client('s3',
                          aws_access_key_id=access_key,
                          aws_secret_access_key=secret_key)
    except (ClientError, NoCredentialsError):
        log.error('Wrong credentials used to access the AWS account.')
        return None, '[e] Error while downloading file'
    else:
        [*status, bucket, file] = check_file(access_key, secret_key, s3_url,
                                             log, bucket_name)

        if status[0]:
            saved_file = os.path.join(downloads, f'{file_name}.mp4')
            try:
                s3.download_file(bucket, file, saved_file)
            except (ClientError, NoCredentialsError):
                # The object may have been removed or become inaccessible
                # between the check and the download; honor the function's
                # tuple-error contract instead of raising.
                log.error('File download from Amazon S3 failed.')
                return None, '[e] Error while downloading file'
            log.info(
                f'File "{file_name}.mp4" downloaded from Amazon S3 storage.')

            # A KB-sized download signals an unusable file.
            if fz(saved_file).endswith('KB'):
                log.error(
                    'Unusable file downloaded since file size is in KBs.')
                return None, '[w] Unusable file downloaded.'

            return True, saved_file
        else:
            log.error(
                'File download from Amazon S3 failed because of poor network '
                'connectivity.')
            return None, '[e] Error while downloading file'
def download_from_azure(account_name: str,
                        account_key: str,
                        container_name: str,
                        blob_name: str,
                        file_name: str,
                        log: logging.Logger,
                        download_path: str = downloads) -> Tuple:
    """Download file from Microsoft Azure.

  Download file from Microsoft Azure and store it in downloads folder.

  Args:
    account_name: Azure account name.
    account_key: Azure account key.
    container_name: Container from which blob needs to be downloaded.
    blob_name: Blob to download from Microsoft Azure.
    file_name: Filename for the downloaded file.
    log: Logger object for logging the status.
    download_path: Path (default: ./downloads/) for saving file.

  Returns:
    Boolean value if the file is downloaded or not.
  """
    # You can find the reference code here:
    # https://pypi.org/project/azure-storage-blob/
    saved_file = os.path.join(download_path, f'{file_name}.mp4')
    try:
        conn_str = generate_connection_string(account_name, account_key)
        blob = BlobClient.from_connection_string(conn_str=conn_str,
                                                 container_name=container_name,
                                                 blob_name=blob_name)
        # Stream the blob straight into the destination file.
        with open(saved_file, 'wb') as file:
            blob.download_blob().readinto(file)
        log.info(f'File "{file_name}.mp4" downloaded from Microsoft Azure.')
        # A KB-sized download signals an unusable file.
        if fz(saved_file).endswith('KB'):
            log.error('Unusable file downloaded since file size is in KBs.')
            return None, '[w] Unusable file downloaded.'
        return True, saved_file
    except Exception:
        log.error('File download from Microsoft Azure failed because of poor '
                  'network connectivity.')
        return None, '[e] Error while downloading file'
def download_using_ftp(username: str,
                       password: str,
                       public_address: str,
                       remote_file: str,
                       file_name: str,
                       log: logging.Logger,
                       download_path: str = downloads) -> Tuple:
    """Download/fetch/transfer file using OpenSSH via FTP.

  Fetch file from remote machine to store it in downloads folder.

  Args:
    username: Username of the remote machine.
    password: Password of the remote machine.
    public_address: Remote server IP address.
    remote_file: Remote file to be downloaded/transferred.
    file_name: Filename for the downloaded file.
    log: Logger object for logging the status.
    download_path: Path (default: ./downloads/) for saving file.

  Returns:
    Boolean value if the file is downloaded or not.
  """
    # You can find the reference code here:
    # https://stackoverflow.com/a/56850195
    import subprocess
    try:
        # Use an argument list with shell=False so the password, address and
        # paths are never interpolated into a shell command line (the old
        # os.system() call was injection-prone and discarded the exit code).
        result = subprocess.run([
            'sshpass', '-p', password, 'scp', '-o',
            'StrictHostKeyChecking=no',
            f'{username}@{public_address}:{remote_file}', download_path
        ])
        if result.returncode != 0:
            # scp reported a failure; previously this went undetected.
            log.error('File transfer via FTP failed because of poor network '
                      'connectivity.')
            return None, '[e] Error while transferring file'
        log.info(f'File "{file_name}.mp4" transferred successfully')
        # NOTE(review): scp keeps the remote basename; this assumes the
        # remote file is named '{file_name}.mp4' — confirm with callers.
        if fz(os.path.join(download_path, f'{file_name}.mp4')).endswith('KB'):
            log.error('Unusable file transferred since file size is in KBs.')
            return None, '[w] Unusable file transferred.'
        return True, os.path.join(download_path, f'{file_name}.mp4')
    except OSError:
        # Raised e.g. when sshpass/scp is not installed.
        log.error('File transfer via FTP failed because of poor network '
                  'connectivity.')
        return None, '[e] Error while transferring file'
def batch_download_from_azure(account_name: str,
                              account_key: str,
                              container_name: str,
                              access_from: str,
                              access_to: str,
                              log: logging.Logger,
                              timestamp_format: str = '%Y-%m-%d %H:%M:%S',
                              download_path: str = downloads) -> List:
    """Download multiple files from Microsoft Azure.

  Download multiple files from Azure Blob container for particular
  timeframe.

  Args:
    account_name: Azure account name.
    account_key: Azure account key.
    container_name: Container from which blobs need to be downloaded.
    access_from: Datetime from when to start fetching files.
    access_to: Datetime till when to fetch files.
    log: Logger object for logging the status.
    timestamp_format: Timestamp format (default: %Y-%m-%d %H:%M:%S)
    download_path: Path (default: ./downloads/) for saving files.

  Returns:
    List of the directories which hosts the downloaded files.
  """
    _glob = []
    # You can find the reference code here:
    # https://pypi.org/project/azure-storage-blob/
    try:
        connection_string = generate_connection_string(account_name,
                                                       account_key)
        container = ContainerClient.from_connection_string(
            connection_string, container_name=container_name)
        # Blob timestamps are timezone-aware, so the limits must be too.
        limit_from = datetime.strptime(
            access_from, timestamp_format).replace(tzinfo=pytz.UTC)
        limit_till = datetime.strptime(
            access_to, timestamp_format).replace(tzinfo=pytz.UTC)
        # Honor the download_path parameter (previously ignored in favor of
        # the module-level `downloads` default).
        container_dir = os.path.join(download_path, container_name)
        concate_dir = []
        files_with_timestamp = {}
        # Materialize the listing once; the original iterated two separate
        # list_blobs() calls, i.e. two service round-trips.
        blobs_list = list(container.list_blobs())
        unsupported = [
            idx.name for idx in blobs_list
            if not (idx.name).endswith(video_file_extensions)
        ]
        unsupported = list(
            set(map(lambda x: os.path.splitext(x)[1], unsupported)))
        # Compare with `!=`, not `is not`: identity checks on str literals
        # are unreliable and raise SyntaxWarning on CPython 3.8+.
        unsupported = [idx for idx in unsupported if idx != '']
        if len(unsupported) > 1:
            log.info(f'Unsupported video formats like "{unsupported[0]}", '
                     f'"{unsupported[1]}", etc. will be skipped.')
        elif unsupported:
            # Guarded: the original indexed unsupported[0] even when every
            # blob had a supported extension, raising IndexError.
            log.info(f'Files ending with "{unsupported[0]}" will be skipped.')
        for blob in blobs_list:
            if (blob.name).endswith(video_file_extensions):
                files_with_timestamp[blob.name] = blob.creation_time
        # Process blobs oldest-first.
        sorted_files = sorted(files_with_timestamp.items(),
                              key=lambda xa: xa[1])
        for file, timestamp in sorted_files:
            if timestamp > limit_from and timestamp < limit_till:
                # Mirror the blob's virtual-directory layout on disk.
                blob_style_dir = os.path.join(container_dir,
                                              os.path.dirname(file))
                concate_dir.append(blob_style_dir)
                if not os.path.isdir(blob_style_dir):
                    os.makedirs(blob_style_dir)
                # file[:-4] strips the extension for the local filename.
                download_from_azure(account_name, account_key, container_name,
                                    file, os.path.basename(file[:-4]), log,
                                    blob_style_dir)
                _glob.append(
                    os.path.join(blob_style_dir, os.path.basename(file)))
        if len(concate_dir) > 0:
            # Log every downloaded file with its on-disk size into a CSV.
            sizes = [fz(s_idx) for s_idx in _glob]
            temp = [(n, s) for n, s in zip(_glob, sizes)]
            with open(os.path.join(container_dir, f'{container_name}.csv'),
                      'a',
                      encoding=dev.DEF_CHARSET) as csv_file:
                log.info('Logging downloaded files into a CSV file.')
                _file = csv.writer(csv_file, quoting=csv.QUOTE_MINIMAL)
                _file.writerow(['Files', 'Size on disk'])
                _file.writerows(temp)
            return list(set(concate_dir))
        else:
            return []
    except Exception as e:
        log.exception(e)
        log.error('File download from Microsoft Azure failed because of poor '
                  'network connectivity.')
        return []
# Example #6
def access_limited_files(access_key: str,
                         secret_key: str,
                         bucket_name: str,
                         access_from: str,
                         access_to: str,
                         log: logging.Logger,
                         timestamp_format: str = '%Y-%m-%d %H:%M:%S') -> List:
    """Access files from S3 bucket for particular timeframe.

  Access and download file from S3 bucket for particular timeframe.

  Args:
    access_key: AWS access key.
    secret_key: AWS secret key.
    bucket_name: Bucket to search and download from.
    access_from: Datetime from when to start fetching files.
    access_to: Datetime till when to fetch files.
    log: Logger object for logging the status.
    timestamp_format: Timestamp format (default: %Y-%m-%d %H:%M:%S)

  Returns:
    List of the directories which hosts the downloaded files.
  """
    _glob = []
    try:
        s3 = boto3.client('s3',
                          aws_access_key_id=access_key,
                          aws_secret_access_key=secret_key)
    except (ClientError, NoCredentialsError):
        log.error('Wrong credentials used to access the AWS account.')
        return []
    else:
        # S3 'LastModified' timestamps are timezone-aware, so the limits
        # must be too.
        limit_from = datetime.strptime(
            access_from, timestamp_format).replace(tzinfo=pytz.UTC)
        limit_till = datetime.strptime(
            access_to, timestamp_format).replace(tzinfo=pytz.UTC)
        bucket_dir = os.path.join(downloads, bucket_name)
        concate_dir = []
        files_with_timestamp = {}

        all_files = s3.list_objects_v2(Bucket=bucket_name)
        # 'Contents' is absent from the response when the bucket is empty;
        # the original raised KeyError in that case.
        contents = all_files.get('Contents', [])
        unsupported = [
            idx['Key'] for idx in contents
            if not idx['Key'].endswith(video_file_extensions)
        ]
        unsupported = list(
            set(map(lambda x: os.path.splitext(x)[1], unsupported)))
        # Compare with `!=`, not `is not`: identity checks on str literals
        # are unreliable and raise SyntaxWarning on CPython 3.8+.
        unsupported = [idx for idx in unsupported if idx != '']

        if len(unsupported) > 1:
            log.info(f'Unsupported video formats like "{unsupported[0]}", '
                     f'"{unsupported[1]}", etc. will be skipped.')
        elif unsupported:
            # Guarded: the original indexed unsupported[0] even when every
            # object had a supported extension, raising IndexError.
            log.info(f'Files ending with "{unsupported[0]}" will be skipped.')

        for files in contents:
            if files['Key'].endswith(video_file_extensions):
                files_with_timestamp[files['Key']] = files['LastModified']

        # Process objects oldest-first.
        sorted_files = sorted(files_with_timestamp.items(),
                              key=lambda xa: xa[1])

        for file, timestamp in sorted_files:
            if timestamp > limit_from and timestamp < limit_till:
                # Mirror the object's key prefix layout on disk.
                s3_style_dir = os.path.join(bucket_dir, os.path.dirname(file))
                concate_dir.append(s3_style_dir)
                if not os.path.isdir(s3_style_dir):
                    os.makedirs(s3_style_dir)
                s3.download_file(
                    bucket_name, file,
                    os.path.join(s3_style_dir, os.path.basename(file)))
                log.info(f'File "{file}" downloaded from Amazon S3.')
                _glob.append(os.path.join(s3_style_dir,
                                          os.path.basename(file)))

        if len(concate_dir) > 0:
            # Log every downloaded file with its on-disk size into a CSV.
            sizes = [fz(s_idx) for s_idx in _glob]
            temp = [(n, s) for n, s in zip(_glob, sizes)]
            with open(os.path.join(bucket_dir, f'{bucket_name}.csv'),
                      'a',
                      encoding=dev.DEF_CHARSET) as csv_file:
                log.info('Logging downloaded files into a CSV file.')
                _file = csv.writer(csv_file, quoting=csv.QUOTE_MINIMAL)
                _file.writerow(['Files', 'Size on disk'])
                _file.writerows(temp)
            return list(set(concate_dir))

        else:
            return []