Example #1
    def get(self, filepath_id):
        fid = int(filepath_id)

        if not validate_filepath_access_by_user(self.current_user, fid):
            raise HTTPError(
                403, "%s doesn't have access to "
                "filepath_id: %s" % (self.current_user.email, str(fid)))

        relpath = filepath_id_to_rel_path(fid)
        fp_info = get_filepath_information(fid)
        fname = basename(relpath)

        if fp_info['filepath_type'] in ('directory', 'html_summary_dir'):
            # This is a directory; list all the files so NGINX can
            # download all of them
            to_download = self._list_dir_files_nginx(fp_info['fullpath'])
            self._write_nginx_file_list(to_download)
            fname = '%s.zip' % fname
        else:
            self._write_nginx_placeholder_file(relpath)
            self.set_header('Content-Type', 'application/octet-stream')
            self.set_header('Content-Transfer-Encoding', 'binary')
            self.set_header('X-Accel-Redirect', '/protected/' + relpath)

        self._set_nginx_headers(fname)
        self.finish()
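The heavy lifting here is delegated to NGINX: Tornado only authorizes the
request and sets headers, and the X-Accel-Redirect header tells NGINX to
stream the bytes itself from an internal location. A minimal,
self-contained sketch of the same pattern (the /protected/ alias and the
handler below are illustrative, not Qiita's actual code):

import tornado.ioloop
import tornado.web


class ProtectedDownloadHandler(tornado.web.RequestHandler):
    def get(self, relpath):
        # access-control checks would go here
        self.set_header('Content-Type', 'application/octet-stream')
        self.set_header('Content-Disposition',
                        'attachment; filename=%s' % relpath)
        # NGINX intercepts this header and serves the file from the
        # internal location mapped to /protected/
        self.set_header('X-Accel-Redirect', '/protected/' + relpath)
        self.finish()


if __name__ == '__main__':
    app = tornado.web.Application(
        [(r'/download/(.*)', ProtectedDownloadHandler)])
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()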
Example #2
    def get(self, filepath_id):
        fid = int(filepath_id)

        if not validate_filepath_access_by_user(self.current_user, fid):
            raise HTTPError(
                403, "%s doesn't have access to "
                "filepath_id: %s" % (self.current_user.email, str(fid)))

        relpath = filepath_id_to_rel_path(fid)
        fp_info = get_filepath_information(fid)
        fname = basename(relpath)

        if fp_info['filepath_type'] in ('directory', 'html_summary_dir'):
            # This is a directory; list all the files so NGINX can
            # download all of them
            to_download = self._list_dir_files_nginx(fp_info['fullpath'])
            self._write_nginx_file_list(to_download)
            fname = '%s.zip' % fname
        else:
            self._write_nginx_placeholder_file(relpath)
            self.set_header('Content-Type', 'application/octet-stream')
            self.set_header('Content-Transfer-Encoding', 'binary')
            self.set_header('X-Accel-Redirect', '/protected/' + relpath)
            aid = filepath_id_to_object_id(fid)
            if aid is not None:
                fname = '%d_%s' % (aid, fname)

        self._set_nginx_headers(fname)
        self.finish()
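The only change from Example #1 is that the artifact id, when one exists,
is prefixed to the download name. For the directory branch,
_write_nginx_file_list presumably emits the manifest consumed by the NGINX
mod_zip module: one "crc32 size url name" line per file plus an
X-Archive-Files: zip response header. A hedged sketch of such a helper
(the /protected/ URL prefix and the '-' placeholder for the CRC are
assumptions):

from os import walk
from os.path import getsize, join, relpath


def write_nginx_file_list(handler, dirpath, url_prefix='/protected/'):
    # one mod_zip manifest line per file: "<crc32> <size> <url> <name>";
    # '-' means no precomputed CRC
    lines = []
    for root, _, files in walk(dirpath):
        for fname in files:
            full = join(root, fname)
            rel = relpath(full, dirpath)
            lines.append('- %d %s %s' % (getsize(full),
                                         url_prefix + rel, rel))
    handler.set_header('X-Archive-Files', 'zip')
    handler.write('\n'.join(lines))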
Example #3
    try:
        size = getsize(finfo['fullpath'])
    except (FileNotFoundError, PermissionError):
        # file missing or unreadable: return the full finfo dict (not
        # just the id) with no checksum or size
        return finfo, None, None

    checksum = compute_checksum(finfo['fullpath'])

    return finfo['filepath_id'], checksum, size


# get all filepaths and their filepath information; takes ~10 min
with TRN:
    TRN.add("SELECT filepath_id FROM qiita.filepath")
    files = []
    for fid in TRN.execute_fetchflatten():
        files.append(get_filepath_information(fid))

# keep only the filepath ids that haven't been processed yet; each line
# of the cache file is filepath_id[tab]checksum[tab]filesize
fpath = join(dirname(abspath(__file__)), '74.py.cache.tsv')
processed = []
if exists(fpath):
    with open(fpath, 'r') as f:
        processed = [
            int(line.split('\t')[0]) for line in f.read().split('\n')
            if line != ''
        ]
files_curr = [f for f in files if f['filepath_id'] not in processed]

# let's use 20 processors and 120 files in each iteration
fids = 120
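The comment above hints at the intended fan-out: 20 worker processes,
120 files per batch. A sketch of what that driver might look like,
assuming the truncated helper at the top of this example is named
get_file_info (its def line is cut off):

from concurrent.futures import ProcessPoolExecutor

fids = 120        # files per batch
processors = 20   # worker processes

results = []
with ProcessPoolExecutor(max_workers=processors) as executor:
    # walk files_curr in batches of `fids` so partial results can be
    # flushed to the cache file between batches
    for start in range(0, len(files_curr), fids):
        batch = files_curr[start:start + fids]
        results.extend(executor.map(get_file_info, batch))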
Example #4
    fids = TRN.execute_fetchflatten()

fpath = join(dirname(abspath(__file__)), 'support_files', 'patches',
             'python_patches', '74.py.cache.tsv')
cache = dict()
if exists(fpath):
    df = pd.read_csv(fpath,
                     sep='\t',
                     index_col=0,
                     dtype=str,
                     names=['filepath_id', 'checksum', 'fp_size'])
    cache = df.to_dict('index')

for fid in fids:
    # the cache was read with dtype=str, so its keys are strings while
    # fid is an int; compare as strings or nothing will ever match
    if str(fid) not in cache:
        finfo = get_filepath_information(fid)
        try:
            size = getsize(finfo['fullpath'])
        except FileNotFoundError:
            size = 0

        try:
            checksum = compute_checksum(finfo['fullpath'])
        except FileNotFoundError:
            checksum = ''
    else:
        checksum = cache[str(fid)]['checksum']
        size = cache[str(fid)]['fp_size']

    with TRN:
        sql = """UPDATE qiita.filepath
Example #5
             FROM qiita.filepath"""
    TRN.add(sql)
    fids = TRN.execute_fetchflatten()


fpath = join(dirname(abspath(__file__)), 'support_files', 'patches',
             'python_patches', '74.py.cache.tsv')
cache = dict()
if exists(fpath):
    df = pd.read_csv(fpath, sep='\t', index_col=0, dtype=str,
                     names=['filepath_id', 'checksum', 'fp_size'])
    cache = df.to_dict('index')

for fid in fids:
    # the cache was read with dtype=str, so its keys are strings while
    # fid is an int; compare as strings or nothing will ever match
    if str(fid) not in cache:
        finfo = get_filepath_information(fid)
        try:
            size = getsize(finfo['fullpath'])
        except FileNotFoundError:
            size = 0

        try:
            checksum = compute_checksum(finfo['fullpath'])
        except FileNotFoundError:
            checksum = ''
    else:
        checksum = cache[str(fid)]['checksum']
        size = cache[str(fid)]['fp_size']

    with TRN:
        sql = """UPDATE qiita.filepath
Example #6
    try:
        size = getsize(finfo['fullpath'])
    except (FileNotFoundError, PermissionError):
        # file missing or unreadable: return the full finfo dict (not
        # just the id) with no checksum or size
        return finfo, None, None

    checksum = compute_checksum(finfo['fullpath'])

    return finfo['filepath_id'], checksum, size


# get all filepaths and their filepath information; takes ~10 min
with TRN:
    TRN.add("SELECT filepath_id FROM qiita.filepath")
    files = []
    for fid in TRN.execute_fetchflatten():
        files.append(get_filepath_information(fid))


# keep only the filepath ids that haven't been processed yet; each line
# of the cache file is filepath_id[tab]checksum[tab]filesize
fpath = join(dirname(abspath(__file__)), '74.py.cache.tsv')
processed = []
if exists(fpath):
    with open(fpath, 'r') as f:
        processed = [int(line.split('\t')[0])
                     for line in f.read().split('\n') if line != '']
files_curr = [f for f in files if f['filepath_id'] not in processed]

# let's use 20 processors and 120 files in each iteration
fids = 120
processors = 20
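compute_checksum is called throughout these examples but never shown. A
minimal stand-in that streams the file through CRC32 in fixed-size chunks
(whether Qiita's implementation uses CRC32 or another digest is an
assumption):

from zlib import crc32


def compute_checksum(filepath, chunk_size=1024 * 1024):
    checksum = 0
    with open(filepath, 'rb') as f:
        # read in chunks so arbitrarily large files fit in memory
        for chunk in iter(lambda: f.read(chunk_size), b''):
            checksum = crc32(chunk, checksum)
    return checksum & 0xFFFFFFFF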