Example #1
    def merge_part_files(self, file_dir, left_file, right_file, new_file):
        '''file_dir: directory containing the part files
           left_file: leading (front) part
           right_file: trailing (rear) part
           new_file: name of the merged part file
        '''
        try:
            absolute_new_file = os.path.join(file_dir, new_file)
            absolute_left_file = os.path.join(file_dir, left_file)
            absolute_right_file = os.path.join(file_dir, right_file)
            # Append the trailing part onto the leading part in 1 MB blocks.
            with open(absolute_left_file, mode='ab') as lf:
                with open(absolute_right_file, mode='rb') as rf:
                    while True:
                        buf = rf.read(1 * 1024 * 1024)
                        if len(buf) == 0:
                            break
                        lf.write(buf)
                        lf.flush()
            # The grown leading part becomes the new merged part file.
            Utils.try_remove(absolute_new_file)
            shutil.move(absolute_left_file, absolute_new_file)
            self.aios_print('parts merged:', f'[{left_file}]', f'[{right_file}]')
            file_size = os.path.getsize(absolute_new_file)
            self.aios_print('new file name:', f'[{new_file}]', file_size, 'block:',
                            int(file_size / (5 * 1024 * 1024)))
        finally:
            # Mark the consumed trailing part for deletion; *.deleted files are
            # cleaned up later by the merge handler.
            if os.path.exists(absolute_right_file):
                os.rename(absolute_right_file,
                          absolute_right_file + '.deleted')
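A minimal usage sketch (not from the source): it assumes `handler` is an instance of the class defining merge_part_files and uses the `<task_id>.<chunk range>` part-naming convention that the chunk-merge handler below relies on; the directory and file names here are hypothetical.

# Hypothetical call: append part "abc.2-3" onto part "abc.1" and publish the
# result as the combined part "abc.1-3" in the same directory.
handler.merge_part_files(file_dir='/mnt/nfs/upload/42/abc',
                         left_file='abc.1',
                         right_file='abc.2-3',
                         new_file='abc.1-3')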
Example #2
    def single_file_handler(self, single_chunk_files):
        '''Handle files that consist of a single chunk.
        '''
        succ_list = []
        err_list = []
        wait_add_tasks = []
        wait_update_tasks = []
        taskdao = TaskDAO()

        tenant_id = g.tenant_id if hasattr(g, 'tenant_id') else 0
        batches = _.uniq(
            [Utils.get_batch(i.get('dir_path')) for i in single_chunk_files])
        task_ids = [i.get('file_key') for i in single_chunk_files]
        exist_tasks = taskdao.get_tasks(tenant_id, batches, task_ids)
        exist_tasks_map = {}
        for exist_task in exist_tasks:
            exist_tasks_map[exist_task.task_id] = exist_task.id

        for single_chunk_file in single_chunk_files:
            try:
                file_key = single_chunk_file.get('file_key')
                dir_path = single_chunk_file.get('dir_path')
                file_name = single_chunk_file.get('file_name')
                tenant_id = single_chunk_file.get('tenant_id')
                user_id = single_chunk_file.get('user_id')

                merged_file = os.path.join(dir_path, file_name)
                final_merged_file = os.path.join(dir_path, f'{file_key}.1')
                shutil.move(final_merged_file, merged_file)

                task_json = {
                    'created_by': user_id,
                    'tenant_id': tenant_id,
                    'task_id': file_key,
                    'chunks': '1',
                    'status': TASK_STATUS_MERGED,
                    'size': os.path.getsize(merged_file),
                    'link': {
                        'host': Utils.get_host(dir_path)
                    },
                    'batch': Utils.get_batch(dir_path)
                }
                if exist_tasks_map.get(file_key):
                    task_json['id'] = exist_tasks_map.get(file_key)
                    wait_update_tasks.append(task_json)
                else:
                    wait_add_tasks.append(task_json)
                succ_list.append({'file_key': file_key, 'curr_chunk': 1})
            except Exception as err:
                err_list.append({'file_key': file_key, 'curr_chunk': 1})

        if len(wait_add_tasks):
            taskdao.bulk_add(wait_add_tasks)
        if len(wait_update_tasks):
            taskdao.bulk_update(wait_update_tasks)
        return succ_list, err_list
Example #3
    def get_standard_sub_dir(self, sub_dir):
        '''
            #### Get the standard sub-directory under the in-container mount point, creating it if it does not exist
        '''
        sub_dir = sub_dir.strip(os.path.sep).replace('/', os.path.sep)
        # Absolute path of the sub-directory that is directly accessible inside the container
        absolute_dir_path = os.path.join(Utils.mount_point(),
                                         sub_dir.strip(os.path.sep))
        Utils.try_makedirs(absolute_dir_path)

        return absolute_dir_path
Example #4
    def start_thread_save_db(self, msg_list):
        taskdao = TaskDAO()

        # Flush the cached uploads to the database on background threads, one per batch.
        def start_thread(x):
            batch = Utils.get_batch(x)
            thread_key = taskdao.thread_key.format(batch)
            if not self.aios_redis.get(thread_key):
                # No marker in Redis yet: start the scheduled flush thread.
                threading.Thread(target=taskdao.save_to_db,
                                 args=(g.user_id, g.tenant_id, batch),
                                 daemon=True).start()
                self.aios_print('started scheduled flush thread', batch)
                self.aios_redis.set(thread_key, 'Running',
                                    taskdao.THREAD_KEY_TIMEOUT)
            elif self.aios_redis.ttl(thread_key) < 10:
                # The marker is about to expire: restart the flush thread and renew it.
                threading.Thread(target=taskdao.save_to_db,
                                 args=(g.user_id, g.tenant_id, batch),
                                 daemon=True).start()
                self.aios_print('thread exists but its key is about to expire, '
                                'restarting scheduled flush thread', batch)
                self.aios_redis.set(thread_key, 'Running',
                                    taskdao.THREAD_KEY_TIMEOUT)
            else:
                self.aios_print('thread already running, no need to start', batch)

        # Start one thread per unique host
        _.chain(msg_list). \
            map_(lambda x: Utils.get_host(x.get('dir_path'))). \
            uniq(). \
            for_each(start_thread). \
            value()
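The pydash chain above only deduplicates the hosts derived from the messages and then calls start_thread once per host. A plain-Python sketch of the same traversal, assuming msg_list keeps the shape used above (a list of dicts with a 'dir_path' key):

# Equivalent loop without pydash: keep hosts in first-seen order, drop
# duplicates, then start one flush thread per unique host.
seen_hosts = []
for msg in msg_list:
    host = Utils.get_host(msg.get('dir_path'))
    if host not in seen_hosts:
        seen_hosts.append(host)
for host in seen_hosts:
    start_thread(host)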
Example #5
def delete(sub_path):
    """
    @@@
    #### 文件删除
        in: path
        参数: {
                # 文件在NFS文件存储区的存放目录,可以以<挂载点>/<sub_path>找到
                "sub_path": '<模块名>/<子目录>/.../<文件名>' 
                }
        返回值: 204 | 200
    @@@
    """
    try:
        if not sub_path or not os.path.basename(sub_path):
            return bad_request('sub_path')

        store_path = os.path.join(Utils.mount_point(), sub_path)

        if os.path.exists(store_path):
            os.remove(store_path)
            if not os.listdir(os.path.dirname(store_path)):
                os.rmdir(os.path.dirname(store_path))
            if not os.listdir(os.path.dirname(os.path.dirname(store_path))):
                os.rmdir(os.path.dirname(os.path.dirname(store_path)))

            return standard_response(None, 204)
        else:
            return standard_response(None)
    except Exception as err:
        return standard_expection(str(err))
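The two listdir/rmdir checks above prune at most two levels of empty parent directories. A hedged sketch of the same idea generalized to walk upward until a non-empty directory or the mount point is reached; prune_empty_dirs is a hypothetical helper, not part of the source:

import os

def prune_empty_dirs(path, stop_at):
    # Walk up from the deleted file's location, removing directories while
    # they are empty and we are still below the storage mount point.
    current = os.path.dirname(path)
    while current.startswith(stop_at) and current != stop_at:
        if os.listdir(current):
            break
        os.rmdir(current)
        current = os.path.dirname(current)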
Example #6
def simple_multi_files_upload():
    """简单文件批量上传,建议文件大小尽量不要超过20MB
    """
    try:
        if len(request.files) == 0:
            return standard_expection('没有任何文件')
        link_dict_list = []
        wait_add_records = []
        user_id = g.user_id if hasattr(g, 'user_id') else 0
        for file_key in request.files:
            file = request.files[file_key]
            [(file_name, file_extension)] = re.findall(r'(\S+)\.(\S+)$',
                                                       file.filename)

            full_dir_path = os.path.join(Utils.mount_point(), str(user_id))
            if not os.path.exists(full_dir_path):
                os.makedirs(full_dir_path)

            task_id = str(uuid.uuid4())
            file_name = '{}.{}'.format(task_id, file_extension)

            save_path = os.path.join(full_dir_path, file_name)
            file.save(save_path)

            host = save_path.replace(current_app.config['FILE_STORAGE_DIR'],
                                     '', 1)

            http = '{}/{}'.format(current_app.config['FILE_STORAGE_HOST'],
                                  host)

            link_dict = {'http': http.replace('\\', '/'), 'host': host}

            wait_add_records.append({
                'task_id': task_id,
                'chunks': -1,
                'status': TASK_STATUS_NONE,
                'size': os.path.getsize(save_path),
                'link': link_dict,
                'created_by': g.user_id if hasattr(g, 'user_id') else 0,
                'tenant_id': g.tenant_id if hasattr(g, 'tenant_id') else 0
            })
            link_dict_list.append(link_dict)

        if len(wait_add_records):
            TaskModel.bulk_insert_mappings(wait_add_records)
        return standard_response(link_dict_list, 201)

    except Exception as err:
        return standard_expection(str(err))
Example #7
    def notify_thread_stop(self, msg_list):
        taskdao = TaskDAO()

        def stop_thread(x):
            batch = Utils.get_batch(x)
            thread_key = taskdao.thread_key.format(batch)
            self.aios_redis.set(thread_key, 'Stop')

        _.chain(msg_list). \
            map_(lambda x: Utils.get_host(x.get('dir_path'))). \
            uniq(). \
            for_each(stop_thread). \
            value()
Example #8
    def start_thread(x):
        batch = Utils.get_batch(x)
        thread_key = taskdao.thread_key.format(batch)
        if not self.aios_redis.get(thread_key):
            # No marker in Redis yet: start the scheduled flush thread.
            threading.Thread(target=taskdao.save_to_db,
                             args=(g.user_id, g.tenant_id, batch),
                             daemon=True).start()
            self.aios_print('started scheduled flush thread', batch)
            self.aios_redis.set(thread_key, 'Running',
                                taskdao.THREAD_KEY_TIMEOUT)
        elif self.aios_redis.ttl(thread_key) < 10:
            # The marker is about to expire: restart the flush thread and renew it.
            threading.Thread(target=taskdao.save_to_db,
                             args=(g.user_id, g.tenant_id, batch),
                             daemon=True).start()
            self.aios_print('thread exists but its key is about to expire, '
                            'restarting scheduled flush thread', batch)
            self.aios_redis.set(thread_key, 'Running',
                                taskdao.THREAD_KEY_TIMEOUT)
        else:
            self.aios_print('thread already running, no need to start', batch)
Example #9
def simple_file_upload():
    """
    @@@
    #### 文件分片上传接口
        in: body
        参数: {
                "file": 文件对象

                # 文件在NFS文件存储区的存放目录,可以以<挂载点>/<sub_dir>找到
                "sub_dir": '<数据源id>/<数据集id>/<虚拟目录id>/<虚拟目录id>/...' 
                }
        返回值: {
                "data": 'ok'
                }, 201
    @@@
    """
    try:
        file = request.files.get('file')
        if not file:
            return bad_request('file')

        # splitext returns (root, extension); keep the extension (with its dot).
        file_extension = os.path.splitext(file.filename)[1]

        user_id = g.user_id if hasattr(g, 'user_id') else 0

        full_dir_path = os.path.join(Utils.mount_point(), str(user_id))
        if not os.path.exists(full_dir_path):
            os.makedirs(full_dir_path)

        file_name = '{}{}'.format(str(uuid.uuid4()), file_extension)
        file.save(os.path.join(full_dir_path, file_name))

        host = os.path.join(full_dir_path, file_name)

        http = '{}/{}'.format(current_app.config['FILE_STORAGE_HOST'], host)

        link_dict = {'http': http.replace('\\', '/'), 'host': host}
        return standard_response(link_dict, 201)

    except Exception as err:
        return standard_expection(str(err))
Example #10
    def stop_thread(x):
        batch = Utils.get_batch(x)
        thread_key = taskdao.thread_key.format(batch)
        self.aios_redis.set(thread_key, 'Stop')
Example #11
    def porter_running(self,
                       task_id,
                       total_chunk,
                       part_file,
                       merged_file,
                       tenant_id,
                       user_id,
                       timeout=60):
        try:
            from run import app as inner_app
            from app import aios_redis

            with inner_app.app_context():
                file_cache = FileCache(task_id)
                chunk = 1
                start = time.time()
                task = self.get_task(tenant_id, task_id)
                if task is None:
                    args = {
                        'task_id': task_id,
                        'tenant_id': tenant_id,
                        'created_by': user_id
                    }
                    task = TaskModel(**args)
                task.status = TASK_STATUS_MERGING
                task.save()

                wait_remove_files = []
                with open(merged_file, mode='ab+') as dst_f:
                    print(datetime.datetime.now(), f"{task_id} start while")
                    while (chunk <= total_chunk
                           and time.time() - start <= timeout):
                        # Refresh the wait deadline.
                        start = time.time()
                        src_part_file = f'{part_file}{chunk}'
                        # A part file's existence alone does not mean it is safe to
                        # read: under concurrent uploads the file may exist while it
                        # is still being written to disk, so its content could be
                        # incomplete. The upload endpoint marks the chunk status
                        # after saving the part file, and that marker is checked
                        # here in real time instead.
                        # print(datetime.datetime.now(), f"get redis chunk. {task_id} get chunk: {chunk}",
                        #       {file_cache.get_chunk_status(chunk)})
                        if file_cache.get_chunk_status(chunk):
                            with open(src_part_file, mode='rb') as src_f:
                                src_f_binary = src_f.read()
                                dst_f.write(src_f_binary)
                            # Record the merged chunk number in the database.
                            task.chunks = '{},{}'.format(task.chunks,
                                                         chunk).strip(',')
                            task.save()

                            file_cache.inc_counter()
                            chunk += 1
                            # Defer removal of the consumed part file until the merge finishes.
                            wait_remove_files.append(src_part_file)
                        else:
                            time.sleep(0.5)

                    print(datetime.datetime.now(), "chunk:", chunk,
                          "total_chunk:", total_chunk, "end while")

                host = merged_file.replace(
                    current_app.config['FILE_STORAGE_DIR'],
                    '').replace(os.path.sep, '/')
                http = f'{current_app.config["FILE_STORAGE_HOST"]}/{host}'
                task.link = {'http': http.replace('\\', '/'), 'host': host}
                task.size = os.path.getsize(merged_file)
                task.status = TASK_STATUS_MERGED
                task.save()
                # Notify the main process of the merge completion.
                file_cache.set_task_completed(task.link)
                print('⭐porter_running⭐', 'finished', task.link)

                if len(wait_remove_files):
                    for wait_remove_file in wait_remove_files:
                        Utils.try_remove(wait_remove_file)
        except Exception as e:
            print(e)
Example #12
    def multi_file_handler(self, msg):
        '''Handle the write/merge of a single file inside a worker subprocess.
        '''
        try:
            # When SQLAlchemy is loaded inside an upload worker process, limit each process's DB connection pool size.
            os.environ['SQLALCHEMY_POOL_SIZE'] = '1'
            from run import app as inner_app
            from app import aios_redis

            with inner_app.app_context():
                self.aios_redis = aios_redis

                file_key = msg.get('file_key')
                dir_path = msg.get('dir_path')
                file_name = msg.get('file_name')
                curr_chunk = msg.get('curr_chunk')
                total_chunks = msg.get('total_chunks')
                tenant_id = msg.get('tenant_id')
                user_id = msg.get('user_id')
                cache_expired_time = msg.get('cache_expired_time')

                wait_lock = f'plus_uploader:lock:{file_key}'
                while not self.aios_redis.setnx(wait_lock,
                                                f'lock.{curr_chunk}'):
                    # self.aios_print(file_key, curr_chunk, 'task waiting...')
                    time.sleep(0.001)
                else:
                    self.lock = f'{wait_lock} by {curr_chunk}'
                    self.aios_print(file_key, curr_chunk,
                                    f'current lock: lock.{curr_chunk}')
                    # The lock is now held exclusively by this chunk.
                    # wait_lock timeout = number of existing, not-yet-merged parts * estimated expiry time per part
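                    # Hypothetical illustration (numbers are assumptions, not from
                    # the source): with cache_expired_time = 60 seconds and three
                    # unmerged parts on disk, the expire() call below keeps the
                    # lock alive for roughly 3 * 60 = 180 seconds.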
                    parts = len(
                        _.filter_(
                            os.listdir(dir_path),
                            lambda x: '-' not in x and '.deleted' not in x))
                    count = max(parts, 1)
                    self.aios_print(f'file directory {dir_path}, parts currently on disk: {parts}')
                    self.aios_redis.expire(wait_lock,
                                           cache_expired_time * count)
                    # self.aios_print(f'adjusted lock timeout: {cache_expired_time * count}')
                    # Update the task status.
                    args = {
                        'task_id': file_key,
                        'tenant_id': tenant_id,
                        'created_by': user_id,
                        'updated_by': user_id,
                        'batch': Utils.get_batch(dir_path),
                        'status': TASK_STATUS_NONE,
                        'chunks': '',
                        'size': 0,
                        'link': {
                            'host': Utils.get_host(dir_path)
                        }
                    }
                    taskdao = TaskDAO()
                    # task_json = taskdao.get_task_from_cache(tenant_id, file_key)
                    task_json = taskdao.get_task(tenant_id,
                                                 file_key,
                                                 json=True)
                    if task_json is None:
                        task_json = taskdao.add(args)

                    if task_json['status'] != TASK_STATUS_MERGED:
                        task_json['status'] = TASK_STATUS_MERGING
                        # taskdao.update(task_json)
                        TaskModel.bulk_update_mappings([task_json])

                        # Merge the parts.
                        # Full path of the merged output file.
                        merged_file = os.path.join(dir_path, file_name)

                        merge_process = self.partation_merge(
                            dir_path, file_key, total_chunks)
                        merge_process = _.map_(
                            merge_process,
                            lambda x: x.replace(f'{file_key}.', ''))
                        except_complete_name = f'1-{total_chunks}' if total_chunks > 1 else '1'
                        # Completeness check, e.g. merge_process ['1-701'] vs
                        # except_complete_name '1-701'.
                        self.aios_print('completeness check', merge_process,
                                        'except_complete_name',
                                        except_complete_name)
                        if except_complete_name in merge_process:
                            for f in os.listdir(dir_path):
                                self.aios_print('comparing file name', f,
                                                except_complete_name)
                                if f.startswith(f'{file_key}.') and (
                                        not f.endswith(except_complete_name)):
                                    Utils.try_remove(os.path.join(dir_path, f))
                                    self.aios_print('removed leftover stray file', f)
                            # Rename the merged file to its final name.
                            final_merged_file = os.path.join(
                                dir_path, f'{file_key}.{except_complete_name}')
                            shutil.move(final_merged_file, merged_file)
                            self.aios_print('renamed file', final_merged_file, '>>',
                                            merged_file)
                            # Record the task status.
                            task_json['chunks'] = ','.join(
                                [str(i + 1) for i in range(total_chunks)])
                            task_json['status'] = TASK_STATUS_MERGED
                            task_json['size'] = os.path.getsize(merged_file)

                            task_json['link'] = {
                                'host': Utils.get_host(dir_path)
                            }

                            # Merge finished.
                            self.aios_print(f'{curr_chunk}. merge finished')
                        else:
                            # ['1', '3-5', '10']
                            covert_process = []
                            for section in merge_process:
                                if '-' in section:
                                    [left, right] = section.split('-')
                                    covert_process.extend([
                                        str(i)
                                        for i in range(int(left),
                                                       int(right) + 1)
                                    ])
                                else:
                                    covert_process.append(section)

                            task_json['chunks'] = ','.join(covert_process)
                            # Not complete yet.
                            self.aios_print(f'{curr_chunk}. incomplete, keep waiting')
                        # Save the state.
                        # taskdao.update(task_json)
                        self.aios_print(f'updating status, {task_json}')
                        TaskModel.bulk_update_mappings([task_json])
                    else:
                        # Already merged: no need to merge again, just exit.
                        self.aios_print(f'{curr_chunk}. already merged, skipping')
                    # Release the lock.
                    self.aios_redis.delete(wait_lock)
                    self.aios_print(f'done. {curr_chunk}/{total_chunks}')

                    # Clean up part files that were marked as deletable.
                    for file in os.listdir(dir_path):
                        if file.endswith('.deleted'):
                            self.aios_print('cleaning up part marked as deletable', file)
                            Utils.try_remove(os.path.join(dir_path, file))

                return {'file_key': file_key, 'curr_chunk': curr_chunk}, None
        except Exception as err:
            import traceback
            traceback.print_exc()
            print('multi_file_handler', err)
            return None, {
                'file_key': msg['file_key'],
                'curr_chunk': msg['curr_chunk']
            }
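A minimal sketch of the per-file spin lock used in multi_file_handler, shown in isolation with plain redis-py calls; the key follows the plus_uploader:lock:{file_key} pattern above, while the client construction, the timeout value, and the try/finally release are assumptions of this sketch:

import time

import redis

redis_client = redis.Redis()

def with_file_lock(file_key, timeout=60):
    # Spin until SETNX claims the per-file lock, then bound its lifetime so a
    # crashed worker cannot hold the file forever.
    lock_key = f'plus_uploader:lock:{file_key}'
    while not redis_client.setnx(lock_key, 'locked'):
        time.sleep(0.001)
    redis_client.expire(lock_key, timeout)
    try:
        pass  # exclusive section: merge parts, update the task row, ...
    finally:
        redis_client.delete(lock_key)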
Example #13
def file_upload_async():
    print("start", request.form)
    task_id = request.form.get('task_id')
    file_cache = FileCache(task_id)
    key = None
    fileHandler = FileHandler()

    file = request.files.get('file')
    sub_dir = request.form.get('sub_dir') or "/upload"
    # chunkNumber starts at 1
    chunk = request.form.get('chunkNumber', type=int, default=1)
    totalChunks = request.form.get('totalChunks', type=int)

    if not file:
        return bad_request('file')
    if not sub_dir:
        return bad_request('sub_dir')
    if not task_id:
        return bad_request('task_id')
    if not totalChunks:
        return bad_request('totalChunks')

    try:
        user_id = g.user_id if hasattr(g, 'user_id') else 0
        absolute_dir_path = os.path.join(Utils.get_upload_path(), str(user_id),
                                         str(task_id))
        Utils.try_makedirs(absolute_dir_path)

        if file_cache.get_chunk_status(chunk):
            return standard_expection(
                f'Chunk {chunk} of file {task_id} has already been uploaded; do not repeat the operation!')

        tenant_id = g.tenant_id if hasattr(g, 'tenant_id') else 0
        exist_task = TaskModel.query.filter(
            TaskModel.tenant_id == tenant_id,
            TaskModel.task_id == task_id).first()
        if exist_task and exist_task.status == TASK_STATUS_MERGED:
            return standard_expection(
                f'File {task_id} has already been fully uploaded; do not repeat the operation!')

        file.save(os.path.join(absolute_dir_path, f'{task_id}{chunk}'))
        file_cache.set_chunk_status(chunk)

        key = task_id
        fileHandler.log_print(key, chunk, f'{chunk}/{totalChunks}')
        # On the first chunk, start a background thread that watches chunk status and merges in parallel.
        if chunk == 1:
            target_filename = request.form.get('filename')
            merged_file = os.path.join(absolute_dir_path, target_filename)
            part_file = os.path.join(absolute_dir_path, key)

            args = (key, totalChunks, part_file, merged_file, tenant_id,
                    user_id)
            threading.Thread(target=fileHandler.porter_running,
                             args=args,
                             daemon=True).start()

        while not file_cache.lock():
            # print(datetime.datetime.now(), f"{task_id} set lock waiting chunk {chunk}")
            fileHandler.log_print(key, chunk, 'task waiting')
            time.sleep(0.2)
        else:
            # print(datetime.datetime.now(), f"{task_id} set lock success chunk {chunk}")
            # ============================ exclusive section ============================
            # Initialize the list of chunk numbers still to be handled; each request
            # picks off its own number, and the last chunk polls the background
            # merge thread for completion.
            file_cache.ready_chunks(totalChunks)
            # Remove the current chunk number from the list.
            chunks_values = file_cache.get_ready_chunks()
            _.remove(chunks_values, lambda x: x == chunk)
            file_cache.set_ready_chunks(chunks_values)
            # Release the file/task lock so intermediate chunks can respond to the client promptly.
            file_cache.release_lock()
            # ============================ exclusive section END ============================
            # Last chunk of the file:
            if len(chunks_values) == 0:
                task = None
                # Poll until all chunks have been merged or the cache expiry window runs out.
                is_completed = False
                start = time.time()
                while (is_completed is False and time.time() - start
                       <= current_app.config['REDIS_CACHE_EXPIRE_FILE']):
                    time.sleep(0.5)
                    counter = file_cache.get_counter()
                    if counter is None:
                        # The background merge thread may not have created counter_key yet.
                        continue
                    if isinstance(counter, bytes):
                        counter = int(counter)
                    print(
                        datetime.datetime.now(),
                        f'#### counter {counter}, total chunks {totalChunks}')
                    # Expected chunk list: [1...totalChunks]
                    # totalChunks: [i + 1 for i in range(totalChunks)]
                    # Current chunk list: [1...counter]
                    # counter: [i + 1 for i in range(counter)]
                    expect_partitions = sorted(
                        [i + 1 for i in range(totalChunks)])
                    current_partitions = sorted(
                        [i + 1 for i in range(counter)])
                    is_completed = expect_partitions == current_partitions
                    fileHandler.log_print(
                        key, chunk, 'is_completed:{}'.format(is_completed))
                else:
                    # When several users upload at the same time, porter_running may lag; wait a little longer.
                    wait_start = time.time()
                    task_link = file_cache.get_task_completed()

                    print(
                        datetime.datetime.now(),
                        f'#### task link {task_link}, wait_start: {wait_start}'
                    )
                    while (is_completed is True
                           and time.time() - wait_start <= 2000
                           and task_link is None):
                        time.sleep(0.5)
                        task_link = file_cache.get_task_completed()
                        # print(datetime.datetime.now(), '#### task link', task_link)

                    if task_link is None:
                        return standard_expection('File check timed out.')
                    return standard_response(task_link, 200)
            else:
                # Intermediate chunk: finish here.
                fileHandler.log_print(key, chunk, 'is_completed:False')
                print(datetime.datetime.now(), "is_completed:False", key,
                      chunk)
                return standard_response(str(chunk), 200)
    except Exception as err:
        print("error: ", err)
        import traceback
        traceback.print_exc()
        return standard_expection('Upload failed!')
    finally:
        file_cache.release_lock()
        print("end", request.form)
Example #14
def send_static_file(path):
    dir_path = os.path.join(Utils.mount_point(),
                            current_app.config['UPLOAD_FOLDER'])
    return send_from_directory(dir_path, path, as_attachment=True)