Example #1
def sending_process(file_name, in_transport):

    try:
        # First ask the other hosts to find the most suitable one;
        src_size = os.path.getsize(file_name)
        host_index = get_best_server(src_size)

        if host_index is None:
            print("No suitable host found:", file_name)
            make_log("WARNING", "No suitable host found: " + file_name)
            # No host can take the file; bail out and let the finally block clean up.
            raise NotImplementedError

        # Establish the connection;
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Pick the target server's IP address;
        host = host_list[host_index]

        sock.connect((host, port))

        file_size = os.stat(file_name).st_size

        # Send the file size and file name; note that the flag bit of the file name
        # is not sent here, since locally it represents the index of the target host;
        bite_format = '128sl' if get_system_bytes() else '128sq'
        file_head = struct.pack(bite_format,
                                os.path.basename(file_name).encode(),
                                file_size)

        # Use sendall() so the whole header goes out even if a single send() is partial.
        sock.sendall(file_head)

        print("\n开始传输文件:", file_name)

        read_file = open(file_name, "rb")

        sended_size = 0
        while True:
            # process_bar(float(sended_size) / file_size)

            file_data = read_file.read(10240)

            if not file_data:
                break

            sock.sendall(file_data)

            sended_size += len(file_data)

        read_file.close()

        # Delete the original compressed file once the transfer is done;
        os.remove(file_name)
        print()
        print("sending over:", file_name, '\n')

        make_log("INFO", "数据发送完毕: %s" % file_name)
        sock.close()
    except Exception as e:
        print(e)
    finally:
        in_transport.remove(file_name)
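
The header format in sending_process() depends on get_system_bytes(), which is not part of this listing. A minimal sketch of such a helper, assuming it only reports whether the platform's native long is already 64 bits wide (so '128sl' can carry the file size, and '128sq' is used otherwise):

import struct

def get_system_bytes():
    # Hypothetical helper, not shown in the original code: True when the native
    # C long is 8 bytes, so the '128sl' header already holds a 64-bit file size.
    return struct.calcsize('l') == 8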
Example #2
def answer():
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    host = socket.gethostname()

    sock.bind((host, answer_port))
    sock.listen(5)

    while True:
        connection, address = sock.accept()
        symbol = connection.recv(1024).decode()

        # The first character is a flag; the rest is the size of the file to be sent.
        src_size = int(symbol[1:])

        computer_status = get_status(src_size)
        connection.send(computer_status.encode())

        make_log("INFO", "客户机请求,返回系统状态------")

        connection.close()
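
The loop in answer() implies a small text protocol: the client sends one flag character followed by the size of the file it wants to send, and reads back the status string produced by get_status(). Below is a client-side sketch of how get_best_server() from Example #1 might use it; the flag character, the 'ok' reply, and the "first host that accepts" rule are assumptions, only the request shape and port 12346 come from the code above.

import socket

def get_best_server(src_size, hosts, answer_port=12346, flag='s'):
    # Hypothetical counterpart of answer(): ask each host for its status and
    # return the index of the first one that can take a file of src_size bytes.
    for index, host in enumerate(hosts):
        try:
            with socket.create_connection((host, answer_port), timeout=5) as sock:
                sock.sendall((flag + str(src_size)).encode())
                status = sock.recv(1024).decode()
        except OSError:
            continue  # host unreachable; try the next one
        if status == 'ok':  # assumed reply convention of get_status()
            return index
    return None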
Example #3
def compress_folder(folder, in_compress):

    try:
        folder = os.path.abspath(folder)

        # Compress the folder;
        make_log("INFO", "Starting data compression: " + folder)
        print("Starting data compression:", folder)

        create_zip(folder)

        # Delete the folder
        # shutil.rmtree(folder)

        make_log("INFO", "数据压缩完毕:" + folder)
        print("\n压缩完毕:", folder)

    except Exception as e:
        print(e)

    finally:
        # Remove this element from the in_compress list;
        in_compress.remove(folder)
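
compress_folder() delegates the actual archiving to create_zip(), which is not listed here. A minimal sketch, assuming it simply writes "<folder>.zip" next to the source directory:

import os
import shutil

def create_zip(folder):
    # Hypothetical stand-in for the create_zip() used above: archive the folder
    # into a .zip next to it and return the path of the archive.
    folder = os.path.abspath(folder)
    return shutil.make_archive(folder, 'zip',
                               root_dir=os.path.dirname(folder),
                               base_dir=os.path.basename(folder))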
Example #4
def monitor(thread_num=5):

    # Files that are currently being processed;
    in_monitor = []

    while True:
        time.sleep(3)

        if not os.path.exists(mask_file):
            make_log("ERROR", "mask file does not exist")
            print("mask file does not exist!")
            exit(1)

        # Read the mask file; if it is empty, keep monitoring, otherwise process the files;
        # mask_str holds all files that need processing;
        with open(mask_file, 'r') as read_mask:
            mask_str = read_mask.readlines()

        # new_monitor holds all files that have not been processed yet;
        new_monitor = [file for file in mask_str if file not in in_monitor]

        # Work out how many new threads may still be created;
        free_thread_nums = thread_num - thread_nums("monitor")

        max_thread_num = min(free_thread_nums, len(new_monitor))

        # Start max_thread_num threads to process the data;
        for i in range(max_thread_num):
            compress_thread = threading.Thread(target=monitor_data,
                                               args=(new_monitor[i].strip(), in_monitor),
                                               name="monitor")

            # Put this file into the in_monitor list;
            in_monitor.append(new_monitor[i])

            compress_thread.start()
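
monitor() throttles itself with thread_nums("monitor"), which is not shown. Assuming it simply counts live threads created with that name, it could look like this:

import threading

def thread_nums(name):
    # Hypothetical helper: count running threads whose name matches the one
    # passed to threading.Thread(name=...) above.
    return sum(1 for t in threading.enumerate() if t.name == name)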
Example #5
# -*- coding: UTF-8 -*-
import threading
from data_queue import monitor
from answer import answer
from m_socket import receive
from logs import make_log
"""
answer_thread 用于接收客户端请求并返回系统的状态,帮助客户机决定要向哪台主机发送数据;占用端口号为12346
receive_thread 用于接收客户端传输的文件,并向mask_file中写入记录;
mask_thread 用于监控mask_file文件,并进行数据清洗和数据处理;
"""

if __name__ == '__main__':
    # Start a thread that returns the system status;
    answer_thread = threading.Thread(target=answer, args=(), name='answer')
    answer_thread.start()
    make_log("INFO", "监听程序已开启-------------")
    print("监听程序已开启-------------")

    # Start a thread that receives files;
    receive_thread = threading.Thread(target=receive, args=(), name='receive')
    receive_thread.start()
    make_log("INFO", "文件接受程序已开启--------------")
    print("文件接受程序已开启--------------")

    # Start a thread that monitors the mask file;
    mask_thread = threading.Thread(target=monitor, args=(5, ), name='mask')
    mask_thread.start()
    make_log("INFO", "文件处理程序已开启--------------")
    print("文件处理程序已开启--------------")
Example #6
"""
总文件,检测待发送文件,传输文件;
"""
from transport import transport
from compress import compress
from logs import make_log
import threading

if __name__ == '__main__':

    # Start the compression thread, used to compress data;
    compress_thread = threading.Thread(target=compress, args=(5, ))
    compress_thread.start()
    print("数据压缩进程已启动,准备压缩数据")
    make_log("INFO", "数据压缩进程已启动,准备压缩数据")

    # Start the transfer thread, used to transfer data;
    transport_thread = threading.Thread(target=transport, args=(5, ))
    transport_thread.start()
    print("数据传输进程已启动,准备传输数据")
    make_log("INFO", "数据传输进程已启动,准备压缩数据")


Example #7
def monitor_data(file, in_monitor):
    try:
        print("数据清洗:", file)
        make_log("INFO", "数据清洗:" + file)

        # TODO: parse the database name and date; use them for cleaning and loading into the database;
        basename = os.path.basename(file)

        database_name = get_database(basename)
        month = get_date(basename)

        # Unzip the data and return the extracted directory;
        unzip_dir = zip_file.unzip_file(file)

        # TODO: clean and load into the database; still to be debugged;
        # merge_dir = process_dir(unzip_dir, date)

        # hive(merge_dir, database_name, date)
        time.sleep(15)

        # make_log("INFO", "清洗完成:" + file)

        write_protect.write_lock.acquire()
        with open(mask_file, 'r') as read_mask:
            # Re-read the file, since other threads may have modified it;
            mask_str = read_mask.readlines()

        mask_str = [mask for mask in mask_str if mask.strip() != file]

        with open(mask_file, 'w') as write_mask:
            write_mask.write(''.join(mask_str))
        write_protect.write_lock.release()

        # Delete all files: the archive, the extracted files, and the cleaned files;
        shutil.rmtree(os.path.dirname(file))

        print("清洗完毕:" + file)

        make_log("INFO", "清洗完毕:" + file)

    except subprocess.CalledProcessError:
        print("数据入库未完成:", file)
        make_log("ERROR", "数据入库未完成:" + file)

    except FileNotFoundError:
        print("file not found")

        make_log("ERROR", "文件不存在" + file)

        # Delete the record for this file;
        write_protect.write_lock.acquire()
        with open(mask_file, 'r') as read_mask:
            # Re-read the file, since other threads may have modified it;
            mask_str = read_mask.readlines()

        # The lines read back still carry their trailing newline, so strip before comparing.
        mask_str = [mask for mask in mask_str if mask.strip() != file]

        with open(mask_file, 'w') as write_mask:
            write_mask.write(''.join(mask_str))
        write_protect.write_lock.release()
    finally:
        # in_monitor stores the raw line (with its trailing newline), so add it back here.
        in_monitor.remove(file + '\n')
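
Both monitor_data() and receive_thread() in Example #8 serialize their read-modify-write of mask_file through write_protect.write_lock. The write_protect module itself is not listed; assuming it only needs to expose one process-wide lock, it can be as small as:

# write_protect.py (assumed contents): a single lock shared by every thread
# that rewrites mask_file, so concurrent rewrites cannot interleave.
import threading

write_lock = threading.Lock()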
Example #8
def receive_thread(connection):

    try:
        connection.settimeout(600)

        # NOTE: this assumes the sender packed the header with '128sl';
        # a sender that chose '128sq' would need a matching format here.
        file_info_size = struct.calcsize('128sl')

        buf = connection.recv(file_info_size)

        if buf:
            file_name, file_size = struct.unpack('128sl', buf)

            file_name = file_name.decode().strip('\00')

            # Find the smallest disk directory for storing the file;
            disk_list = get_min_disk()

            # TODO: replace in production;
            # Create a new folder under receive named with a timestamp to avoid name clashes;
            # file_new_dir = os.path.join('/HDATA', str(disk_list),
            #                             'receive', get_database(file_name), file_name[:-4])
            file_new_dir = os.path.join('receive', get_database(file_name),
                                        file_name[:-4])
            # print(file_name, file_new_dir)
            if not os.path.exists(file_new_dir):
                os.makedirs(file_new_dir)

            file_new_name = os.path.join(file_new_dir, file_name)

            received_size = 0

            w_file = open(file_new_name, 'wb')

            print("start receiving file:", file_name)
            make_log("INFO", "开始接收文件:" + file_name)

            out_contact_times = 0

            while received_size != file_size:
                r_data = connection.recv(10240)
                received_size += len(r_data)
                w_file.write(r_data)
                # process_bar.process_bar(float(received_size) / file_size)
                # if file_size - received_size > 10240:
                #     r_data = connection.recv(10240)
                #     received_size += len(r_data)
                #
                # else:
                #     r_data = connection.recv(file_size - received_size)
                #     received_size = file_size

                # Count consecutive receives that returned no data;
                if not r_data:
                    out_contact_times += 1
                else:
                    out_contact_times = 0

                # After 1000 receives with no data, the connection can be dropped;
                if out_contact_times == 1000:
                    connection.close()
                    w_file.close()
                    raise socket.timeout

                #     # Delete the data that was not fully received;
                #     print('Connection lost, the untransferred file will be removed')
                #     make_log("ERROR", "Connection lost, removing the unfinished file")
                #     shutil.rmtree(file_new_dir)
                #     exit(1)
                #
                # w_file.write(r_data)

            w_file.close()

            print("接收完成!\n")
            make_log("INFO", "传输完成: %s" % file_new_dir)

            # Write the record to the record file;
            # Each file is one line: the first field is the file name, the second the database name;
            # Benefits:
            # 1. The loading step needs no lock, avoiding errors from concurrent writes to the record file;
            # 2. When the system restarts, the cleaning and loading steps can be resumed;
            # print('##'+os.path.abspath(file_new_name))

            # Note the write lock here;

            write_protect.write_lock.acquire()
            with open(mask_file, 'a') as record_mask:
                # print("#######################")
                record_mask.write(os.path.abspath(file_new_name) + '\n')
            write_protect.write_lock.release()

        connection.close()

    except socket.timeout:
        print("连接超时!")
    finally:
        connection.close()
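
Example #5 starts m_socket.receive, which is not listed; receive_thread() above only handles one accepted connection. A sketch of the accept loop that could feed it, assuming one handler thread per connection; the default port value is a placeholder, not taken from the original code:

import socket
import threading

def receive(port=12345, backlog=5):
    # Hypothetical accept loop: hand every incoming connection to the
    # receive_thread() function defined above in its own thread.
    server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server.bind((socket.gethostname(), port))
    server.listen(backlog)
    while True:
        connection, address = server.accept()
        threading.Thread(target=receive_thread, args=(connection,),
                         name='receive').start()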