Exemplo n.º 1
0
    def make_db(self, scan_path, out_file):
        """Scan ``scan_path`` and write the resulting database to ``out_file``.

        The output file consists of, in order: the packed header, the packed
        dict, and the concatenated packed records. The address stored in the
        dict for each record is its offset from the start of the record data
        (the first record is at 0), not an absolute file position.

        Args:
            scan_path: Root path handed to the scanner.
            out_file: Path of the file to create or overwrite.
        """
        print("make_db")
        print(scan_path)
        print(out_file)

        header = Header()
        fdict = Dict()
        fdata = Data()

        self.__start_scan(scan_path, fdata)

        print(len(fdata.records))

        # Register every record in the dict before asking for its size;
        # presumably get_bdata_size() depends on the number of entries.
        for record in fdata.records:
            fdict.append_record(record.fid, record.pid)

        print(len(fdict.records_map))

        # The dict sits directly after the header, the data after the dict.
        header.dict_start = Header.SIZE
        header.dict_size = fdict.get_bdata_size()
        header.data_start = Header.SIZE + header.dict_size

        # Collect the packed records and join once at the end: repeated
        # ``bytes +=`` copies the whole buffer on every iteration.
        chunks = []
        offset = 0
        for record in fdata.records:
            packed = record.pack()
            fdict.set_addr(record.fid, offset, len(packed))
            chunks.append(packed)
            offset += len(packed)

        fdata_bin = b"".join(chunks)
        header.data_size = len(fdata_bin)

        # Pack everything before opening the file so a packing error does not
        # leave a truncated output file behind.
        bdata = b"".join((header.pack(), fdict.pack(), fdata_bin))

        with open(out_file, "wb") as fd:
            fd.write(bdata)
Exemplo n.º 2
0
class Writer(object):
    """Scan a directory tree and write it as a gzip-compressed database.

    The output stream is the packed header followed by the packed record
    data; no separate dict section is written by this variant.
    """

    def __init__(self, scan_path, out_file):
        self.scan_path = scan_path
        self.out_file = out_file

        self.header = Header()
        self.fdata = Data()

    def start(self):
        """Run the scan, fill in the header and write the gzip output file."""
        timer = ETimer()
        # Ids start at 1; 0 is used as the parent id of top-level entries.
        Writer.rescan(self.scan_path, self.fdata, 0, 1)
        timer.elapsed("finish scan")

        print("files: ", len(self.fdata.records))

        packed_data = self.fdata.pack()

        header = self.header
        header.data_start = Header.SIZE
        header.data_size = len(packed_data)
        header.total_records = len(self.fdata.records)
        header.print_header()

        packed_header = header.pack()

        with gzip.open(self.out_file, "wb") as out:
            out.write(packed_header)
            out.write(packed_data)

    @staticmethod
    def rescan(scan_path, fdata, parent_id, last_id):
        """Recursively add the entries below ``scan_path`` to ``fdata``.

        Ids are handed out sequentially starting at ``last_id``; a directory
        receives its id before any of its contents. Entries that are neither
        a regular file nor a directory are skipped.

        Returns:
            The next unused id.
        """
        next_id = last_id

        for entry_name in os.listdir(scan_path):
            entry_path = os.path.join(scan_path, entry_name)

            if os.path.isfile(entry_path):
                fdata.append_file(entry_name, parent_id, next_id,
                                  os.stat(entry_path))
                next_id += 1
                continue

            if not os.path.isdir(entry_path):
                continue

            dir_record = fdata.append_dir(entry_name, parent_id, next_id,
                                          os.stat(entry_path))
            # Children are numbered after the directory itself.
            next_id = Writer.rescan(entry_path, fdata, dir_record.fid,
                                    next_id + 1)

        return next_id
Exemplo n.º 3
0
class Store(object):
    """Build and read a flat file-tree database.

    File layout written by ``make_db``: ``Header.SIZE`` bytes of header, then
    the packed records in scan order, then the packed dict. Each dict entry
    holds the absolute file position and byte length of one record.
    """

    def __init__(self):
        self.header = Header()
        self.fdata = Data()
        self.fdict = Dict()

        # Output file handle; only set while make_db is running.
        self.__fd = None

    def make_db(self, scan_path, out_file):
        """Scan ``scan_path`` and write the database to ``out_file``.

        Records are streamed to the file during the scan; the dict is
        appended afterwards and the header is written last, into the space
        reserved at the start of the file.

        Args:
            scan_path: Directory to scan recursively.
            out_file: Path of the file to create or overwrite.
        """
        print("make_db")
        print(scan_path)
        print(out_file)

        # ``with`` guarantees the handle is closed even if scanning or
        # packing raises; ``finally`` keeps self.__fd from dangling.
        with open(out_file, "wb") as fd:
            self.__fd = fd
            try:
                # Leave room for the header, which is only known at the end.
                fd.seek(Header.SIZE)

                self.__start_scan(scan_path, fd, self.fdict)

                end_of_data_addr = fd.tell()
                print("data writed, cur pos: ", end_of_data_addr)

                fd.write(self.fdict.pack())

                self.header.data_start = Header.SIZE
                self.header.data_size = end_of_data_addr - Header.SIZE
                self.header.dict_start = end_of_data_addr
                self.header.dict_size = self.fdict.get_bdata_size()
                print(self.header)

                bin_header = self.header.pack()
                fd.seek(0)
                fd.write(bin_header)
            finally:
                self.__fd = None

    def __start_scan(self, scan_path, fd, fdict: "Dict"):
        """Start the recursive scan at ``scan_path`` with parent id 0."""
        Store.rescan(scan_path, fd, fdict, self.fdata, 0)

    @staticmethod
    def _write_record(fd, fdict, record):
        """Index ``record`` at the current position of ``fd`` and write it."""
        brecord = record.pack()
        fdict.append_record(record.fid, record.pid, fd.tell(), len(brecord))
        fd.write(brecord)

    @staticmethod
    def rescan(spath, fd, fdict: "Dict", fdata: "Data", parent_id):
        """Recursively record every file and directory below ``spath``.

        Each entry is appended to ``fdata``, written to ``fd`` and indexed in
        ``fdict``. Entries that are neither a regular file nor a directory
        (for example dangling symlinks, sockets or FIFOs) are skipped, since
        they cannot be listed.

        Args:
            spath: Directory whose entries are scanned.
            fd: Writable binary file object positioned at the write offset.
            fdict: Index receiving (fid, pid, position, size) per record.
            fdata: Record store providing ``append_file`` / ``append_dir``.
            parent_id: Id assigned as parent of the entries of ``spath``.
        """
        for f in os.listdir(spath):
            full_path = os.path.join(spath, f)

            if os.path.isfile(full_path):
                Store._write_record(fd, fdict,
                                    fdata.append_file(f, parent_id))
            elif os.path.isdir(full_path):
                record = fdata.append_dir(f, parent_id)
                Store._write_record(fd, fdict, record)
                Store.rescan(full_path, fd, fdict, fdata, record.fid)

    def read_db(self, file_path):
        """Open the database at ``file_path`` and print its tree of names."""
        print("read_db")
        print(file_path)

        with open(file_path, "rb") as fd:
            header = Header(fd.read(Header.SIZE))
            print(header)

            fd.seek(header.dict_start)
            fdict = Dict(fd.read(header.dict_size))

            # Top-level entries have parent id 0.
            Store.reprint(fd, fdict, 0, 0)

    @staticmethod
    def reprint(fd, fdict, parent_id, ind):
        """Print the names of all records under ``parent_id``, recursively.

        Args:
            fd: Readable binary file object of the database.
            fdict: Loaded dict whose entries give each record's position.
            parent_id: Only entries with this parent id are printed.
            ind: Nesting depth; each level is indented by one tab.
        """
        for r in fdict.records_map.values():
            if r.pid != parent_id:
                continue

            fd.seek(r.daddr)
            record = DataRecord(fd.read(r.dsize))
            print(ind * "\t" + record.name)

            # ftype 2 presumably marks a directory - confirm against
            # DataRecord; only such records are descended into.
            if record.ftype == 2:
                Store.reprint(fd, fdict, r.fid, ind + 1)
Exemplo n.º 4
0
class Writer(object):
	"""Scan a directory tree and stream it into a database file.

	File layout written by ``start``: ``Header.SIZE`` bytes of header, then
	the packed records in scan order, then the packed dict. Each dict entry
	holds the absolute file position and byte length of one record.
	"""

	# Values stored in DataRecord.ftype by make_file / make_dir.
	_FTYPE_FILE = 1
	_FTYPE_DIR = 2

	def __init__(self, scan_path, out_file):
		self.scan_path = scan_path
		self.out_file = out_file

		self.header = Header()
		self.fdict = Dict()

	def start(self):
		"""Run the scan and write records, dict and header to ``out_file``.

		The header is written last, into the space reserved at the start of
		the file, because its fields are only known after the scan.
		"""
		# ``with`` closes the file even if scanning or packing raises.
		with open(self.out_file, "wb") as fd:
			# Leave room for the header.
			fd.seek(Header.SIZE)

			etimer = ETimer()
			# Ids start at 1; 0 is the parent id of top-level entries.
			self.__start_scan(self.scan_path, fd, self.fdict, 0, 1)
			etimer.elapsed("finish scan")

			end_of_data_addr = fd.tell()

			print("data writed, cur pos: ", end_of_data_addr)
			print("files: ", self.fdict.records_count)

			etimer.elapsed("start pack dict")
			bin_fdict = self.fdict.pack()
			etimer.elapsed("finish pack dict")

			fd.write(bin_fdict)

			self.header.data_start = Header.SIZE
			self.header.data_size = end_of_data_addr - Header.SIZE
			self.header.dict_start = end_of_data_addr
			self.header.dict_size = self.fdict.get_bdata_size()
			self.header.total_records = self.fdict.records_count

			self.header.print_header()

			bin_header = self.header.pack()
			fd.seek(0)
			fd.write(bin_header)

	@dtimeit
	def __start_scan(self, scan_path, fd, fdict: Dict, parent_id, last_id):
		"""Decorated entry point that delegates to ``rescan``."""
		Writer.rescan(scan_path, fd, fdict, parent_id, last_id)

	@staticmethod
	def rescan(scan_path, fd, fdict: Dict, parent_id, last_id):
		"""Recursively write and index every entry below ``scan_path``.

		Ids are handed out sequentially starting at ``last_id``; a directory
		receives its id before any of its contents. Entries that are neither
		a regular file nor a directory are skipped.

		Args:
			scan_path: Directory whose entries are scanned.
			fd: Writable binary file object positioned at the write offset.
			fdict: Index receiving (fid, pid, position, size) per record.
			parent_id: Id assigned as parent of the entries of ``scan_path``.
			last_id: First id to assign.

		Returns:
			The next unused id.
		"""
		fid = last_id

		for f in os.listdir(scan_path):
			full_path = os.path.join(scan_path, f)

			if os.path.isfile(full_path):
				is_dir = False
			elif os.path.isdir(full_path):
				is_dir = True
			else:
				continue

			st = os.stat(full_path)
			if is_dir:
				record = Writer.make_dir(f, parent_id, fid, st)
			else:
				record = Writer.make_file(f, parent_id, fid, st)
			brecord = record.pack()

			# Index the record at the position it is about to be written to.
			fdict.append_record(record.fid, record.pid, fd.tell(), len(brecord))
			fd.write(brecord)
			fid += 1

			if is_dir:
				fid = Writer.rescan(full_path, fd, fdict, record.fid, fid)

		return fid

	@staticmethod
	def make_file(name, pid, fid, st):
		"""Build a DataRecord with ftype 1 (file) from a stat result."""
		return Writer.make_ffile(name, pid, fid, st, Writer._FTYPE_FILE)

	@staticmethod
	def make_dir(name, pid, fid, st):
		"""Build a DataRecord with ftype 2 (directory) from a stat result."""
		return Writer.make_ffile(name, pid, fid, st, Writer._FTYPE_DIR)

	@staticmethod
	def make_ffile(name, pid, fid, st, ftype):
		"""Build a DataRecord from a name, ids, an ``os.stat`` result and a type.

		Args:
			name: Entry name (without directory part).
			pid: Id of the parent record.
			fid: Id of this record.
			st: Stat result supplying size, mode, owner and timestamps.
			ftype: Record type value stored as ``record.ftype``.

		Returns:
			The populated DataRecord.
		"""
		record = DataRecord()
		record.name = name
		record.ftype = ftype
		record.pid = pid
		record.fid = fid

		record.size = st.st_size
		record.st_mode = st.st_mode

		record.st_uid = st.st_uid
		record.st_gid = st.st_gid

		record.st_ctime = st.st_ctime
		record.st_atime = st.st_atime
		record.st_mtime = st.st_mtime

		return record