Пример #1
0
def compress_inverted_index(inverted_index, filename):
    '''
    Write every posting list of an inverted index into a binary file.

    For each ``(key, postlist)`` pair the current file offset is registered
    through ``SingleStringDict.add_word`` (together with ``postlist.df``)
    and the gamma-coded posting list is then written at that offset.

    :param inverted_index A mapping from word to posting list; every
        posting list must expose a ``df`` attribute
    :param filename  Path of the output file; it is opened in ``'wb'``
        mode, so an existing file is truncated
    '''
    def write_posting_gamma(postinglist):
        '''
        Write one posting list into the enclosing open file ``f``.

        Record layout: the number of gamma bits packed with
        ``struct.pack('I', ...)``, followed by the bits taken eight at a
        time, one byte per group.

        :param postinglist A value type is indexing.PostingList
        '''
        gammadata = gamma_list(pregamma_handle_didlist(postinglist))
        # Encode the whole payload before touching the file so that a
        # failure while encoding cannot leave a half-written record.
        # Slicing clamps at the end of the string, so the final group may
        # hold fewer than eight bits; it is stored as its plain integer
        # value (equivalent to padding with zeros on the left).
        payload = bytearray(int(gammadata[i:i + 8], 2)
                            for i in range(0, len(gammadata), 8))
        f.write(struct.pack('I', len(gammadata)))
        f.write(payload)

    #  write inverted index into file
    with open(filename, 'wb') as f:
        for key, postlist in inverted_index.items():
            # The offset must be taken before the record is written: it is
            # what the dictionary stores as the posting list's location.
            SingleStringDict.add_word(word=key,
                                      df=postlist.df,
                                      post_list_id=f.tell())
            try:
                write_posting_gamma(postlist)
            except Exception:
                # Best effort: show the posting list that could not be
                # written and carry on with the remaining words.
                # NOTE(review): the dictionary entry registered above is
                # kept even though its record failed - confirm intended.
                print(inverted_index[key])
Пример #2
0
def compress_inverted_index(inverted_index, filename):
	'''
	Write every posting list of an inverted index into a binary file.

	For each ``(key, postlist)`` pair the current file offset is registered
	through ``SingleStringDict.add_word`` (together with ``postlist.df``)
	and the gamma-coded posting list is then written at that offset.

	:param inverted_index A mapping from word to posting list; every
		posting list must expose a ``df`` attribute
	:param filename  Path of the output file; it is opened in ``'wb'``
		mode, so an existing file is truncated
	'''
	def write_posting_gamma(postinglist):
		'''
		Write one posting list into the enclosing open file ``f``.

		Record layout: the number of gamma bits packed with
		``struct.pack('I', ...)``, followed by the bits taken eight at a
		time, one byte per group.

		:param postinglist A value type is indexing.PostingList
		'''
		gammadata = gamma_list(pregamma_handle_didlist(postinglist))
		# Encode the whole payload before touching the file so that a
		# failure while encoding cannot leave a half-written record.
		# Slicing clamps at the end of the string, so the final group may
		# hold fewer than eight bits; it is stored as its plain integer
		# value (equivalent to padding with zeros on the left).
		payload = bytearray(int(gammadata[i:i + 8], 2)
			for i in range(0, len(gammadata), 8))
		f.write(struct.pack('I', len(gammadata)))
		f.write(payload)

	#  write inverted index into file
	with open(filename, 'wb') as f:
		for key, postlist in inverted_index.items():
			# The offset must be taken before the record is written: it is
			# what the dictionary stores as the posting list's location.
			offset = f.tell()
			SingleStringDict.add_word(word=key, df=postlist.df, post_list_id=offset)
			try:
				write_posting_gamma(postlist)
			except Exception:
				# Best effort: show the posting list that could not be
				# written and carry on with the remaining words.
				# NOTE(review): the dictionary entry registered above is
				# kept even though its record failed - confirm intended.
				print(inverted_index[key])
Пример #3
0
def decompress_inverted_index(filename):
    '''
    Rebuild the dictionary and the inverted index stored for a file.

    The dictionary comes from ``SingleStringDict.decompress``; every
    ``(word, offset)`` pair it yields is resolved with
    ``seek_inverted_index_file`` to obtain that word's entry.

    :param filename  Path handed to both helpers and opened in ``'rb'`` mode
    :return A tuple ``(dictionary, inverted_index)``
    '''
    word_offsets = SingleStringDict.decompress(filename)
    # NOTE(review): the handle opened here is never read - the helper below
    # receives the path, not the handle. It is kept so a missing file still
    # fails at this point; confirm whether it can be dropped.
    # TODO (carried over, original text incomplete): "there are something
    # cause the"
    with open(filename, 'rb'):
        postings = dict(
            (term, seek_inverted_index_file(filename, position))
            for term, position in word_offsets.items()
        )
    return word_offsets, postings
Пример #4
0
def decompress_inverted_index(filename):
	'''
	Rebuild the dictionary and the inverted index stored for a file.

	The dictionary comes from ``SingleStringDict.decompress``; every
	``(word, offset)`` pair it yields is resolved with
	``seek_inverted_index_file`` to obtain that word's entry.

	:param filename  Path handed to both helpers and opened in ``'rb'`` mode
	:return A tuple ``(dictionary, inverted_index)``
	'''
	word_offsets = SingleStringDict.decompress(filename)
	# NOTE(review): the handle opened here is never read - the helper below
	# receives the path, not the handle. It is kept so a missing file still
	# fails at this point; confirm whether it can be dropped.
	# TODO (carried over, original text incomplete): "there are something
	# cause the"
	with open(filename, 'rb'):
		postings = dict(
			(term, seek_inverted_index_file(filename, position))
			for term, position in word_offsets.items()
		)
	return word_offsets, postings
Пример #5
0
def decompress_dict(filename):
    '''
    Return whatever ``SingleStringDict.decompress`` produces for a file.

    :param filename  Path passed straight through to the decompressor
    '''
    term_dict = SingleStringDict.decompress(filename)
    return term_dict
Пример #6
0
            else:
                # padding
                f.write(struct.pack('B', int(gammadata[i:], 2)))

    #  write inverted index into file
    with open(filename, 'wb') as f:
        for key, postlist in inverted_index.items():
            offset = f.tell()
            SingleStringDict.add_word(word=key,
                                      df=postlist.df,
                                      post_list_id=offset)
            try:
                write_posting_gamma(postlist)
            except Exception, e:
                print inverted_index[key]
    SingleStringDict.compress(filename)


def pregamma_handle_didlist(postlinglist):
    def prehandle_dtlist(plist):
        '''
        Convert a list of numbers into gap form, without modifying it.

        The result starts with the smallest value plus one and continues
        with the differences between consecutive values in ascending order.
        An empty list raises IndexError.

        :param plist A list of numbers, in any order
        '''
        ascending = sorted(plist)
        # Note: a leading 0 cannot be represented in Gamma code, so the
        # first (smallest) value is always shifted up by one.
        gaps = [ascending[0] + 1]
        gaps.extend(upper - lower
                    for lower, upper in zip(ascending, ascending[1:]))
        return gaps

    resultpl = []
    pl = sorted(postlinglist.docitemmap.values(), key=lambda x: x.id)
    did = pl[0].id
Пример #7
0
def decompress_dict(filename):
	'''
	Return whatever ``SingleStringDict.decompress`` produces for a file.

	:param filename  Path passed straight through to the decompressor
	'''
	term_dict = SingleStringDict.decompress(filename)
	return term_dict
Пример #8
0
		for i in range(0, len(gammadata), 8):
			if i + 8 < len(gammadata):
				f.write(struct.pack('B', int(gammadata[i:i + 8], 2)))
			else:
	            # padding
				f.write(struct.pack('B', int(gammadata[i:], 2)))
	#  write inverted index into file
	with open(filename, 'wb') as f:
		for key, postlist in inverted_index.items():
			offset = f.tell()
			SingleStringDict.add_word(word = key, df = postlist.df, post_list_id = offset)
			try:
				write_posting_gamma(postlist)
			except Exception, e:
				print inverted_index[key]
	SingleStringDict.compress(filename)

def pregamma_handle_didlist(postlinglist):
	def prehandle_dtlist(plist):
		'''
		Convert a list of numbers into gap form, without modifying it.

		The result starts with the smallest value plus one and continues
		with the differences between consecutive values in ascending order.
		An empty list raises IndexError.

		:param plist A list of numbers, in any order
		'''
		ascending = sorted(plist)
		# Note: a leading 0 cannot be represented in Gamma code, so the
		# first (smallest) value is always shifted up by one.
		gaps = [ascending[0] + 1]
		gaps.extend(upper - lower
			for lower, upper in zip(ascending, ascending[1:]))
		return gaps

	resultpl = []
	pl = sorted(postlinglist.docitemmap.values(), key= lambda x: x.id)
	did = pl[0].id