Exemplo n.º 1
0
def length_buffer(fileName, bufferSize):
    """
    Count the bytes of a file by reading it line by line through a buffer.

    The file is loaded in chunks of at most ``bufferSize`` bytes. Lines are
    extracted from the current chunk with ``readln_buffer`` and pieced
    together when a single line spans several chunks.

    Args:
        fileName: Path of the file to read.
        bufferSize: Maximum number of bytes requested per ``read`` call.

    Returns:
        The total number of bytes handed out by ``readln_buffer``, including
        a final line that is not terminated by a newline.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    total = 0
    file_pos = 0

    # State of the in-memory buffer: its content and the read cursor in it.
    buf = None
    buf_pos = 0

    # The file is only read, so open it read-only; the context manager
    # guarantees the handle is released even if a helper raises.
    with open(fileName, "rb") as fh:
        reached_eof = False
        while not reached_eof:
            line = b''
            while b'\n' not in line:
                # Fill the buffer for the first time, or refill it once the
                # helper reports that the current one has been used up.
                if not buf or usedWholeBuffer(buf_pos, bufferSize):
                    # Resume right after the last byte handed out so far.
                    fh.seek(file_pos)
                    buf = fh.read(bufferSize)
                    buf_pos = 0
                piece, buf_pos = readln_buffer(buf, buf_pos)
                # An empty piece is the end-of-file signal.
                if piece == b'':
                    reached_eof = True
                    break
                line += piece
                file_pos += len(piece)
            # Also counts a trailing unterminated line: at end of file
            # ``line`` holds whatever was gathered before the empty piece
            # (possibly nothing).
            total += len(line)

    return total
Exemplo n.º 2
0
def rrmerge_Buffer_Char(file_list, outputFile, bufferSize):
    """
    Merge several files line by line, round-robin, into ``outputFile``.

    Each pass over the inputs takes one line from every input that still
    has data and writes it with ``writeln_char``. The merge ends when every
    input is exhausted. Input is read through a per-file buffer that is
    refilled ``bufferSize`` bytes at a time.

    Args:
        file_list: Inputs to merge, handed as-is to
            ``initializeFileObjectsBuffer``.
        outputFile: Path of the output file; it is created, or truncated if
            it already exists.
        bufferSize: Number of bytes requested per buffered read.

    Returns:
        None. The merged content is written to ``outputFile``.

    Note:
        Bytes gathered for a line that reaches end of input before a newline
        are discarded rather than written.
        NOTE(review): confirm this is intended, and whether ``writeln_char``
        accepts a line without a trailing newline, before changing it.
    """
    readers = initializeFileObjectsBuffer(file_list, bufferSize)
    try:
        # The context manager flushes and closes the output on every path.
        with open(outputFile, 'w+b') as file_to_write:
            while not all(reader.isClosed for reader in readers):
                for reader in readers:
                    if reader.isClosed:
                        continue

                    # Gather pieces until a full line (ending in a newline)
                    # is assembled or the input runs dry (line becomes None).
                    line = b''
                    while line is not None and b'\n' not in line:
                        needs_refill = (
                            cannotUseLastBuffer(reader.bufferInitPos,
                                                reader.readPos, bufferSize)
                            or usedWholeBuffer(reader.bufferPos, bufferSize)
                        )
                        if needs_refill:
                            # NOTE(review): readPos is advanced here and
                            # again per piece below; position bookkeeping is
                            # kept exactly as the helpers expect it.
                            reader.readPos += reader.bufferPos
                            reader.readBuffer = reader.fileObject.read(
                                bufferSize)
                            reader.bufferInitPos = reader.readPos
                            reader.bufferPos = 0
                        piece, reader.bufferPos = readln_buffer(
                            reader.readBuffer, reader.bufferPos)
                        # An empty piece is the end-of-input signal.
                        if piece == b'':
                            line = None
                        else:
                            line += piece
                            reader.readPos += len(piece)

                    if line is None:
                        reader.isClosed = True
                        reader.fileObject.close()
                    else:
                        writeln_char(file_to_write, line)
    finally:
        # On the normal path every input is already closed; this only
        # releases inputs left open when an error interrupted the merge.
        for reader in readers:
            if not reader.isClosed:
                reader.isClosed = True
                reader.fileObject.close()
Exemplo n.º 3
0
def rrmerge_buffer_mmap(file_list, outputFile, bufferSize, writePosition):
    """
    Merge several files line by line, round-robin, into a mapped output file.

    The output file is first filled with as many zero bytes as the inputs
    contain in total. A mapped region over it is obtained from
    ``getNewMapRegion``, and every merged line is written through
    ``writeln_mmap``. Input is read through a per-file buffer that is
    refilled ``bufferSize`` bytes at a time.

    Args:
        file_list: Inputs to merge, handed as-is to
            ``initializeFileObjectsBuffer``.
        outputFile: Path of the output file; it is created, or truncated if
            it already exists.
        bufferSize: Number of bytes requested per buffered read; also passed
            to ``getNewMapRegion``.
        writePosition: Initial write position passed to ``getNewMapRegion``
            and updated by each ``writeln_mmap`` call (presumably an offset
            into the output file; verify against the helpers).

    Returns:
        None. The merged content is written to ``outputFile``.

    Note:
        Bytes gathered for a line that reaches end of input before a newline
        are discarded rather than written, which leaves zero padding at the
        end of the output.
        NOTE(review): confirm this is intended. Also confirm how
        ``getNewMapRegion`` behaves when the inputs are empty
        (``totalSize == 0``).
    """
    readers = initializeFileObjectsBuffer(file_list, bufferSize)
    try:
        # Combined size of all inputs, used as the size of the output file.
        totalSize = sum(
            os.fstat(reader.fileObject.fileno()).st_size
            for reader in readers
        )

        # The context manager closes the output on every path.
        with open(outputFile, 'w+b') as file_to_write:
            file_to_write.write(totalSize * b'\0')
            # Push the zero fill to the OS before mapping: the file must
            # already have its final size, and a late flush of buffered
            # zeros must not overwrite data written through the mapping.
            file_to_write.flush()

            # NOTE(review): the meaning of the trailing ``1`` argument is
            # defined by getNewMapRegion and is not visible here.
            mapping, actualFilePosition, actualBufferSize = getNewMapRegion(
                writePosition, bufferSize, totalSize, file_to_write, 1)
            try:
                while not all(reader.isClosed for reader in readers):
                    for reader in readers:
                        if reader.isClosed:
                            continue

                        # Gather pieces until a full line (ending in a
                        # newline) is assembled or the input runs dry
                        # (line becomes None).
                        line = b''
                        while line is not None and b'\n' not in line:
                            needs_refill = (
                                cannotUseLastBuffer(reader.bufferInitPos,
                                                    reader.readPos,
                                                    bufferSize)
                                or usedWholeBuffer(reader.bufferPos,
                                                   bufferSize)
                            )
                            if needs_refill:
                                # NOTE(review): readPos is advanced here and
                                # again per piece below; bookkeeping is kept
                                # exactly as the helpers expect it.
                                reader.readPos += reader.bufferPos
                                reader.readBuffer = reader.fileObject.read(
                                    bufferSize)
                                reader.bufferInitPos = reader.readPos
                                reader.bufferPos = 0
                            piece, reader.bufferPos = readln_buffer(
                                reader.readBuffer, reader.bufferPos)
                            # An empty piece is the end-of-input signal.
                            if piece == b'':
                                line = None
                            else:
                                line += piece
                                reader.readPos += len(piece)

                        if line is None:
                            reader.isClosed = True
                            reader.fileObject.close()
                        else:
                            # writeln_mmap may hand back a different mapping,
                            # so always keep the one it returns.
                            mapping, writePosition, actualFilePosition = (
                                writeln_mmap(mapping, writePosition,
                                             actualFilePosition,
                                             actualBufferSize, totalSize,
                                             file_to_write, line))
            finally:
                # Release the current mapping before the file is closed.
                mapping.close()
    finally:
        # On the normal path every input is already closed; this only
        # releases inputs left open when an error interrupted the merge.
        for reader in readers:
            if not reader.isClosed:
                reader.isClosed = True
                reader.fileObject.close()