Example #1
def process_archive_retrieval_job(job, chunk_size, output_path, friendly_name=False):
    global chunk_count

    filepos_limit = job.archive_size_in_bytes - 1
    pad_length = len(str(job.archive_size_in_bytes // chunk_size)) + 1
    current_pos = 0
    job_archive_hash = job.archive_sha256_tree_hash
    chunk_count = 0
    failed_parts = {}
    running_treehash = TreeHash(algo=hashlib.sha256)
    try:
        while current_pos < filepos_limit:
            chunk_count += 1
            end_pos = current_pos + (chunk_size - 1)
            if end_pos > filepos_limit:
                end_pos = filepos_limit

            if not process_archive_retrieval_range(job, output_path, current_pos, end_pos, friendly_name, running_treehash):
                failed_parts["part_" + str(chunk_count).zfill(pad_length)]=[current_pos, end_pos]
            current_pos = end_pos + 1

        if (not failed_parts) and (running_treehash.hexdigest() != job_archive_hash):
            failed_parts["all"] = True
    except:
        failed_parts["all"] = True
        raise

    return failed_parts
Example #2
def main():
    global Debug
    # Parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help="File Name", required=True)
    parser.add_argument("--rangesize", help="Size of the range", required=True)
    parser.add_argument("--chunksize", help="Size of the chunks to use for checksumming.", default=DEFAULT_CHUNK_SIZE)
    parser.add_argument("--debug", help="Print Debug messages", action="store_true")
    args = parser.parse_args()

    Debug = args.debug

    treehash = TreeHash(algo=hashlib.sha256)

    statinfo = os.stat(args.file)
    end_pos = statinfo.st_size
    range_size = int(args.rangesize)
    chunksize = int(args.chunksize)

    current_pos = 0
    while current_pos < end_pos:
        current_end_pos = current_pos + range_size
        if current_end_pos > end_pos:
            current_end_pos = end_pos

        running_treehash_on_file_range(treehash, args.file, current_pos, current_end_pos, chunksize)
        current_pos = current_end_pos 

    print("TreeHash for " + args.file + " is " + treehash.hexdigest())
Example #3
 def test_update(self):
     treehash = TreeHash()
     treehash.update(TEST_DATA)
     self.assertEqual(
         hashlib.sha256(TEST_DATA).hexdigest(),
         treehash.hexdigest()
     )
Example #4
 def test_update(self):
     tree_hash = TreeHash(TEST_CHUNK)
     tree_hash.update(TEST_DATA, TEST_INDEX)
     self.assertEqual(
         hashlib.sha256(TEST_DATA).hexdigest(),
         tree_hash.hexdigest()
     )
Example #5
 def test_md5(self):
     tree_hash = TreeHash(TEST_CHUNK, algo=hashlib.md5)
     tree_hash.update(TEST_DATA, TEST_INDEX)
     self.assertEqual(
         hashlib.md5(TEST_DATA).hexdigest(),
         tree_hash.hexdigest()
     )
Example #6
def treehash_on_file_range(treehash, filename, start, end, hash_chunk_size=DEFAULT_HASH_CHUNK_SIZE):

    infile = open(filename, "rb")

    infile.seek(start)

    if Debug:
        print("Treehash: Start: " + str(start) + ", End: " + str(end))

    treehash_local = TreeHash(algo=hashlib.sha256)
    current_pos = start
    end += 1
    while current_pos < end:
        read_size = end - current_pos
        if read_size > hash_chunk_size:
            read_size = hash_chunk_size

        chunk = infile.read(read_size)
        if treehash:
            treehash.update(chunk)
        treehash_local.update(chunk)

        current_pos += read_size
    infile.close()

    return treehash_local.hexdigest()
Example #7
def hash_file(fname):
    treehash = TreeHash(algo=hashlib.sha256)
    with open(fname, 'rb') as f:
        while True:
            buf = f.read(1024 * 1024)
            if not buf:
                break
            treehash.update(buf)
    return treehash.hexdigest()
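A small usage sketch for hash_file (not from the original source). For a file of at most one 1 MiB block, the Glacier tree hash collapses to the plain SHA-256 of the data, which is exactly what the test_update methods in Examples #3, #4, and #15 assert; the file name below is a hypothetical example, and the hash_file definition above is assumed to be in scope.

# Usage sketch: cross-check hash_file against a flat SHA-256 for a small file.
# "small_sample.bin" is a hypothetical path used only for illustration.
import hashlib

digest = hash_file("small_sample.bin")
with open("small_sample.bin", "rb") as f:
    flat = hashlib.sha256(f.read()).hexdigest()
print(digest, digest == flat)  # equal only while the file fits in a single 1 MiB block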
Example #8
    def test_tree(self):
        hashlib_result = hashlib.sha256(
            hashlib.sha256(TEST_DATA).digest() +
            hashlib.sha256(TEST_DATA).digest()
        ).hexdigest()

        tree_hash = TreeHash(2, block_size=1)
        tree_hash.update(TEST_DATA, 0)
        tree_hash.update(TEST_DATA, 1)
        self.assertEqual(hashlib_result,
                         tree_hash.hexdigest())
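test_tree above checks the core pairing rule: a parent node is the SHA-256 of its two children's raw digests concatenated. A reference sketch of the full composition in plain hashlib (not part of the TreeHash library), assuming the standard Glacier scheme of 1 MiB leaf blocks with an odd trailing digest promoted to the next level unchanged; block_size=1 would mirror the test above.

# Reference sketch of the Glacier-style tree hash, written with plain hashlib so the
# pairing rule is explicit: hash each block, then repeatedly hash the concatenation of
# adjacent digests until a single root digest remains.
import hashlib

def reference_tree_hash(data, block_size=1024 * 1024):
    level = [hashlib.sha256(data[i:i + block_size]).digest()
             for i in range(0, len(data), block_size)] or [hashlib.sha256(b"").digest()]
    while len(level) > 1:
        paired = [hashlib.sha256(level[i] + level[i + 1]).digest()
                  for i in range(0, len(level) - 1, 2)]
        if len(level) % 2:          # odd digest is carried up to the next level as-is
            paired.append(level[-1])
        level = paired
    return level[0].hex()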
Example #9
def process_archive_retrieval_job(job, chunk_size, output_path, friendly_name=False):
    global chunk_count

    filepos_limit = job.archive_size_in_bytes - 1
    current_pos = 0
    job_archive_hash = job.archive_sha256_tree_hash
    chunk_count = 0
    archive_file_name = output_path + "/" + job.id + ".archive"
    archive_file = open(archive_file_name, "wb")
    treehash = TreeHash(algo=hashlib.sha256)
    while current_pos < filepos_limit:
        end_pos = current_pos + (chunk_size - 1)
        if end_pos > filepos_limit:
            end_pos = filepos_limit

        range_string = "bytes=" + str(current_pos) + "-" + str(end_pos)

        response = job.get_output(
            range=range_string
        )

        if Debug:
            print("process_archive_retrieval_job: job.get_output() response: " + str(response))

        if HTTP_SUCCESS_LOW <= response['status'] <= HTTP_SUCCESS_HIGH:
            chunk_count += 1

            if Debug:
                #print("Writing chunk " + str(chunk_count) + " " + range_string + " Checksum: " + response['checksum'] + " ContentRange: " + response['contentRange'] + " AcceptRanges: " + response['acceptRanges'] + " ContentType: " + response['contentType'] + " ArchiveDescription: " + response['archiveDescription'])
                print("Writing chunk " + str(chunk_count) + " " + range_string + " Checksum: " + response['checksum'])

            #archive_file.write(response['body'].read())
            chunk_bytes = response['body'].read()
            archive_file.write(chunk_bytes)

            if Debug:
                chunk_file = open(archive_file_name + ".chunk." + str(chunk_count), "wb")
                chunk_file.write(chunk_bytes)
                chunk_file.close()

            section_hash = sha256_on_file_range(archive_file_name, current_pos, end_pos)
            running_treehash_on_file_range(treehash, archive_file_name, current_pos, end_pos)

            if Debug:
                print("Local checksum of chunk " + str(chunk_count) + ": " + section_hash)
                print("Current running treehash is  " + treehash.hexdigest())
            
            current_pos = end_pos + 1
        else:
            print("Response unsuccessful. Retrying")

    archive_file.close()
Example #10
    def run(self):
        client = self.get_boto_client()
        logging.info("Initiating job to upload")
        upload_job = client.initiate_multipart_upload(
            vaultName=self.vault,
            archiveDescription=self.file_name,
            partSize=str(UPLOAD_PART_SIZE))
        upload_id = upload_job['uploadId']

        treehash = TreeHash(block_size=1024**2)

        cur_file = open(self.file_location, 'rb')

        i = 0
        for i in range(self.numof_parts - 1):
            self.cur_part += 1
            self.update_task()

            data = cur_file.read(UPLOAD_PART_SIZE)
            treehash.update(data)

            cur_range = 'bytes %d-%d/*' % (i * UPLOAD_PART_SIZE,
                                           (i + 1) * UPLOAD_PART_SIZE - 1)
            client.upload_multipart_part(vaultName=self.vault,
                                         uploadId=upload_id,
                                         range=cur_range,
                                         body=data)

        self.cur_part += 1
        self.update_task()

        data = cur_file.read(UPLOAD_PART_SIZE)
        treehash.update(data)

        cur_range = 'bytes %d-%d/*' % ((self.numof_parts - 1) * UPLOAD_PART_SIZE,
                                       self.file_size - 1)
        client.upload_multipart_part(vaultName=self.vault,
                                     uploadId=upload_id,
                                     range=cur_range,
                                     body=data)

        cur_file.close()

        hash_res = treehash.hexdigest()
        client.complete_multipart_upload(vaultName=self.vault,
                                         uploadId=upload_id,
                                         archiveSize=str(self.file_size),
                                         checksum=hash_res)
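Example #10 reads self.numof_parts but does not show how it is computed. A plausible definition (an assumption, not taken from the original class) is the ceiling of the file size over UPLOAD_PART_SIZE, so the loop sends numof_parts - 1 full parts followed by one final, possibly shorter, part.

# Hypothetical helper: number of multipart parts needed for a file of file_size bytes.
def parts_needed(file_size, part_size):
    # Ceiling division; at least one part so a zero-byte file still produces an upload.
    return max(1, (file_size + part_size - 1) // part_size)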
Example #11
def main():

    global Debug

    # Parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help="file to use for hash tests", required=True)
    parser.add_argument("--outputpath", help="Path to store output", default=DEFAULT_OUTPUT_PATH)
    parser.add_argument("--chunksize", help="Size of the chunks to use for download. Only valid of the job is ArchiveRetrieval.", default=DEFAULT_CHUNK_SIZE)
    parser.add_argument("--debug", help="Print Debug messages", action="store_true")
    args = parser.parse_args()

    Debug = args.debug
    ProcessJob_old.set_debug(Debug)
    chunksize = int(args.chunksize)
    statinfo = os.stat(args.file)

    if not ProcessJob_old.is_power_of_2(chunksize):
        print("Chunksize " + str(chunksize) + " is not a power of two. The next closest power of two is " + str(ProcessJob_old.next_power_of_2(chunksize)))
        print("Exiting.")
        sys.exit(1)

    if chunksize > statinfo.st_size:
        chunksize = statinfo.st_size


    current_pos = 0

    chunk_count = 0
    treehash = TreeHash(algo=hashlib.sha256)
    while current_pos < statinfo.st_size:
        chunk_count += 1
        end_pos = current_pos + chunksize - 1
        if end_pos > statinfo.st_size - 1:
            end_pos = statinfo.st_size - 1
        if Debug:
            print("Processing chunk " + str(chunk_count) + " range " + str(current_pos) + " to " + str(end_pos) )
        section_hash = ProcessJob_old.sha256_on_file_range(args.file, current_pos, end_pos)
        ProcessJob_old.running_treehash_on_file_range(treehash, args.file, current_pos, end_pos)

        print("Range: " + str(current_pos) + " to " + str(end_pos))
        print("Local checksum of chunk " + str(chunk_count) + ": " + section_hash)
        print("Current running treehash is " + treehash.hexdigest())
        
        current_pos = end_pos + 1
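running_treehash_on_file_range is imported from ProcessJob_old and not shown in these excerpts. A minimal sketch, assuming it mirrors treehash_on_file_range in Example #6 (inclusive end offset, fixed-size reads) but only feeds the caller's running TreeHash; the default chunk size is an assumption.

# Hypothetical reconstruction of ProcessJob_old.running_treehash_on_file_range.
# Assumes start and end are inclusive byte offsets, as in Example #6.
def running_treehash_on_file_range(treehash, filename, start, end,
                                   hash_chunk_size=1024 * 1024):
    with open(filename, "rb") as infile:
        infile.seek(start)
        current_pos = start
        stop = end + 1
        while current_pos < stop:
            chunk = infile.read(min(hash_chunk_size, stop - current_pos))
            if not chunk:  # unexpected EOF
                break
            treehash.update(chunk)
            current_pos += len(chunk)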
Example #12
def sha256_on_file_range(filename, start, end, hash_chunk_size=DEFAULT_HASH_CHUNK_SIZE):

    sha256 = hashlib.sha256()
    infile = open(filename, "rb")

    treehash = TreeHash(algo=hashlib.sha256)
    infile.seek(start)

    if Debug:
        print("Running Hash: Start: " + str(start) + ", End: " + str(end))

    current_pos = start
    stop = end + 1  # end is an inclusive offset
    while current_pos < stop:
        read_size = stop - current_pos
        if read_size > hash_chunk_size:
            read_size = hash_chunk_size

        chunk = infile.read(read_size)

        sha256.update(chunk)
        treehash.update(chunk)
        current_pos += read_size
    infile.close()

    if Debug:
        print("Running hash for this section (" + str(start) + " to " + str(end) + ") is " + sha256.hexdigest())
        print("Tree hash for this section (" + str(start) + " to " + str(end) + ") is " + treehash.hexdigest())

    return sha256.hexdigest()
Example #13
    def upload(self, client):

        if (self._inventory_entry.get_state() == FileState.IN_PROGRESS):
            self._upload_id = self._inventory_entry.get_upload_id()
        else:
            tmp_upload = client.initiate_multipart_upload(
                vaultName=self._vaultName,
                archiveDescription=self._inventory_entry.get_fileName(),
                partSize=str(self._partSize))
            self._upload_id = tmp_upload['uploadId']

        if self._partSize < self._fileSizeBytes:
            self._inventory_entry.set_state_from_upload(
                self, FileState.IN_PROGRESS)

        partBegin = self._partNumUploading * self._partSize
        data = b""
        with open(self._inventory_entry.get_filePath(), "rb") as f:
            if partBegin:
                data = f.read(partBegin)
            treehash = TreeHash(data=data, block_size=self._partSize)
            while partBegin < self._fileSizeBytes:
                partEnd = partBegin + self._partSize - 1
                if partEnd > self._fileSizeBytes - 1:
                    partEnd = self._fileSizeBytes - 1

                part = f.read(self._partSize)
                treehash.update(part)

                if not self._startTime:
                    self._startTime = time.time()

                self._upload_part(client, part, partBegin, partEnd)
                partBegin = partEnd + 1
                self._partNumUploading += 1

                if partEnd < self._fileSizeBytes:
                    self._inventory_entry.set_state_from_upload(
                        self, FileState.IN_PROGRESS)

        completed_treehash = treehash.hexdigest()
        response = client.complete_multipart_upload(
            vaultName=self._vaultName,
            uploadId=self._upload_id,
            archiveSize=str(self._fileSizeBytes),
            checksum=completed_treehash)

        self._endTime = time.time()

        cli.cli_progress(self._inventory_entry.get_fileName(),
                         self.formattedFileSize(),
                         self.formattedPartSize(),
                         self._startTime,
                         self._fileSizeBytes-1,
                         self._fileSizeBytes-1)

        # Sanity check that's probably unnecessary.
        if treehash.hexdigest() != response['checksum']:
            raise Exception('checksum mismatch')

        self._checksum = response['checksum']
        self._http_status = response['ResponseMetadata']['HTTPStatusCode']
        self._archive_id = response['archiveId']
        self._upload_location = response['location']
        # cli.pp(json.dumps(self, default=lambda o: o.__dict__))

        self._inventory_entry.set_state_from_upload(self, FileState.UPLOADED)
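The _upload_part helper called in the loop above is not part of this excerpt. A minimal sketch of what it likely does, following the upload_multipart_part calls in Examples #10 and #14; the method body here is an assumption, not the original implementation.

    # Hypothetical reconstruction of the _upload_part helper used in Example #13:
    # upload one part body with Glacier's Content-Range formatting.
    def _upload_part(self, client, part, partBegin, partEnd):
        client.upload_multipart_part(
            vaultName=self._vaultName,
            uploadId=self._upload_id,
            range='bytes %d-%d/*' % (partBegin, partEnd),
            body=part)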
Example #14
    def handle(self, *args, **options):
        part_size = 8388608

        print 'Contacting Amazon AWS...'
        glacier = boto3.client('glacier')
        multipart_upload = glacier.initiate_multipart_upload(
            vaultName=settings.GLACIER_VAULT_NAME, partSize=str(part_size))
        print 'Connected to Glacier Vault "' + settings.GLACIER_VAULT_NAME + '"'
        upload_id = multipart_upload['uploadId']
        treehash_archive = TreeHash()
        db = influxdb.InfluxDBClient(settings.INFLUXDB_URI, 8086, 'root',
                                     'root', 'seads')

        archive_size = 0
        for device in Device.objects.all():
            # Archive everything older than this device's retention window:
            # from the epoch up to now minus (31 days * data_retention_policy).
            start = 0
            retention = timedelta(days=31 * device.data_retention_policy)
            end = time.time() - retention.total_seconds()
            print 'Trying ' + str(device) + '...'
            print 'Data Retention Policy: ' + str(
                device.data_retention_policy) + ' Months'
            series = 'device.' + str(device.serial)
            try:
                query = 'select * from ' + series + ' where time > ' + str(
                    start) + 's and time < ' + str(end) + 's'
                points = db.query(query)
            except:
                print 'No data found for ' + series + '. Skipping.'
                continue
            print "Uploading " + series + "..."
            print "Querying from " + str(datetime.fromtimestamp(
                int(start))) + " to " + str(datetime.fromtimestamp(int(end)))
            # store points in temporary file, break into 8MB parts
            with open('/tmp/temp_archive', 'wb') as f:
                f.write(json.dumps(points))
            bytes_read = 0
            bytes_sent = 0
            with open('/tmp/temp_archive', 'rb') as f:
                part = f.read(part_size)
                while part:
                    # Glacier expects the tree hash of each individual part, so a
                    # fresh TreeHash is built per part rather than a running hash
                    # over everything read so far.
                    treehash_part = TreeHash()
                    treehash_part.update(part)
                    bytes_read += len(part)
                    response = glacier.upload_multipart_part(
                        vaultName=settings.GLACIER_VAULT_NAME,
                        uploadId=upload_id,
                        range='bytes ' + str(bytes_sent) + '-' +
                        str(bytes_read - 1) + '/*',
                        body=part,
                        checksum=treehash_part.hexdigest())
                    bytes_sent += len(part)
                    part = f.read(part_size)
            archive_size += bytes_sent
            print "Successfully uploaded " + str(
                bytes_sent) + " bytes to Glacier"
            print "Deleting points from database..."
            # drop from fanout series as well
            series = db.query('list series')[0]['points']
            rg = re.compile('device.' + str(device.serial))
            for s in series:
                if rg.search(s[1]):
                    db.query('delete from ' + s[1] + ' where time > ' +
                             str(start) + 's and time < ' + str(end) + 's')
            print "[DONE]"
        try:
            with open('/tmp/temp_archive', 'rb') as f:
                treehash_archive.update(f.read())
            response = glacier.complete_multipart_upload(
                vaultName=settings.GLACIER_VAULT_NAME,
                uploadId=upload_id,
                archiveSize=str(archive_size),
                checksum=treehash_archive.hexdigest())
            with open(settings.STATIC_PATH + 'archive_ids.log', 'a') as f:
                line = {
                    'archiveId': response['archiveId'],
                    'timeEnd': str(end)
                }
                f.write(json.dumps(line))
                f.write(';')
            os.remove('/tmp/temp_archive')
            print "Archival Successful"
        except:
            print "No data to archive. Exiting."
Example #15
 def test_update(self):
     treehash = TreeHash()
     treehash.update(TEST_DATA)
     self.assertEqual(
         hashlib.sha256(TEST_DATA).hexdigest(), treehash.hexdigest())
Example #16
    potential_zip = entry.name + '.zip'
    if entry.is_dir() and re.match(r'\d+[_\-]\d+[_\-]\d+', entry.name) and potential_zip not in decoded_archived:
        logger.info(entry.name + r' matches the pattern \d+[_\-]\d+[_\-]\d+ and is not currently in Glacier as a zip file.')
        not_archived.append({'Zipfile': potential_zip, 'Path': (picture_path + '\\' + entry.name)})
        logger.info(potential_zip + ' added to list of archives that need to be created and uploaded.')

for n in not_archived:
    logger.info('Processing ' + str(n))
    with zipfile.ZipFile(n['Zipfile'], 'w', allowZip64=True) as zf:
        logger.info('Writing zipfile ' + n['Path'])
        writeZipfile(n['Path'], zf)
    
    with open(n['Zipfile'], 'rb') as file:
        expected_fullhash = TreeHash()
        expected_fullhash.update(file.read())
        expected_fullhash_value = expected_fullhash.hexdigest()
        logger.debug('File: ' + n['Zipfile'] + '. Expected full file hash is ' + str(expected_fullhash_value))
        
        file.seek(0)
        #Check for multipart upload in progress
        logger.debug('File: ' + n['Zipfile'] + '. Entering getExistingUploads, passing glacier object and target_vault_name: ' + target_vault_name)
        existing_uploads = getExistingUploads(glacier, target_vault_name)
    
        start_byte = 0
        file_size = 0
        chunksize = 8388608
        fullhash = TreeHash()
        description = '{"Path":"' + n['Zipfile'] + ', "ExpectedTreeHash":"' + expected_fullhash_value + '"}'
        logger.info('File: ' + n['Zipfile'] + '. Starting file upload process. File description: ' + description)
        logger.info('File: ' + n['Zipfile'] + '. Chunksize: ' + str(chunksize))
        
Example #17
        if opt['Verbose']: print("## upload id " + upid)
        if upid[0] != '-':
            break

    (rp, scnt, ft, awsids) = (0, 0, open(fn, 'rb'),
                              awstmp + ' --upload-id {}'.format(upid))
    for x in range(0, fs, csize):
        rs = ft.read(csize)
        (fl, fp) = (len(rs), opt['TempFile'].format(fcnt, scnt))
        with open(fp, 'wb') as fw:
            fw.write(rs)
        mp = TreeHash()
        mp.update(rs)

        (s,r)=cmd(awsids.format('upload-multipart-part')+' --body \'{}\''.format(fp)+\
                  ' --range \"bytes {}-{}/*\" --checksum \"{}\"'.format(rp,rp+fl-1,mp.hexdigest()))
        if s != 0: errorexit(r)
        if opt['Verbose']:
            print('##  done part {:2}, {:6.2f} GB ({:12} b)'.format(
                scnt, fl / gbyte, fl),
                  flush=True)
        elif scnt == 0:
            print('##  done part {:2}'.format(scnt), end='', flush=True)
        else:
            print(' {:2}'.format(scnt), end='', flush=True)
        (rp, scnt) = (rp + fl, scnt + 1)
        os.remove(fp)
    ft.close()

    print('')
    (s, r) = cmd(
Example #18
def main():

    # Parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help="File to upload", required=True)
    parser.add_argument("--size", help="Chunk size in bytes", required=True)
    parser.add_argument("--account", help="Account ID", default="-")
    parser.add_argument("--vault", help="Vault Name", required=True)
    args = parser.parse_args()

    in_file = open(args.file, "rb")
    in_file_size = os.path.getsize(args.file)
    #in_file_sha256 = hashlib.sha256(open(args.file, "rb").read()).hexdigest()

    treehash = TreeHash(algo=hashlib.sha256)
    with open(args.file, "rb") as treehash_input:
        while True:
            data = treehash_input.read(hash_chunks)
            if not data:
                break
            treehash.update(data)
    in_file_tree_sha256 = treehash.hexdigest()

    chunk_size = int(args.size)
    if chunk_size < min_chunk_size:
        print("Supplied chunk size (" + args.size + ") is less than minimum. Setting chunk size to " + str(min_chunk_size))
        chunk_size = min_chunk_size

    glacier = boto3.resource('glacier')
    client = boto3.client('glacier')
    #

    multiupload_request = client.initiate_multipart_upload(
        vaultName=args.vault,
        archiveDescription=args.file,
        partSize=str(chunk_size)
    )

    multipart_upload = glacier.MultipartUpload(args.account, args.vault, multiupload_request['uploadId'])

    print("MultiUpload ID: " + multiupload_request['uploadId'])
    print("Size: " + str(in_file_size))
    #print("Hash: " + in_file_sha256)
    print("Tree Hash: " + in_file_tree_sha256)

    position = 0
    chunk = in_file.read(chunk_size)
    while chunk:
        print("Length: " + str(len(chunk)))
        print("Current range: bytes " + str(position) + "-" + str(position + len(chunk) - 1) + "/*")
        response = multipart_upload.upload_part(
                range="bytes " + str(position) + "-" + str(position + len(chunk) - 1) + "/*",
                body=chunk
        )
        print("Uploaded Checksum: " + response['checksum'])

        position += len(chunk)
        chunk = in_file.read(chunk_size)

    response = multipart_upload.complete(
        archiveSize=str(in_file_size),
        checksum=in_file_tree_sha256
    )

    print("Upload Complete.")
    print("Location: " + response['location'])
    print("Checksum: " + response['checksum'])
    print("Archive ID: " + response['archiveId'])

    print("Account ID: " + multipart_upload.account_id)
    print("Vault name: " + multipart_upload.vault_name)
    print("ID: " + multipart_upload.id)