def main():
    """Record the git-lfs checksum of every tracked file in the File table.

    Runs ``git lfs ls-files -l`` through ``git_obj`` and, for each listed
    file larger than 1000 bytes, fetches or creates the ``File`` row keyed
    by the file's absolute path, fills in a missing ``sha256sum`` and clears
    the ``is_del`` flag. Progress is printed every 100 handled files and a
    final line reports how many rows still have no checksum.

    Raises:
        ValueError: if a non-blank output line lacks the ``" * "`` / ``" - "``
            separator between the object id and the path.
        OSError: propagated from ``path.stat()`` when a listed file cannot
            be stat'ed.
        AssertionError: if the stored checksum differs from the one git-lfs
            reports, or the stored size differs from the size on disk.
    """
    print("use git lfs to calculate the sha and save it to index.db")
    handled = 0
    listing = git_obj.execute('git lfs ls-files -l')
    for line in listing.split("\n"):
        # split("\n") yields an empty string for empty output or a trailing
        # newline; such entries carry no file and must not reach the parser.
        if not line.strip():
            continue

        # Only the first separator divides the object id from the path; the
        # path itself may legitimately contain " - " or " * ".
        fields = re.split(r" [*-] ", line, maxsplit=1)
        if len(fields) != 2:
            raise ValueError(f"unexpected git lfs ls-files line: {line!r}")
        sha, path_str = fields

        path = pathlib.Path(path_str).absolute()
        # One stat call serves the size filter, the row defaults and the
        # consistency check below.
        stat_result = path.stat()
        if stat_result.st_size <= 1000:
            continue

        file_obj, _ = File.get_or_create(
            path=str(path),
            defaults={
                "st_size": stat_result.st_size,
                "st_ctime": datetime.datetime.fromtimestamp(stat_result.st_ctime),
            },
        )
        if not file_obj.sha256sum:
            file_obj.sha256sum = sha
            file_obj.save()
        if file_obj.is_del:
            # The file is listed by git-lfs again, so un-mark the row.
            file_obj.is_del = False
            file_obj.save()

        # Sanity checks: the row must agree with git-lfs and with the disk.
        assert file_obj.sha256sum == sha
        assert file_obj.st_size == stat_result.st_size

        handled += 1
        if handled % 100 == 0:
            print(f" {handled} handled")
    print(
        f"finished {len(File.filter(sha256sum=None))} file still have no sha256sum"
    )
def create_object(self, obj):
    """Build a ``File`` record from the uploaded ``file`` form field.

    Reads the upload from ``request.files['file']``, rejects empty uploads,
    computes its size, magic type and digests, and hands the populated
    record to ``File.get_or_create``.

    Args:
        obj: request payload; only ``obj.data.get("upload_time", ...)`` is
            read, and only when maintenance mode is enabled and the
            authenticated user is the configured admin.

    Returns:
        The ``(db_file, is_file_new)`` pair returned by
        ``File.get_or_create``.

    Raises:
        BadRequest: if the uploaded file has zero length.
    """
    upload = request.files['file']

    # Seek to the end to learn the size of the upload.
    upload.stream.seek(0, os.SEEK_END)
    size = upload.tell()
    if size == 0:
        raise BadRequest("Uploaded file is empty")

    def hexdigest(hash_obj):
        return hash_obj.hexdigest()

    # NOTE(review): the stream-consuming calls below are kept in their
    # original order; whether calc_hash / crc32_sum rewind the stream
    # themselves is not visible here -- confirm before reordering.
    sha256_hex = calc_hash(upload.stream, hashlib.sha256(), hexdigest)
    upload.stream.seek(0, os.SEEK_SET)
    magic_type = magic.from_buffer(upload.stream.read())

    # Populate the record up front; get_or_create at the end is relied upon
    # to deal with concurrent uploads of the same file.
    record = File()
    record.file_name = secure_filename(upload.filename)
    record.file_size = size
    record.file_type = magic_type
    record.parents = []
    record.crc32 = crc32_sum(upload.stream)
    record.md5 = calc_hash(upload.stream, hashlib.md5(), hexdigest)
    record.sha1 = calc_hash(upload.stream, hashlib.sha1(), hexdigest)
    record.sha256 = sha256_hex
    record.dhash = sha256_hex
    record.sha512 = calc_hash(upload.stream, hashlib.sha512(), hexdigest)
    record.humanhash = Humanhash._humanhash(sha256_hex)
    record.ssdeep = calc_hash(upload.stream, ssdeep.Hash(), lambda h: h.digest())

    record.upload_time = datetime.now()
    maintenance = app_config.malwarecage.enable_maintenance
    if maintenance and g.auth_user.login == app_config.malwarecage.admin_login:
        # In maintenance mode the admin may supply the upload time.
        record.upload_time = obj.data.get("upload_time", datetime.now())

    record, is_file_new = File.get_or_create(record, upload)
    return record, is_file_new