def raw_sha256_crypt(secret, salt, rounds):
    """Run the raw sha256-crypt routine and encode its digest.

    The heavy lifting is delegated to ``raw_sha_crypt`` (called with
    ``sha256``); the digest it returns is then encoded through
    ``h64.encode_transposed_bytes`` using ``_256_offsets``.

    :arg secret: password, handed to ``raw_sha_crypt`` untouched
    :arg salt: salt, handed to ``raw_sha_crypt`` untouched
    :arg rounds: rounds value, handed to ``raw_sha_crypt`` untouched

    :returns:
        tuple ``(checksum, salt, rounds)`` - the encoded checksum plus the
        salt & rounds values handed back by ``raw_sha_crypt``.
    """
    # the shared crypt routine returns the digest along with salt & rounds
    digest, used_salt, used_rounds = raw_sha_crypt(secret, salt, rounds, sha256)

    # transpose + encode the digest
    checksum = h64.encode_transposed_bytes(digest, _256_offsets)
    assert len(checksum) == 43, "wrong length: %r" % (checksum,)

    return checksum, used_salt, used_rounds
def raw_sha256_crypt(secret, salt, rounds):
    """Perform raw sha256-crypt.

    Calls the common ``raw_sha_crypt`` routine with the ``sha256``
    constructor, then encodes the resulting digest with
    ``h64.encode_transposed_bytes`` and the ``_256_offsets`` table.

    :returns:
        ``(encoded checksum, salt, rounds)``; salt & rounds are whatever
        ``raw_sha_crypt`` returned for them.
    """
    # run common crypt routine
    crypt_output = raw_sha_crypt(secret, salt, rounds, sha256)
    raw_digest, salt, rounds = crypt_output

    encoded = h64.encode_transposed_bytes(raw_digest, _256_offsets)
    # sanity check on the encoded size
    assert len(encoded) == 43, "wrong length: %r" % (encoded, )
    return encoded, salt, rounds
def _calc_checksum_builtin(self, secret):
    """Calculate the checksum for *secret* using iterated ``hmac_sha1``.

    Reads ``self.rounds``, ``self.salt`` and ``self._chk_offsets``.

    :arg secret: password; unicode input is encoded to utf-8 first.

    :returns:
        checksum encoded via ``h64.encode_transposed_bytes`` and decoded
        from ascii.
    """
    if isinstance(secret, unicode):
        secret = secret.encode("utf-8")

    rounds = self.rounds

    # NOTE: the seed string below uses a different format than the hash...
    digest = (u"%s$sha1$%s" % (self.salt, rounds)).encode("ascii")

    # feed the digest back through hmac_sha1 once per round
    remaining = rounds
    while remaining > 0:
        digest = hmac_sha1(secret, digest)
        remaining -= 1

    encoded = h64.encode_transposed_bytes(digest, self._chk_offsets)
    return encoded.decode("ascii")
def _calc_checksum_builtin(self, secret):
    """Calculate the checksum for *secret* using a keyed hmac-sha1 prf.

    Reads ``self.rounds``, ``self.salt`` and ``self._chk_offsets``.

    :arg secret: password; unicode input is encoded to utf-8 first.

    :raises: ``uh.exc.NullPasswordError`` if the secret contains ``_BNULL``.

    :returns:
        checksum encoded via ``h64.encode_transposed_bytes`` and decoded
        from ascii.
    """
    if isinstance(secret, unicode):
        secret = secret.encode("utf-8")
    if _BNULL in secret:
        raise uh.exc.NullPasswordError(self)

    rounds = self.rounds

    # NOTE: this seed value is NOT the same as the config string
    seed_text = u("%s$sha1$%s") % (self.salt, rounds)
    digest = seed_text.encode("ascii")

    # NOTE: the algorithm is essentially PBKDF1, modified to use HMAC:
    #       the prf keyed with the secret is applied 'rounds' times in a row.
    prf_info = get_keyed_prf("hmac-sha1", secret)
    prf = prf_info[0]
    for _ in irange(rounds):
        digest = prf(digest)

    encoded = h64.encode_transposed_bytes(digest, self._chk_offsets)
    return encoded.decode("ascii")
def _raw_md5_crypt(pwd, salt, use_apr=False):
    """Compute the raw MD5-Crypt checksum for a password / salt pair.

    Pure-python core of the MD5-Crypt algorithms. Parsing and validation
    of complete hash strings is not handled here.

    :arg pwd: password; unicode is encoded to utf-8, otherwise must be bytes
    :arg salt: salt as unicode (ascii-encodable, at most 8 chars)
    :arg use_apr: if true, hash in ``_APR_MAGIC`` instead of ``_MD5_MAGIC``

    :raises: ``uh.exc.NullPasswordError`` if the password contains ``_BNULL``

    :returns:
        encoded checksum chars
    """
    # NOTE: the apache ("apr") variant differs from plain md5-crypt only in
    #       the ident / magic constant incorporated into the hash.

    #===================================================================
    # normalize & sanity-check inputs
    #===================================================================
    # XXX: no official unicode policy is known; utf-8 is used as the default
    if isinstance(pwd, unicode):
        pwd = pwd.encode("utf-8")
    assert isinstance(pwd, bytes), "pwd not unicode or bytes"
    if _BNULL in pwd:
        raise uh.exc.NullPasswordError(md5_crypt)
    secret_size = len(pwd)

    # NOTE: the spec truncates salts longer than 8 bytes instead of raising
    #       an error; the handler class is assumed to have done that already.
    assert isinstance(salt, unicode), "salt not unicode"
    salt = salt.encode("ascii")
    assert len(salt) < 9, "salt too large"

    # pick the variant-specific constant
    ident = _APR_MAGIC if use_apr else _MD5_MAGIC

    #===================================================================
    # digest B - sub-input for digest A
    #===================================================================
    digest_b = md5(pwd + salt + pwd).digest()

    #===================================================================
    # digest A - initial state for the digest C rounds
    #===================================================================
    ctx_a = md5(pwd + ident + salt)
    feed_a = ctx_a.update

    # secret_size bytes of B, repeating B as often as needed
    feed_a(repeat_string(digest_b, secret_size))

    # one update per bit of secret_size: a null byte for 1-bits, the first
    # password char for 0-bits.
    # NOTE: using a null here (rather than db[0]) may historically have been
    #       a bug in the original code, but all implementations must match it.
    first_char = pwd[:1]
    bits = secret_size
    while bits:
        if bits & 1:
            feed_a(_BNULL)
        else:
            feed_a(first_char)
        bits >>= 1

    digest_a = ctx_a.digest()

    #===================================================================
    # digest C - 1000 rounds mixing the running digest, salt & password
    #===================================================================
    # NOTE: in the reference algorithm, round i hashes
    #           (pwd if i is odd else dc)
    #           + (salt if i % 3) + (pwd if i % 7)
    #           + (dc if i is odd else pwd)
    #       the pattern repeats every lcm(2,3,7)==42 rounds, so the constant
    #       part of every round is precalculated as 21 (even, odd) pairs:
    #       even rounds are hash(dc + constant), odd rounds are
    #       hash(constant + dc). 23 full blocks plus 17 extra pairs give the
    #       required 42*23 + 2*17 == 1000 rounds.

    # the 6 pwd/salt combinations
    # (order must match how _c_digest_offsets was generated)
    double_pwd = pwd + pwd
    pwd_then_salt = pwd + salt
    combos = [
        pwd,
        double_pwd,
        pwd_then_salt,
        pwd_then_salt + pwd,
        salt + pwd,
        salt + double_pwd,
    ]
    round_pairs = [(combos[e], combos[o]) for e, o in _c_digest_offsets]

    # 23 blocks of 42 rounds (966 rounds), then 17 more pairs (34 rounds)
    schedule = round_pairs * 23 + round_pairs[:17]

    digest_c = digest_a
    for even_const, odd_const in schedule:
        inner = md5(digest_c + even_const).digest()
        digest_c = md5(odd_const + inner).digest()

    #===================================================================
    # encode digest using the transpose map
    #===================================================================
    encoded = h64.encode_transposed_bytes(digest_c, _transpose_map)
    return encoded.decode("ascii")
def raw_sun_md5_crypt(secret, rounds, salt):
    """given secret & salt, return encoded sun-md5-crypt checksum

    :arg secret: password; must already be bytes (asserted)
    :arg rounds: extra rounds; values <= 0 are treated as 0, and the loop
                 below always runs ``4096 + rounds`` iterations.
    :arg salt: salt bytes (asserted); per the note below, this is expected
               to be the full config string with trailing "$".

    :returns:
        the value of ``h64.encode_transposed_bytes(result, _chk_offsets)``
        for the final digest (not decoded by this function).
    """
    # NOTE(review): MAGIC_HAMLET is only read in this function, so this
    #               'global' declaration looks unnecessary - confirm.
    global MAGIC_HAMLET
    assert isinstance(secret, bytes)
    assert isinstance(salt, bytes)

    # validate rounds
    if rounds <= 0:
        rounds = 0
    real_rounds = 4096 + rounds
    # NOTE: spec seems to imply max 'rounds' is 2**32-1

    # generate initial digest to start off round 0.
    # NOTE: algorithm 'salt' includes full config string w/ trailing "$"
    result = md5(secret + salt).digest()
    assert len(result) == 16

    # NOTE: many things in this function have been inlined (to speed up the loop
    #       as much as possible), to the point that this code barely resembles
    #       the algorithm as described in the docs. in particular:
    #
    #       * all accesses to a given bit have been inlined using the formula
    #           rbitval(bit) = (rval((bit>>3) & 15) >> (bit & 7)) & 1
    #
    #       * the calculation of coinflip value R has been inlined
    #
    #       * the conditional division of coinflip value V has been inlined as
    #         a shift right of 0 or 1.
    #
    #       * the i, i+3, etc iterations are precalculated in lists.
    #
    #       * the round-based conditional division of x & y is now performed
    #         by choosing an appropriate precalculated list, so that it only
    #         calculates the 7 bits which will actually be used.
    #
    X_ROUNDS_0, X_ROUNDS_1, Y_ROUNDS_0, Y_ROUNDS_1 = _XY_ROUNDS

    # NOTE: % appears to be *slightly* slower than &, so we prefer & if possible

    round = 0
    while round < real_rounds:
        # convert last result byte string to list of byte-ints for easy access
        # (rval(n) then returns the n'th byte value of the previous digest)
        rval = [byte_elem_value(c) for c in result].__getitem__

        # build up X bit by bit
        # (the table is selected by bit number 'round' of the previous digest)
        x = 0
        xrounds = X_ROUNDS_1 if (rval((round >> 3) & 15) >> (round & 7)) & 1 else X_ROUNDS_0
        for i, ia, ib in xrounds:
            a = rval(ia)
            b = rval(ib)
            v = rval((a >> (b % 5)) & 15) >> ((b >> (a & 7)) & 1)
            x |= ((rval((v >> 3) & 15) >> (v & 7)) & 1) << i

        # build up Y bit by bit
        # (same procedure as X, but the table is selected by bit 'round + 64';
        #  since 64 is a multiple of 8, the in-byte shift is still round & 7)
        y = 0
        yrounds = Y_ROUNDS_1 if (rval(((round + 64) >> 3) & 15) >> (round & 7)) & 1 else Y_ROUNDS_0
        for i, ia, ib in yrounds:
            a = rval(ia)
            b = rval(ib)
            v = rval((a >> (b % 5)) & 15) >> ((b >> (a & 7)) & 1)
            y |= ((rval((v >> 3) & 15) >> (v & 7)) & 1) << i

        # extract x'th and y'th bit, xoring them together to yield "coin flip"
        coin = ((rval(x >> 3) >> (x & 7)) ^ (rval(y >> 3) >> (y & 7))) & 1

        # construct hash for this round:
        # previous digest, MAGIC_HAMLET if the coin came up 1,
        # then the round number as ascii decimal digits.
        h = md5(result)
        if coin:
            h.update(MAGIC_HAMLET)
        h.update(unicode(round).encode("ascii"))
        result = h.digest()

        round += 1

    # encode output
    return h64.encode_transposed_bytes(result, _chk_offsets)
def raw_sun_md5_crypt(secret, rounds, salt):
    """given secret & salt, return encoded sun-md5-crypt checksum

    :arg secret: password; must already be bytes (asserted)
    :arg rounds: extra rounds; values <= 0 are treated as 0, and the loop
                 below always runs ``4096 + rounds`` iterations.
    :arg salt: salt bytes (asserted); per the note below, this is expected
               to be the full config string with trailing "$".

    :returns:
        the value of ``h64.encode_transposed_bytes(result, _chk_offsets)``
        for the final digest (not decoded by this function).
    """
    # NOTE(review): MAGIC_HAMLET is only read in this function, so this
    #               'global' declaration looks unnecessary - confirm.
    global MAGIC_HAMLET
    assert isinstance(secret, bytes)
    assert isinstance(salt, bytes)

    # validate rounds
    if rounds <= 0:
        rounds = 0
    real_rounds = 4096 + rounds
    # NOTE: spec seems to imply max 'rounds' is 2**32-1

    # generate initial digest to start off round 0.
    # NOTE: algorithm 'salt' includes full config string w/ trailing "$"
    result = md5(secret + salt).digest()
    assert len(result) == 16

    # NOTE: many things in this function have been inlined (to speed up the loop
    #       as much as possible), to the point that this code barely resembles
    #       the algorithm as described in the docs. in particular:
    #
    #       * all accesses to a given bit have been inlined using the formula
    #           rbitval(bit) = (rval((bit>>3) & 15) >> (bit & 7)) & 1
    #
    #       * the calculation of coinflip value R has been inlined
    #
    #       * the conditional division of coinflip value V has been inlined as
    #         a shift right of 0 or 1.
    #
    #       * the i, i+3, etc iterations are precalculated in lists.
    #
    #       * the round-based conditional division of x & y is now performed
    #         by choosing an appropriate precalculated list, so that it only
    #         calculates the 7 bits which will actually be used.
    #
    X_ROUNDS_0, X_ROUNDS_1, Y_ROUNDS_0, Y_ROUNDS_1 = _XY_ROUNDS

    # NOTE: % appears to be *slightly* slower than &, so we prefer & if possible

    round = 0
    while round < real_rounds:
        # convert last result byte string to list of byte-ints for easy access
        # (rval(n) then returns the n'th byte value of the previous digest)
        rval = [ byte_elem_value(c) for c in result ].__getitem__

        # build up X bit by bit
        # (the table is selected by bit number 'round' of the previous digest)
        x = 0
        xrounds = X_ROUNDS_1 if (rval((round>>3) & 15)>>(round & 7)) & 1 else X_ROUNDS_0
        for i, ia, ib in xrounds:
            a = rval(ia)
            b = rval(ib)
            v = rval((a >> (b % 5)) & 15) >> ((b>>(a&7)) & 1)
            x |= ((rval((v>>3)&15)>>(v&7))&1) << i

        # build up Y bit by bit
        # (same procedure as X, but the table is selected by bit 'round + 64';
        #  since 64 is a multiple of 8, the in-byte shift is still round & 7)
        y = 0
        yrounds = Y_ROUNDS_1 if (rval(((round+64)>>3) & 15)>>(round & 7)) & 1 else Y_ROUNDS_0
        for i, ia, ib in yrounds:
            a = rval(ia)
            b = rval(ib)
            v = rval((a >> (b % 5)) & 15) >> ((b>>(a&7)) & 1)
            y |= ((rval((v>>3)&15)>>(v&7))&1) << i

        # extract x'th and y'th bit, xoring them together to yield "coin flip"
        coin = ((rval(x>>3) >> (x&7)) ^ (rval(y>>3) >> (y&7))) & 1

        # construct hash for this round:
        # previous digest, MAGIC_HAMLET if the coin came up 1,
        # then the round number as ascii decimal digits.
        h = md5(result)
        if coin:
            h.update(MAGIC_HAMLET)
        h.update(unicode(round).encode("ascii"))
        result = h.digest()

        round += 1

    # encode output
    return h64.encode_transposed_bytes(result, _chk_offsets)
def _raw_sha2_crypt(pwd, salt, rounds, use_512=False):
    """Compute the raw SHA256-Crypt / SHA512-Crypt checksum.

    Pure-python core of the two SHA2-based crypt algorithms. Parsing and
    validation of complete hash strings is not handled here.

    :arg pwd: password; unicode is encoded to utf-8, otherwise must be bytes
    :arg salt: salt as unicode (ascii-encodable, at most 16 chars)
    :arg rounds: linear rounds cost; must lie within 1000 .. 999999999
    :arg use_512: select sha512-crypt instead of sha256-crypt

    :raises: ``uh.exc.NullPasswordError`` if the password contains ``_BNULL``

    :returns:
        encoded checksum chars
    """
    #===================================================================
    # normalize & sanity-check inputs
    #===================================================================
    # NOTE: setup time grows ~linearly with the password size, which makes
    #       the algorithm DOS-able through unreasonably large inputs; the
    #       fast path for digest P below would additionally need
    #       O(pwd_len**2) memory. two mitigations are in place:
    #       1) passwords much larger than average switch to an
    #          O(pwd_len)-memory method for digest P, and
    #       2) the library enforces a global max password size
    #          (see passlib.exc.PasswordSizeError).

    # XXX: no official unicode policy is known; utf-8 is used as the default
    if isinstance(pwd, unicode):
        pwd = pwd.encode("utf-8")
    assert isinstance(pwd, bytes)
    if _BNULL in pwd:
        raise uh.exc.NullPasswordError(sha512_crypt if use_512 else sha256_crypt)
    secret_size = len(pwd)

    # NOTE: the spec clips out-of-range rounds instead of raising an error;
    #       the handler class is assumed to have done that already.
    assert 1000 <= rounds <= 999999999, "invalid rounds"

    # NOTE: likewise, the spec truncates salts longer than 16 bytes; that is
    #       also assumed to have been handled by the handler class.
    assert isinstance(salt, unicode), "salt not unicode"
    salt = salt.encode("ascii")
    salt_size = len(salt)
    assert salt_size < 17, "salt too large"

    # variant-specific constants
    # (hash_len is informational only - nothing below reads it)
    if use_512:
        new_hash, hash_len, order = hashlib.sha512, 64, _512_transpose_map
    else:
        new_hash, hash_len, order = hashlib.sha256, 32, _256_transpose_map

    #===================================================================
    # digest B - sub-input for digest A
    #===================================================================
    digest_b = new_hash(pwd + salt + pwd).digest()

    #===================================================================
    # digest A - initial state for the digest C rounds
    #===================================================================
    ctx_a = new_hash(pwd + salt)
    feed_a = ctx_a.update

    # secret_size bytes of B, repeating B as often as needed
    feed_a(repeat_string(digest_b, secret_size))

    # one update per bit of secret_size: B for 1-bits, the password for 0-bits
    bits = secret_size
    while bits:
        if bits & 1:
            feed_a(digest_b)
        else:
            feed_a(pwd)
        bits >>= 1

    digest_a = ctx_a.digest()

    #===================================================================
    # digest P - stands in for the password during the digest C rounds
    #===================================================================
    if secret_size < 96:
        # faster under python, but needs O(pwd_len**2) memory - so it's
        # reserved for smaller passwords to avoid a potential DOS.
        seed_p = new_hash(pwd * secret_size).digest()
    else:
        # slower under python, but uses a fixed amount of memory:
        # feed the password secret_size times in total.
        ctx_p = new_hash(pwd)
        feed_p = ctx_p.update
        fed = 1
        while fed < secret_size:
            feed_p(pwd)
            fed += 1
        seed_p = ctx_p.digest()
    digest_p = repeat_string(seed_p, secret_size)
    assert len(digest_p) == secret_size

    #===================================================================
    # digest S - stands in for the salt during the digest C rounds
    #===================================================================
    salt_reps = 16 + byte_elem_value(digest_a[0])
    digest_s = new_hash(salt * salt_reps).digest()[:salt_size]
    assert len(digest_s) == salt_size, "salt_len somehow > hash_len!"

    #===================================================================
    # digest C - variable number of rounds mixing A, S and P
    #===================================================================
    # NOTE: in the reference algorithm, round i hashes
    #           (dp if i is odd else dc)
    #           + (ds if i % 3) + (dp if i % 7)
    #           + (dc if i is odd else dp)
    #       the pattern repeats every lcm(2,3,7)==42 rounds, so the constant
    #       part of every round is precalculated as 21 (even, odd) pairs:
    #       even rounds are hash(dc + constant), odd rounds are
    #       hash(constant + dc). the code then runs as many full 42-round
    #       blocks as possible, then as many pairs as possible, then one
    #       final even round if 'rounds' is odd.

    # the 6 ds/dp combinations
    # (order must match how _c_digest_offsets was generated)
    double_p = digest_p + digest_p
    p_then_s = digest_p + digest_s
    combos = [
        digest_p,
        double_p,
        p_then_s,
        p_then_s + digest_p,
        digest_s + digest_p,
        digest_s + double_p,
    ]
    round_pairs = [(combos[e], combos[o]) for e, o in _c_digest_offsets]

    # full 42-round blocks
    digest_c = digest_a
    full_blocks, leftover = divmod(rounds, 42)
    while full_blocks:
        for even_const, odd_const in round_pairs:
            inner = new_hash(digest_c + even_const).digest()
            digest_c = new_hash(odd_const + inner).digest()
        full_blocks -= 1

    # leftover rounds: whole pairs first (nothing happens if leftover is 0)
    pair_count = leftover >> 1
    for even_const, odd_const in round_pairs[:pair_count]:
        inner = new_hash(digest_c + even_const).digest()
        digest_c = new_hash(odd_const + inner).digest()

    # odd total: one last round; since counting started at 0, it is an
    # even-numbered round.
    if leftover & 1:
        digest_c = new_hash(digest_c + round_pairs[pair_count][0]).digest()

    #===================================================================
    # encode digest using the variant's transpose map
    #===================================================================
    encoded = h64.encode_transposed_bytes(digest_c, order)
    return encoded.decode("ascii")
def test_encode_transposed_bytes(self):
    """Check ``h64.encode_transposed_bytes`` against the stored fixtures.

    Each fixture in ``self.encode_transposed`` and
    ``self.encode_transposed_dups`` is a ``(result, input, offsets)``
    triple: encoding *input* with *offsets* and decoding the output via
    ``h64.decode_bytes`` must give back *result*.
    """
    cases = self.encode_transposed + self.encode_transposed_dups
    for expected, source, offsets in cases:
        encoded = h64.encode_transposed_bytes(source, offsets)
        self.assertEqual(h64.decode_bytes(encoded), expected)
def raw_md5_crypt(secret, salt, apr=False):
    """Compute the raw md5-crypt checksum.

    :arg secret: password, bytes or unicode (encoded to utf-8)
    :arg salt: salt portion of hash, bytes or unicode (encoded to ascii),
               clipped to max 8 bytes.
    :param apr: flag to use apache variant

    :returns:
        encoded checksum as unicode
    """
    # NOTE: the apache ("apr") variant differs from plain md5-crypt only in
    #       the ident / magic constant incorporated into the hash.

    # normalize secret
    # FIXME: no definitive policy found on how md5-crypt handles non-ascii.
    if isinstance(secret, unicode):
        secret = secret.encode("utf-8")

    # normalize salt
    if isinstance(salt, unicode):
        salt = salt.encode("ascii")
    if len(salt) > 8:
        salt = salt[:8]

    # primary hash = secret + ident + salt + ...
    ctx = md5(secret)
    if apr:
        ctx.update(B_APR_MAGIC)
    else:
        ctx.update(B_MD5_MAGIC)
    ctx.update(salt)

    # ... + len(secret) bytes of the temp hash md5(secret+salt+secret),
    # repeating it as often as needed
    seed = md5(secret + salt + secret).digest()
    assert len(seed) == 16
    secret_size = len(secret)
    whole, partial = divmod(secret_size, 16)
    ctx.update(seed * whole + seed[:partial])

    # ... + one update per bit of len(secret): a null byte for 1-bits, the
    # first char of the secret for 0-bits.
    # NOTE: using a null here (rather than the temp hash's first byte) may
    #       historically have been a bug, but all implementations must match
    #       it. sha-crypt replaced this step with something more useful.
    first_char = secret[:1]
    bits = secret_size
    while bits > 0:
        if bits & 1:
            ctx.update(B_NULL)
        else:
            ctx.update(first_char)
        bits >>= 1
    digest = ctx.digest()

    # next: 1000 rounds of md5 to make things harder.
    # the content hashed in each round is the concatenation of...
    #       secret if round % 2 else digest
    #       salt if round % 3
    #       secret if round % 7
    #       digest if round % 2 else secret
    #
    # NOTE: all the constant strings & md5 objects are prepared up front,
    #       so each round is one hash create/copy + 1 update + 1 digest.
    #       both tables are keyed by (round % 3 != 0, round % 7 != 0).
    double_secret = secret * 2
    salted_secret = salt + secret
    salted_double = salt + secret * 2

    # odd rounds: copy a hash object pre-loaded with the constant prefix
    odd_prefix = {
        (False, False): md5(secret).copy,
        (False, True): md5(double_secret).copy,
        (True, False): md5(secret + salt).copy,
        (True, True): md5(secret + salted_secret).copy,
    }
    # even rounds: constant suffix appended after the running digest
    even_suffix = {
        (False, False): secret,
        (False, True): double_secret,
        (True, False): salted_secret,
        (True, True): salted_double,
    }

    for rnd in xrange(1000):
        key = (rnd % 3 != 0, rnd % 7 != 0)
        if rnd & 1:
            ctx = odd_prefix[key]()
            ctx.update(digest)
        else:
            ctx = md5(digest)
            ctx.update(even_suffix[key])
        digest = ctx.digest()

    # encode resulting hash
    encoded = h64.encode_transposed_bytes(digest, _chk_offsets)
    return encoded.decode("ascii")
def _raw_sha2_crypt(pwd, salt, rounds, use_512=False):
    """Compute the raw SHA256-Crypt / SHA512-Crypt checksum.

    Pure-python core of the two SHA2-based crypt algorithms. Parsing and
    validation of complete hash strings is not handled here.

    :arg pwd: password; unicode is encoded to utf-8, otherwise must be bytes
    :arg salt: salt as unicode (ascii-encodable, at most 16 chars)
    :arg rounds: linear rounds cost; must lie within 1000 .. 999999999
    :arg use_512: select sha512-crypt instead of sha256-crypt

    :raises: ``uh.exc.NullPasswordError`` if the password contains ``_BNULL``

    :returns:
        encoded checksum chars
    """
    #===================================================================
    # normalize & sanity-check inputs
    #===================================================================
    # XXX: no official unicode policy is known; utf-8 is used as the default
    if isinstance(pwd, unicode):
        pwd = pwd.encode("utf-8")
    assert isinstance(pwd, bytes)
    if _BNULL in pwd:
        raise uh.exc.NullPasswordError(
            sha512_crypt if use_512 else sha256_crypt)
    pw_size = len(pwd)

    # NOTE: the spec clips out-of-range rounds instead of raising an error;
    #       the handler class is assumed to have done that already.
    assert 1000 <= rounds <= 999999999, "invalid rounds"

    # NOTE: likewise, the spec truncates salts longer than 16 bytes; that is
    #       also assumed to have been handled by the handler class.
    assert isinstance(salt, unicode), "salt not unicode"
    salt = salt.encode("ascii")
    salt_size = len(salt)
    assert salt_size < 17, "salt too large"

    # variant-specific constants
    # (hash_len is informational only - nothing below reads it)
    if use_512:
        hasher, hash_len, tmap = hashlib.sha512, 64, _512_transpose_map
    else:
        hasher, hash_len, tmap = hashlib.sha256, 32, _256_transpose_map

    #===================================================================
    # digest B - sub-input for digest A
    #===================================================================
    b_digest = hasher(pwd + salt + pwd).digest()

    #===================================================================
    # digest A - initial state for the digest C rounds
    #===================================================================
    a_hash = hasher(pwd + salt)
    a_feed = a_hash.update

    # pw_size bytes of B, repeating B as often as needed
    a_feed(repeat_string(b_digest, pw_size))

    # one update per bit of pw_size: B for 1-bits, the password for 0-bits
    pending = pw_size
    while pending:
        a_feed(b_digest if pending & 1 else pwd)
        pending >>= 1

    a_digest = a_hash.digest()

    #===================================================================
    # digest P - stands in for the password during the digest C rounds
    #===================================================================
    if pw_size < 64:
        # this method is faster under python, but needs O(pwd_len**2)
        # memory - so it's reserved for smaller passwords, to avoid a
        # potential DOS.
        p_seed = hasher(pwd * pw_size).digest()
    else:
        # feed the password pw_size times in total, one copy at a time
        p_hash = hasher(pwd)
        p_feed = p_hash.update
        left = pw_size - 1
        while left:
            p_feed(pwd)
            left -= 1
        p_seed = p_hash.digest()
    p_digest = repeat_string(p_seed, pw_size)
    assert len(p_digest) == pw_size

    #===================================================================
    # digest S - stands in for the salt during the digest C rounds
    #===================================================================
    s_full = hasher(salt * (16 + byte_elem_value(a_digest[0]))).digest()
    s_digest = s_full[:salt_size]
    assert len(s_digest) == salt_size, "salt_len somehow > hash_len!"

    #===================================================================
    # digest C - variable number of rounds mixing A, S and P
    #===================================================================
    # NOTE: in the reference algorithm, round i hashes
    #           (dp if i is odd else dc)
    #           + (ds if i % 3) + (dp if i % 7)
    #           + (dc if i is odd else dp)
    #       the pattern repeats every lcm(2,3,7)==42 rounds, so the constant
    #       part of every round is precalculated as 21 (even, odd) pairs:
    #       even rounds are hash(dc + constant), odd rounds are
    #       hash(constant + dc). the code then runs as many full 42-round
    #       blocks as possible, then as many pairs as possible, then one
    #       final even round if 'rounds' is odd.

    # the 6 ds/dp combinations
    # (order must match how _c_digest_offsets was generated)
    pp = p_digest + p_digest
    ps = p_digest + s_digest
    variants = [p_digest, pp, ps, ps + p_digest, s_digest + p_digest, s_digest + pp]
    pairs_table = [(variants[ev], variants[od]) for ev, od in _c_digest_offsets]

    # full 42-round blocks
    state = a_digest
    blocks_left, remainder = divmod(rounds, 42)
    while blocks_left:
        for even_part, odd_part in pairs_table:
            state = hasher(odd_part + hasher(state + even_part).digest()).digest()
        blocks_left -= 1

    # leftover rounds
    if remainder:
        # whole pairs first
        n_pairs = remainder >> 1
        for even_part, odd_part in pairs_table[:n_pairs]:
            state = hasher(odd_part + hasher(state + even_part).digest()).digest()

        # odd total: one last round; since counting started at 0, it is an
        # even-numbered round.
        if remainder & 1:
            state = hasher(state + pairs_table[n_pairs][0]).digest()

    #===================================================================
    # encode digest using the variant's transpose map
    #===================================================================
    return h64.encode_transposed_bytes(state, tmap).decode("ascii")
def _raw_md5_crypt(pwd, salt, use_apr=False):
    """Compute the raw MD5-Crypt checksum for a password / salt pair.

    Pure-python core of the MD5-Crypt algorithms. Parsing and validation
    of complete hash strings is not handled here.

    :arg pwd: password; unicode is encoded to utf-8, otherwise must be bytes
    :arg salt: salt as unicode (ascii-encodable, at most 8 chars)
    :arg use_apr: if true, hash in ``_APR_MAGIC`` instead of ``_MD5_MAGIC``

    :raises: ``uh.exc.NullPasswordError`` if the password contains ``_BNULL``

    :returns:
        encoded checksum chars
    """
    # NOTE: the apache ("apr") variant differs from plain md5-crypt only in
    #       the ident / magic constant incorporated into the hash.

    #===================================================================
    # normalize & sanity-check inputs
    #===================================================================
    # XXX: no official unicode policy is known; utf-8 is used as the default
    if isinstance(pwd, unicode):
        pwd = pwd.encode("utf-8")
    assert isinstance(pwd, bytes), "pwd not unicode or bytes"
    if _BNULL in pwd:
        raise uh.exc.NullPasswordError(md5_crypt)
    pw_size = len(pwd)

    # NOTE: the spec truncates salts longer than 8 bytes instead of raising
    #       an error; the handler class is assumed to have done that already.
    assert isinstance(salt, unicode), "salt not unicode"
    salt = salt.encode("ascii")
    assert len(salt) < 9, "salt too large"

    # pick the variant-specific constant
    if use_apr:
        prefix = _APR_MAGIC
    else:
        prefix = _MD5_MAGIC

    #===================================================================
    # digest B - sub-input for digest A
    #===================================================================
    b_digest = md5(pwd + salt + pwd).digest()

    #===================================================================
    # digest A - initial state for the digest C rounds
    #===================================================================
    a_hash = md5(pwd + prefix + salt)
    a_feed = a_hash.update

    # pw_size bytes of B, repeating B as often as needed
    a_feed(repeat_string(b_digest, pw_size))

    # one update per bit of pw_size: a null byte for 1-bits, the first
    # password char for 0-bits.
    # NOTE: using a null here (rather than db[0]) may historically have been
    #       a bug in the original code, but all implementations must match it.
    pending = pw_size
    lead_char = pwd[:1]
    while pending:
        a_feed(_BNULL if pending & 1 else lead_char)
        pending >>= 1

    a_digest = a_hash.digest()

    #===================================================================
    # digest C - 1000 rounds mixing the running digest, salt & password
    #===================================================================
    # NOTE: in the reference algorithm, round i hashes
    #           (pwd if i is odd else dc)
    #           + (salt if i % 3) + (pwd if i % 7)
    #           + (dc if i is odd else pwd)
    #       the pattern repeats every lcm(2,3,7)==42 rounds, so the constant
    #       part of every round is precalculated as 21 (even, odd) pairs:
    #       even rounds are hash(dc + constant), odd rounds are
    #       hash(constant + dc). 23 full blocks plus 17 extra pairs give the
    #       required 42*23 + 2*17 == 1000 rounds.

    # the 6 pwd/salt combinations
    # (order must match how _c_digest_offsets was generated)
    pp = pwd + pwd
    ps = pwd + salt
    variants = [pwd, pp, ps, ps + pwd, salt + pwd, salt + pp]
    pairs_table = [(variants[ev], variants[od]) for ev, od in _c_digest_offsets]

    # 23 blocks of 42 rounds each (966 rounds in total)
    state = a_digest
    for _ in range(23):
        for even_part, odd_part in pairs_table:
            state = md5(odd_part + md5(state + even_part).digest()).digest()

    # 17 more pairs of rounds (34 more rounds, for a total of 1000)
    for even_part, odd_part in pairs_table[:17]:
        state = md5(odd_part + md5(state + even_part).digest()).digest()

    #===================================================================
    # encode digest using the transpose map
    #===================================================================
    return h64.encode_transposed_bytes(state, _transpose_map).decode("ascii")
def raw_md5_crypt(secret, salt, apr=False):
    """Compute the raw MD5-Crypt checksum for a password.

    :arg secret:
        password, as bytes or unicode (unicode is encoded to utf-8).
    :arg salt:
        salt portion of the hash, as bytes or unicode (unicode is encoded
        to ascii); only the first 8 bytes are used.
    :param apr:
        when true, mix in the Apache ("$apr1$") magic constant instead of
        the standard MD5-Crypt one.

    :returns:
        the checksum, h64-encoded, as unicode.
    """
    # NOTE: the Apache variant differs from plain md5-crypt *only* in the
    #       magic constant folded into the initial digest; every other
    #       step below is shared between the two formats.

    #---------------------------------------------------------------
    # normalize inputs to bytes
    #---------------------------------------------------------------
    # FIXME: no definitive policy was found for how md5-crypt treats
    #        non-ascii passwords; utf-8 is used as the default.
    if isinstance(secret, unicode):
        secret = secret.encode("utf-8")
    if isinstance(salt, unicode):
        salt = salt.encode("ascii")
    # salts longer than 8 bytes are clipped (a no-op for shorter ones)
    salt = salt[:8]

    #---------------------------------------------------------------
    # initial digest: secret + magic + salt + ...
    #---------------------------------------------------------------
    magic = B_APR_MAGIC if apr else B_MD5_MAGIC
    ctx = md5(secret + magic + salt)

    # ... + len(secret) bytes taken from md5(secret + salt + secret),
    # repeating that 16-byte digest as often as necessary ...
    inner = md5(secret + salt + secret).digest()
    assert len(inner) == 16
    whole, partial = divmod(len(secret), 16)
    ctx.update(inner * whole + inner[:partial])

    # ... + one byte per bit of len(secret): a null byte for a set bit,
    # the first byte of the secret for a clear bit.
    #
    # This may historically have been a bug (inner[0] was probably
    # intended instead of the null byte, but the reference code had
    # already cleared that buffer); every implementation now has to
    # reproduce it. sha-crypt replaced this step with something more
    # useful.
    first_byte = secret[:1]
    remaining = len(secret)
    while remaining > 0:
        if remaining & 1:
            ctx.update(B_NULL)
        else:
            ctx.update(first_byte)
        remaining >>= 1
    digest = ctx.digest()

    #---------------------------------------------------------------
    # 1000 strengthening rounds
    #---------------------------------------------------------------
    # Each round hashes the concatenation of:
    #     secret if round is odd, else the previous digest
    #     salt   if round is not a multiple of 3
    #     secret if round is not a multiple of 7
    #     previous digest if round is odd, else secret
    #
    # Rather than assembling that per round, everything that does not
    # depend on the previous digest is prepared up front, keyed by
    # (round % 3 != 0, round % 7 != 0):
    #   * odd rounds start from a copy of a pre-fed md5 object and then
    #     append the previous digest;
    #   * even rounds start from the previous digest and append a
    #     pre-built byte string.
    doubled = secret + secret
    odd_prefix = {
        (False, False): md5(secret),
        (False, True): md5(doubled),
        (True, False): md5(secret + salt),
        (True, True): md5(secret + salt + secret),
    }
    even_suffix = {
        (False, False): secret,
        (False, True): doubled,
        (True, False): salt + secret,
        (True, True): salt + doubled,
    }

    for rnd in xrange(1000):
        combo = (rnd % 3 != 0, rnd % 7 != 0)
        if rnd & 1:
            ctx = odd_prefix[combo].copy()
            ctx.update(digest)
        else:
            ctx = md5(digest)
            ctx.update(even_suffix[combo])
        digest = ctx.digest()

    #---------------------------------------------------------------
    # encode final digest using the md5-crypt transposition map
    #---------------------------------------------------------------
    encoded = h64.encode_transposed_bytes(digest, _chk_offsets)
    return encoded.decode("ascii")