def pollard_pm1(n, B=10, seed=1234):
    """Use Pollard's p-1 method to try to extract a factor of n. The
    returned factor may be a composite number. The search is performed
    up to a smoothness bound B; if no factor is found, None is
    returned.

    The p-1 algorithm is a Monte Carlo method whose outcome can be
    affected by changing the random seed value.

    The base ``a`` is drawn from ``[2, n - 1]`` with a private random
    generator seeded with ``seed + B``, so calling this function does
    not disturb the state of the module-level ``random`` generator.
    ``a`` is then raised, modulo ``n``, to the largest power of every
    prime yielded by ``sieve.primerange(2, B)`` that does not exceed
    ``B``, and ``gcd(a - 1, n)`` is returned when it is a proper divisor.

    Example usage
    =============

    With the default smoothness bound, this number can't be cracked:

    >>> pollard_pm1(21477639576571)

    Increasing the smoothness bound helps:

    >>> pollard_pm1(21477639576571, 2000)
    4410317L

    References
    ==========

    Richard Crandall & Carl Pomerance (2005), "Prime Numbers:
    A Computational Perspective", Springer, 2nd edition, 236-238

    """
    # A private generator yields the same value that seeding the global
    # generator would, without the global side effect.
    prng = random.Random(seed + B)
    a = prng.randint(2, n-1)
    for p in sieve.primerange(2, B):
        # Largest power of p not exceeding B, computed with integers only.
        # int(log(B, p)) can come out one too small when B is an exact
        # power of p because the floating point quotient rounds down.
        pe = p
        while pe*p <= B:
            pe *= p
        a = pow(a, pe, n)
    g = numbers.gcd(a-1, n)
    if 1 < g < n:
        return g
    # g is 1 or n: nothing useful was found with this bound and seed
    return None
def pollard_pm1(n, B=10, seed=1234):
    """Use Pollard's p-1 method to try to extract a factor of n. The
    returned factor may be a composite number. The search is performed
    up to a smoothness bound B; if no factor is found, None is
    returned.

    The p-1 algorithm is a Monte Carlo method whose outcome can be
    affected by changing the random seed value.

    The base ``a`` is drawn from ``[2, n - 1]`` by a private generator
    seeded with ``seed + B``. It is raised, modulo ``n``, to the largest
    power of every prime yielded by ``sieve.primerange(2, B)`` that does
    not exceed ``B``; ``igcd(a - 1, n)`` is returned when it is a proper
    divisor of ``n``.

    Example usage
    =============

    With the default smoothness bound, this number can't be cracked:

    >>> pollard_pm1(21477639576571)

    Increasing the smoothness bound helps:

    >>> pollard_pm1(21477639576571, 2000)
    4410317

    References
    ==========

    Richard Crandall & Carl Pomerance (2005), "Prime Numbers:
    A Computational Perspective", Springer, 2nd edition, 236-238

    """
    prng = random.Random(seed + B)
    a = prng.randint(2, n - 1)
    for p in sieve.primerange(2, B):
        # Find the largest p**e <= B exactly. The former int(log(B, p))
        # relied on a floating point quotient that can round just below
        # an integer (e.g. log(243, 3)), losing one power of p.
        prime_power = p
        while prime_power*p <= B:
            prime_power *= p
        a = pow(a, prime_power, n)
    g = igcd(a - 1, n)
    if 1 < g < n:
        return g
    # a trivial gcd (1 or n) means no factor was found
    return None
def pollard_pm1(n, B=10, seed=1234):
    """
    Use Pollard's p-1 method to try to extract a nontrivial factor
    of ``n``. The returned factor may be a composite number. If no
    factor is found, ``None`` is returned.

    The search is performed up to a smoothness bound ``B``. Choosing a
    larger B increases the likelihood of finding a large factor.

    The p-1 algorithm is a Monte Carlo method whose outcome can be affected
    by changing the random seed value. The random base is drawn from
    ``[2, n - 1]`` by a private generator seeded with ``seed + B``.

    Example usage
    =============

    With the default smoothness bound, this number can't be cracked:

        >>> from sympy.ntheory import pollard_pm1
        >>> pollard_pm1(21477639576571)

    Increasing the smoothness bound helps:

        >>> pollard_pm1(21477639576571, B=2000)
        4410317

    References
    ==========

    - Richard Crandall & Carl Pomerance (2005), "Prime Numbers:
      A Computational Perspective", Springer, 2nd edition, 236-238

    """
    prng = random.Random(seed + B)
    a = prng.randint(2, n-1)
    for p in sieve.primerange(2, B):
        # Raise ``a`` to the largest power of p that does not exceed B.
        # The power is built with integer multiplication because
        # int(math.log(B, p)) can be one too small when B is an exact
        # power of p (the float quotient may land just under the integer).
        q = p
        while q*p <= B:
            q *= p
        a = pow(a, q, n)
    g = igcd(a-1, n)
    if 1 < g < n:
        return int(g)
    # gcd of 1 or n: the attempt failed for this bound/seed
    return None
def trial(n, candidates=None):
    """
    Factor n as far as possible through trial division, taking
    candidate factors from the given list. If no list of candidate
    factors is given, the prime numbers in the interval [2, sqrt(n)]
    are used, which guarantees a complete factorization.

    The returned value is a list [(p1, e1), ...] such that
    n = p1**e1 * p2**e2 * ... If n could not be completely factored
    using numbers in the given range, the last p might be composite.

    ``trial(1)`` returns the empty list. The search stops early as soon
    as the unfactored remainder is prime or equal to 1.

    Example usage
    =============

    A complete factorization:

        >>> trial(36960)
        [(2, 5), (3, 1), (5, 1), (7, 1), (11, 1)]

    This won't find the factors 7 and 11:

        >>> trial(36960, [2, 3, 5])
        [(2, 5), (3, 1), (5, 1), (77, 1)]

    """
    if n == 1:
        return []
    if candidates is None:
        # n**0.5 is computed in floating point and may be slightly off for
        # large n; nudge the estimate to the exact floor(sqrt(n)) so that a
        # prime equal to sqrt(n) is always among the candidates.
        root = int(n**0.5)
        while root*root > n:
            root -= 1
        while (root + 1)*(root + 1) <= n:
            root += 1
        candidates = sieve.primerange(2, root + 1)
    factors = []
    for k in candidates:
        m = multiplicity(k, n)
        if m != 0:
            # strip every power of k from the remainder
            n //= k**m
            factors.append((k, m))
        if isprime(n):
            # the remainder is prime, so the factorization is complete
            return factors + [(int(n), 1)]
        elif n == 1:
            return factors
    # candidates exhausted; whatever is left (possibly composite) goes last
    return factors + [(int(n), 1)]
def trial(n, candidates=None):
    """
    Factor n as far as possible through trial division, taking
    candidate factors from the given list. If no list of candidate
    factors is given, the prime numbers in the interval [2, sqrt(n)]
    are used, which guarantees a complete factorization.

    The returned value is a list [(p1, e1), ...] such that
    n = p1**e1 * p2**e2 * ... If n could not be completely factored
    using numbers in the given range, the last p might be composite.

    For ``n == 1`` the empty list is returned. Division stops as soon
    as the remaining cofactor is found to be prime or has been reduced
    to 1.

    Example usage
    =============

    A complete factorization:

        >>> trial(36960)
        [(2, 5), (3, 1), (5, 1), (7, 1), (11, 1)]

    This won't find the factors 7 and 11:

        >>> trial(36960, [2, 3, 5])
        [(2, 5), (3, 1), (5, 1), (77, 1)]

    """
    if n == 1:
        return []
    if candidates is None:
        # Start from the float estimate of sqrt(n) and correct it with
        # integer arithmetic: for large n the float result can be below the
        # true integer square root, which would leave out the prime p when
        # n == p**2 and break the "complete factorization" guarantee.
        upper = int(n**0.5)
        while upper*upper > n:
            upper -= 1
        while (upper + 1)*(upper + 1) <= n:
            upper += 1
        candidates = sieve.primerange(2, upper + 1)
    factors = []
    for k in candidates:
        m = multiplicity(k, n)
        if m != 0:
            # divide out k completely and record its exponent
            n //= k**m
            factors.append((k, m))
        if isprime(n):
            # nothing more to split once the remainder is prime
            return factors + [(int(n), 1)]
        elif n == 1:
            return factors
    # ran out of candidates: report the remainder as is (may be composite)
    return factors + [(int(n), 1)]
def pollard_pm1(n, B=10, seed=1234):
    """
    Use Pollard's p-1 method to try to extract a nontrivial factor
    of ``n``. The returned factor may be a composite number. If no
    factor is found, ``None`` is returned.

    The search is performed up to a smoothness bound ``B``. Choosing a
    larger B increases the likelihood of finding a large factor.

    The p-1 algorithm is a Monte Carlo method whose outcome can be affected
    by changing the random seed value. The starting base is taken from
    ``[2, n - 1]`` using a private generator seeded with ``seed + B``.

    Example usage
    =============

    With the default smoothness bound, this number can't be cracked:

        >>> from sympy.ntheory import pollard_pm1
        >>> pollard_pm1(21477639576571)

    Increasing the smoothness bound helps:

        >>> pollard_pm1(21477639576571, B=2000)
        4410317

    References
    ==========

    - Richard Crandall & Carl Pomerance (2005), "Prime Numbers:
      A Computational Perspective", Springer, 2nd edition, 236-238

    """
    prng = random.Random(seed + B)
    a = prng.randint(2, n - 1)
    for p in sieve.primerange(2, B):
        # Exponentiate by the largest power of p that is <= B. Integer
        # multiplication is used instead of int(math.log(B, p)) because
        # the float logarithm can round below the true integer value when
        # B is an exact power of p, dropping one factor of p.
        p_pow = p
        while p_pow*p <= B:
            p_pow *= p
        a = pow(a, p_pow, n)
    g = igcd(a - 1, n)
    if 1 < g < n:
        return int(g)
    # only a trivial divisor (1 or n) was produced
    return None
def factorint(n, limit=None, use_trial=True, use_rho=True, use_pm1=True, verbose=False, visual=None): r""" Given a positive integer ``n``, ``factorint(n)`` returns a dict containing the prime factors of ``n`` as keys and their respective multiplicities as values. For example: >>> from sympy.ntheory import factorint >>> factorint(2000) # 2000 = (2**4) * (5**3) {2: 4, 5: 3} >>> factorint(65537) # This number is prime {65537: 1} For input less than 2, factorint behaves as follows: - ``factorint(1)`` returns the empty factorization, ``{}`` - ``factorint(0)`` returns ``{0:1}`` - ``factorint(-n)`` adds ``-1:1`` to the factors and then factors ``n`` Partial Factorization: If ``limit`` (> 3) is specified, the search is stopped after performing trial division up to (and including) the limit (or taking a corresponding number of rho/p-1 steps). This is useful if one has a large number and only is interested in finding small factors (if any). Note that setting a limit does not prevent larger factors from being found early; it simply means that the largest factor may be composite. Since checking for perfect power is relatively cheap, it is done regardless of the limit setting. This number, for example, has two small factors and a huge semi-prime factor that cannot be reduced easily: >>> from sympy.ntheory import isprime >>> a = 1407633717262338957430697921446883 >>> f = factorint(a, limit=10000) >>> f == {991: 1, 202916782076162456022877024859L: 1, 7: 1} True >>> isprime(max(f)) False This number has a small factor and a residual perfect power whose base is greater than the limit: >>> factorint(3*101**7, limit=5) {3: 1, 101: 7} Visual Factorization: If ``visual`` is set to ``True``, then it will return a visual factorization of the integer. For example: >>> from sympy import pprint >>> pprint(factorint(4200, visual=True)) 3 1 2 1 2 *3 *5 *7 Note that this is achieved by using the evaluate=False flag in Mul and Pow. 
If you do other manipulations with an expression where evaluate=False, it may evaluate. Therefore, you should use the visual option only for visualization, and use the normal dictionary returned by visual=False if you want to perform operations on the factors. You can easily switch between the two forms by sending them back to factorint: >>> from sympy import Mul, Pow >>> regular = factorint(1764); regular {2: 2, 3: 2, 7: 2} >>> pprint(factorint(regular)) 2 2 2 2 *3 *7 >>> visual = factorint(1764, visual=True); pprint(visual) 2 2 2 2 *3 *7 >>> print factorint(visual) {2: 2, 3: 2, 7: 2} If you want to send a number to be factored in a partially factored form you can do so with a dictionary or unevaluated expression: >>> factorint(factorint({4: 2, 12: 3})) # twice to toggle to dict form {2: 10, 3: 3} >>> factorint(Mul(4, 12, evaluate=False)) {2: 4, 3: 1} The table of the output logic is: ====== ====== ======= ======= Visual ------ ---------------------- Input True False other ====== ====== ======= ======= dict mul dict mul n mul dict dict mul mul dict dict ====== ====== ======= ======= Notes ===== Algorithm: The function switches between multiple algorithms. Trial division quickly finds small factors (of the order 1-5 digits), and finds all large factors if given enough time. The Pollard rho and p-1 algorithms are used to find large factors ahead of time; they will often find factors of the order of 10 digits within a few seconds: >>> factors = factorint(12345678910111213141516) >>> for base, exp in sorted(factors.items()): ... print base, exp ... 2 2 2507191691 1 1231026625769 1 Any of these methods can optionally be disabled with the following boolean parameters: - ``use_trial``: Toggle use of trial division - ``use_rho``: Toggle use of Pollard's rho method - ``use_pm1``: Toggle use of Pollard's p-1 method ``factorint`` also periodically checks if the remaining part is a prime number or a perfect power, and in those cases stops. 
If ``verbose`` is set to ``True``, detailed progress is printed. See Also ======== smoothness, smoothness_p, divisors """ factordict = {} if visual and not isinstance(n, Mul) and not isinstance(n, dict): factordict = factorint(n, limit=limit, use_trial=use_trial, use_rho=use_rho, use_pm1=use_pm1, verbose=verbose, visual=False) elif isinstance(n, Mul): factordict = dict([(int(k), int(v)) for k, v in n.as_powers_dict().items()]) elif isinstance(n, dict): factordict = n if factordict and (isinstance(n, Mul) or isinstance(n, dict)): # check it for k in factordict.keys(): if isprime(k): continue e = factordict.pop(k) d = factorint(k, limit=limit, use_trial=use_trial, use_rho=use_rho, use_pm1=use_pm1, verbose=verbose, visual=False) for k, v in d.items(): if k in factordict: factordict[k] += v*e else: factordict[k] = v*e if visual or (type(n) is dict and visual is not True and visual is not False): if factordict == {}: return S.One if -1 in factordict: factordict.pop(-1) args = [S.NegativeOne] else: args = [] args.extend([Pow(*i, evaluate=False) for i in sorted(factordict.items())]) return Mul(*args, evaluate=False) elif isinstance(n, dict) or isinstance(n, Mul): return factordict assert use_trial or use_rho or use_pm1 n = as_int(n) if limit: limit = int(limit) # special cases if n < 0: factors = factorint( -n, limit=limit, use_trial=use_trial, use_rho=use_rho, use_pm1=use_pm1, verbose=verbose, visual=False) factors[-1] = 1 return factors if limit: if limit < 2: if n == 1: return {} return {n: 1} elif n < 10: # doing this we are assured of getting a limit > 2 # when we have to compute it later return [{0: 1}, {}, {2: 1}, {3: 1}, {2: 2}, {5: 1}, {2: 1, 3: 1}, {7: 1}, {2: 3}, {3: 2}][n] factors = {} # do simplistic factorization if verbose: sn = str(n) if len(sn) > 50: print 'Factoring %s' % sn[:5] + \ '..(%i other digits)..' 
% (len(sn) - 10) + sn[-5:] else: print 'Factoring', n if use_trial: # this is the preliminary factorization for small factors small = 2**15 fail_max = 600 small = min(small, limit or small) if verbose: print trial_int_msg % (2, small, fail_max) n, next_p = _factorint_small(factors, n, small, fail_max) else: next_p = 2 if factors and verbose: for k in sorted(factors): print factor_msg % (k, factors[k]) if next_p == 0: if n > 1: factors[int(n)] = 1 if verbose: print complete_msg return factors # continue with more advanced factorization methods # first check if the simplistic run didn't finish # because of the limit and check for a perfect # power before exiting try: if limit and next_p > limit: if verbose: print 'Exceeded limit:', limit _check_termination(factors, n, limit, use_trial, use_rho, use_pm1, verbose) if n > 1: factors[int(n)] = 1 return factors else: # Before quitting (or continuing on)... # ...do a Fermat test since it's so easy and we need the # square root anyway. Finding 2 factors is easy if they are # "close enough." This is the big root equivalent of dividing by # 2, 3, 5. 
sqrt_n = integer_nthroot(n, 2)[0] a = sqrt_n + 1 a2 = a**2 b2 = a2 - n for i in range(3): b, fermat = integer_nthroot(b2, 2) if fermat: break b2 += 2*a + 1 # equiv to (a+1)**2 - n a += 1 if fermat: if verbose: print fermat_msg if limit: limit -= 1 for r in [a - b, a + b]: facs = factorint(r, limit=limit, use_trial=use_trial, use_rho=use_rho, use_pm1=use_pm1, verbose=verbose) factors.update(facs) raise StopIteration # ...see if factorization can be terminated _check_termination(factors, n, limit, use_trial, use_rho, use_pm1, verbose) except StopIteration: if verbose: print complete_msg return factors # these are the limits for trial division which will # be attempted in parallel with pollard methods low, high = next_p, 2*next_p limit = limit or sqrt_n # add 1 to make sure limit is reached in primerange calls limit += 1 while 1: try: high_ = high if limit < high_: high_ = limit # Trial division if use_trial: if verbose: print trial_msg % (low, high_) ps = sieve.primerange(low, high_) n, found_trial = _trial(factors, n, ps, verbose) if found_trial: _check_termination(factors, n, limit, use_trial, use_rho, use_pm1, verbose) else: found_trial = False if high > limit: if verbose: print 'Exceeded limit:', limit if n > 1: factors[int(n)] = 1 raise StopIteration # Only used advanced methods when no small factors were found if not found_trial: if (use_pm1 or use_rho): high_root = max(int(math.log(high_**0.7)), low, 3) # Pollard p-1 if use_pm1: if verbose: print (pm1_msg % (high_root, high_)) c = pollard_pm1(n, B=high_root, seed=high_) if c: # factor it and let _trial do the update ps = factorint(c, limit=limit - 1, use_trial=use_trial, use_rho=use_rho, use_pm1=use_pm1, verbose=verbose) n, _ = _trial(factors, n, ps, verbose=False) _check_termination(factors, n, limit, use_trial, use_rho, use_pm1, verbose) # Pollard rho if use_rho: max_steps = high_root if verbose: print (rho_msg % (1, max_steps, high_)) c = pollard_rho(n, retries=1, max_steps=max_steps, seed=high_) if c: # 
factor it and let _trial do the update ps = factorint(c, limit=limit - 1, use_trial=use_trial, use_rho=use_rho, use_pm1=use_pm1, verbose=verbose) n, _ = _trial(factors, n, ps, verbose=False) _check_termination(factors, n, limit, use_trial, use_rho, use_pm1, verbose) except StopIteration: if verbose: print complete_msg return factors low, high = high, high*2
def pollard_pm1(n, B=10, a=2, retries=0, seed=1234):
    """
    Use Pollard's p-1 method to try to extract a nontrivial factor
    of ``n``. Either a divisor (perhaps composite) or ``None`` is returned.

    The value of ``a`` is the base that is used in the test gcd(a**M - 1, n).
    The default is 2.  If ``retries`` > 0 then if no factor is found after the
    first attempt, a new ``a`` will be generated randomly (using the ``seed``)
    and the process repeated.

    Note: the value of M is lcm(1..B) = reduce(ilcm, range(2, B + 1)).

    A search is made for factors next to even numbers having a power
    smoothness less than ``B``. Choosing a larger B increases the likelihood
    of finding a larger factor but takes longer. Whether a factor of n is
    found or not depends on ``a`` and the power smoothness of the even number
    just less than the factor p (hence the name p - 1).

    A ``ValueError`` is raised unless ``n > 3`` and ``B > 2``.

    Although there is some discussion of what constitutes a good ``a``, some
    descriptions are hard to interpret. At the modular.math site referenced
    below it is stated that if gcd(a**M - 1, n) = N then a**M % q**r is 1
    for every prime power divisor of N. But consider the following:

        >>> from sympy.ntheory.factor_ import smoothness_p, pollard_pm1
        >>> n=257*1009
        >>> smoothness_p(n)
        (-1, [(257, (1, 2, 256)), (1009, (1, 7, 16))])

    So we should (and can) find a root with B=16:

        >>> pollard_pm1(n, B=16, a=3)
        1009

    If we attempt to increase B to 256 we find that it doesn't work:

        >>> pollard_pm1(n, B=256)
        >>>

    But if the value of ``a`` is changed we find that only multiples of
    257 work, e.g.:

        >>> pollard_pm1(n, B=256, a=257)
        1009

    Checking different ``a`` values shows that all the ones that didn't
    work had a gcd value not equal to ``n`` but equal to one of the
    factors:

        >>> from sympy.core.numbers import ilcm, igcd
        >>> from sympy import factorint, Pow
        >>> M = 1
        >>> for i in range(2, 256):
        ...     M = ilcm(M, i)
        ...
        >>> set([igcd(pow(a, M, n) - 1, n) for a in range(2, 256) if
        ...      igcd(pow(a, M, n) - 1, n) != n])
        set([1009])

    But does aM % d for every divisor of n give 1?

        >>> aM = pow(255, M, n)
        >>> [(d, aM%Pow(*d.args)) for d in factorint(n, visual=True).args]
        [(257**1, 1), (1009**1, 1)]

    No, only one of them. So perhaps the principle is that a root will
    be found for a given value of B provided that:

    1) the power smoothness of the p - 1 value next to the root
       does not exceed B
    2) a**M % p != 1 for any of the divisors of n.

    By trying more than one ``a`` it is possible that one of them
    will yield a factor.

    Examples
    ========

    With the default smoothness bound, this number can't be cracked:

        >>> from sympy.ntheory import pollard_pm1, primefactors
        >>> pollard_pm1(21477639576571)

    Increasing the smoothness bound helps:

        >>> pollard_pm1(21477639576571, B=2000)
        4410317

    Looking at the smoothness of the factors of this number we find:

        >>> from sympy.utilities import flatten
        >>> from sympy.ntheory.factor_ import smoothness_p, factorint
        >>> print smoothness_p(21477639576571, visual=1)
        p**i=4410317**1 has p-1 B=1787, B-pow=1787
        p**i=4869863**1 has p-1 B=2434931, B-pow=2434931

    The B and B-pow are the same for the p - 1 factorizations of the divisors
    because those factorizations had a very large prime factor:

        >>> factorint(4410317 - 1)
        {2: 2, 617: 1, 1787: 1}
        >>> factorint(4869863-1)
        {2: 1, 2434931: 1}

    Note that until B reaches the B-pow value of 1787, the number is not
    cracked;

        >>> pollard_pm1(21477639576571, B=1786)
        >>> pollard_pm1(21477639576571, B=1787)
        4410317

    The B value has to do with the factors of the number next to the divisor,
    not the divisors themselves. A worst case scenario is that the number next
    to the factor p has a large prime divisor or is a perfect power. If these
    conditions apply then the power-smoothness will be about p/2 or p. The more
    realistic is that there will be a large prime factor next to p requiring
    a B value on the order of p/2. Although primes may have been searched for
    up to this level, the p/2 is a factor of p - 1, something that we don't
    know. The modular.math reference below states that 15% of numbers in the
    range of 10**15 to 15**15 + 10**4 are 10**6 power smooth so a B of 10**6
    will fail 85% of the time in that range. From 10**8 to 10**8 + 10**3 the
    percentages are nearly reversed...but in that range the simple trial
    division is quite fast.

    References
    ==========

    - Richard Crandall & Carl Pomerance (2005), "Prime Numbers:
      A Computational Perspective", Springer, 2nd edition, 236-238
    - http://modular.math.washington.edu/edu/2007/spring/ent/ent-html/node81.html
    - http://www.cs.toronto.edu/~yuvalf/Factorization.pdf
    """

    n = int(n)
    if n < 4 or B < 3:
        raise ValueError('pollard_pm1 should receive n > 3 and B > 2')
    prng = random.Random(seed + B)

    # computing a**lcm(1,2,3,..B) % n for B > 2
    # it looks weird, but it's right: primes run [2, B]
    # and the answer's not right until the loop is done.
    for i in range(retries + 1):
        aM = a
        for p in sieve.primerange(2, B + 1):
            # Largest power of p not exceeding B, found with integer
            # arithmetic. int(math.log(B, p)) can be one too small when B
            # is an exact power of p (e.g. math.log(243, 3) is slightly
            # below 5), which would make the exponent differ from lcm(1..B).
            pe = p
            while pe*p <= B:
                pe *= p
            aM = pow(aM, pe, n)
        g = igcd(aM - 1, n)
        if 1 < g < n:
            return int(g)

        # get a new a:
        # since the exponent, lcm(1..B), is even, if we allow 'a' to be 'n-1'
        # then (n - 1)**even % n will be 1 which will give a g of 0 and 1 will
        # give a zero, too, so we set the range as [2, n-2]. Some references
        # say 'a' should be coprime to n, but either will detect factors.
        a = prng.randint(2, n - 2)
    # every attempt gave a trivial gcd: None is returned implicitly
def factorint(n, limit=None, use_trial=True, use_rho=True, use_pm1=True, verbose=False, visual=None): r""" Given a positive integer ``n``, ``factorint(n)`` returns a dict containing the prime factors of ``n`` as keys and their respective multiplicities as values. For example: >>> from sympy.ntheory import factorint >>> factorint(2000) # 2000 = (2**4) * (5**3) {2: 4, 5: 3} >>> factorint(65537) # This number is prime {65537: 1} For input less than 2, factorint behaves as follows: - ``factorint(1)`` returns the empty factorization, ``{}`` - ``factorint(0)`` returns ``{0:1}`` - ``factorint(-n)`` adds ``-1:1`` to the factors and then factors ``n`` Partial Factorization: If ``limit`` (> 3) is specified, the search is stopped after performing trial division up to (and including) the limit (or taking a corresponding number of rho/p-1 steps). This is useful if one has a large number and only is interested in finding small factors (if any). Note that setting a limit does not prevent larger factors from being found early; it simply means that the largest factor may be composite. Since checking for perfect power is relatively cheap, it is done regardless of the limit setting. This number, for example, has two small factors and a huge semi-prime factor that cannot be reduced easily: >>> from sympy.ntheory import isprime >>> a = 1407633717262338957430697921446883 >>> f = factorint(a, limit=10000) >>> f == {991: 1, 202916782076162456022877024859L: 1, 7: 1} True >>> isprime(max(f)) False This number has a small factor and a residual perfect power whose base is greater than the limit: >>> factorint(3*101**7, limit=5) {3: 1, 101: 7} Visual Factorization: If ``visual`` is set to ``True``, then it will return a visual factorization of the integer. For example: >>> from sympy import pprint >>> pprint(factorint(4200, visual=True)) 3 1 2 1 2 *3 *5 *7 Note that this is achieved by using the evaluate=False flag in Mul and Pow. 
If you do other manipulations with an expression where evaluate=False, it may evaluate. Therefore, you should use the visual option only for visualization, and use the normal dictionary returned by visual=False if you want to perform operations on the factors. You can easily switch between the two forms by sending them back to factorint: >>> from sympy import Mul, Pow >>> regular = factorint(1764); regular {2: 2, 3: 2, 7: 2} >>> pprint(factorint(regular)) 2 2 2 2 *3 *7 >>> visual = factorint(1764, visual=True); pprint(visual) 2 2 2 2 *3 *7 >>> print factorint(visual) {2: 2, 3: 2, 7: 2} If you want to send a number to be factored in a partially factored form you can do so with a dictionary or unevaluated expression: >>> factorint(factorint({4: 2, 12: 3})) # twice to toggle to dict form {2: 10, 3: 3} >>> factorint(Mul(4, 12, **dict(evaluate=False))) {2: 4, 3: 1} The table of the output logic is: ====== ====== ======= ======= Visual ------ ---------------------- Input True False other ====== ====== ======= ======= dict mul dict mul n mul dict dict mul mul dict dict ====== ====== ======= ======= Notes ===== Algorithm: The function switches between multiple algorithms. Trial division quickly finds small factors (of the order 1-5 digits), and finds all large factors if given enough time. The Pollard rho and p-1 algorithms are used to find large factors ahead of time; they will often find factors of the order of 10 digits within a few seconds: >>> factors = factorint(12345678910111213141516) >>> for base, exp in sorted(factors.items()): ... print base, exp ... 2 2 2507191691 1 1231026625769 1 Any of these methods can optionally be disabled with the following boolean parameters: - ``use_trial``: Toggle use of trial division - ``use_rho``: Toggle use of Pollard's rho method - ``use_pm1``: Toggle use of Pollard's p-1 method ``factorint`` also periodically checks if the remaining part is a prime number or a perfect power, and in those cases stops. 
If ``verbose`` is set to ``True``, detailed progress is printed. See Also ======== smoothness, smoothness_p, divisors """ factordict = {} if visual and not isinstance(n, Mul) and not isinstance(n, dict): factordict = factorint(n, limit=limit, use_trial=use_trial, use_rho=use_rho, use_pm1=use_pm1, verbose=verbose, visual=False) elif isinstance(n, Mul): factordict = dict([(int(k), int(v)) for k, v in n.as_powers_dict().items()]) elif isinstance(n, dict): factordict = n if factordict and (isinstance(n, Mul) or isinstance(n, dict)): # check it for k in factordict.keys(): if isprime(k): continue e = factordict.pop(k) d = factorint(k, limit=limit, use_trial=use_trial, use_rho=use_rho, use_pm1=use_pm1, verbose=verbose, visual=False) for k, v in d.items(): if k in factordict: factordict[k] += v*e else: factordict[k] = v*e if visual or (type(n) is dict and visual is not True and visual is not False): if factordict == {}: return S.One if -1 in factordict: factordict.pop(-1) args = [S.NegativeOne] else: args = [] args.extend([Pow(*i, **{'evaluate':False}) for i in sorted(factordict.items())]) return Mul(*args, **{'evaluate': False}) elif isinstance(n, dict) or isinstance(n, Mul): return factordict assert use_trial or use_rho or use_pm1 n = as_int(n) if limit: limit = int(limit) # special cases if n < 0: factors = factorint( -n, limit=limit, use_trial=use_trial, use_rho=use_rho, use_pm1=use_pm1, verbose=verbose, visual=False) factors[-1] = 1 return factors if limit: if limit < 2: if n == 1: return {} return {n: 1} elif n < 10: # doing this we are assured of getting a limit > 2 # when we have to compute it later return [{0: 1}, {}, {2: 1}, {3: 1}, {2: 2}, {5: 1}, {2: 1, 3: 1}, {7: 1}, {2: 3}, {3: 2}][n] factors = {} # do simplistic factorization if verbose: sn = str(n) if len(sn) > 50: print 'Factoring %s' % sn[:5] + \ '..(%i other digits)..' 
% (len(sn) - 10) + sn[-5:] else: print 'Factoring', n if use_trial: # this is the preliminary factorization for small factors small = 2**15 fail_max = 600 small = min(small, limit or small) if verbose: print trial_int_msg % (2, small, fail_max) n, next_p = _factorint_small(factors, n, small, fail_max) else: next_p = 2 if factors and verbose: for k in sorted(factors): print factor_msg % (k, factors[k]) if next_p == 0: if n > 1: factors[int(n)] = 1 if verbose: print complete_msg return factors # continue with more advanced factorization methods # first check if the simplistic run didn't finish # because of the limit and check for a perfect # power before exiting try: if limit and next_p > limit: if verbose: print 'Exceeded limit:', limit _check_termination(factors, n, limit, use_trial, use_rho, use_pm1, verbose) if n > 1: factors[int(n)] = 1 return factors else: # Before quitting (or continuing on)... # ...do a Fermat test since it's so easy and we need the # square root anyway. Finding 2 factors is easy if they are # "close enough." This is the big root equivalent of dividing by # 2, 3, 5. 
sqrt_n = integer_nthroot(n, 2)[0] a = sqrt_n + 1 a2 = a**2 b2 = a2 - n for i in range(3): b, fermat = integer_nthroot(b2, 2) if fermat: break b2 += 2*a + 1 # equiv to (a+1)**2 - n a += 1 if fermat: if verbose: print fermat_msg if limit: limit -= 1 for r in [a - b, a + b]: facs = factorint(r, limit=limit, use_trial=use_trial, use_rho=use_rho, use_pm1=use_pm1, verbose=verbose) factors.update(facs) raise StopIteration # ...see if factorization can be terminated _check_termination(factors, n, limit, use_trial, use_rho, use_pm1, verbose) except StopIteration: if verbose: print complete_msg return factors # these are the limits for trial division which will # be attempted in parallel with pollard methods low, high = next_p, 2*next_p limit = limit or sqrt_n # add 1 to make sure limit is reached in primerange calls limit += 1 while 1: try: high_ = high if limit < high_: high_ = limit # Trial division if use_trial: if verbose: print trial_msg % (low, high_) ps = sieve.primerange(low, high_) n, found_trial = _trial(factors, n, ps, verbose) if found_trial: _check_termination(factors, n, limit, use_trial, use_rho, use_pm1, verbose) else: found_trial = False if high > limit: if verbose: print 'Exceeded limit:', limit if n > 1: factors[int(n)] = 1 raise StopIteration # Only used advanced methods when no small factors were found if not found_trial: if (use_pm1 or use_rho): high_root = max(int(math.log(high_**0.7)), low, 3) # Pollard p-1 if use_pm1: if verbose: print (pm1_msg % (high_root, high_)) c = pollard_pm1(n, B=high_root, seed=high_) if c: # factor it and let _trial do the update ps = factorint(c, limit=limit - 1, use_trial=use_trial, use_rho=use_rho, use_pm1=use_pm1, verbose=verbose) n, _ = _trial(factors, n, ps, verbose=False) _check_termination(factors, n, limit, use_trial, use_rho, use_pm1, verbose) # Pollard rho if use_rho: max_steps = high_root if verbose: print (rho_msg % (1, max_steps, high_)) c = pollard_rho(n, retries=1, max_steps=max_steps, seed=high_) if c: # 
factor it and let _trial do the update ps = factorint(c, limit=limit - 1, use_trial=use_trial, use_rho=use_rho, use_pm1=use_pm1, verbose=verbose) n, _ = _trial(factors, n, ps, verbose=False) _check_termination(factors, n, limit, use_trial, use_rho, use_pm1, verbose) except StopIteration: if verbose: print complete_msg return factors low, high = high, high*2
def pollard_pm1(n, B=10, a=2, retries=0, seed=1234):
    """
    Use Pollard's p-1 method to try to extract a nontrivial factor
    of ``n``. Either a divisor (perhaps composite) or ``None`` is returned.

    The value of ``a`` is the base that is used in the test gcd(a**M - 1, n).
    The default is 2.  If ``retries`` > 0 then if no factor is found after the
    first attempt, a new ``a`` will be generated randomly (using the ``seed``)
    and the process repeated.

    Note: the value of M is lcm(1..B) = reduce(ilcm, range(2, B + 1)).

    A search is made for factors next to even numbers having a power
    smoothness less than ``B``. Choosing a larger B increases the likelihood
    of finding a larger factor but takes longer. Whether a factor of n is
    found or not depends on ``a`` and the power smoothness of the even number
    just less than the factor p (hence the name p - 1).

    ``n`` must be greater than 3 and ``B`` greater than 2, otherwise a
    ``ValueError`` is raised.

    Although there is some discussion of what constitutes a good ``a``, some
    descriptions are hard to interpret. At the modular.math site referenced
    below it is stated that if gcd(a**M - 1, n) = N then a**M % q**r is 1
    for every prime power divisor of N. But consider the following:

        >>> from sympy.ntheory.factor_ import smoothness_p, pollard_pm1
        >>> n=257*1009
        >>> smoothness_p(n)
        (-1, [(257, (1, 2, 256)), (1009, (1, 7, 16))])

    So we should (and can) find a root with B=16:

        >>> pollard_pm1(n, B=16, a=3)
        1009

    If we attempt to increase B to 256 we find that it doesn't work:

        >>> pollard_pm1(n, B=256)
        >>>

    But if the value of ``a`` is changed we find that only multiples of
    257 work, e.g.:

        >>> pollard_pm1(n, B=256, a=257)
        1009

    Checking different ``a`` values shows that all the ones that didn't
    work had a gcd value not equal to ``n`` but equal to one of the
    factors:

        >>> from sympy.core.numbers import ilcm, igcd
        >>> from sympy import factorint, Pow
        >>> M = 1
        >>> for i in range(2, 256):
        ...     M = ilcm(M, i)
        ...
        >>> set([igcd(pow(a, M, n) - 1, n) for a in range(2, 256) if
        ...      igcd(pow(a, M, n) - 1, n) != n])
        set([1009])

    But does aM % d for every divisor of n give 1?

        >>> aM = pow(255, M, n)
        >>> [(d, aM%Pow(*d.args)) for d in factorint(n, visual=True).args]
        [(257**1, 1), (1009**1, 1)]

    No, only one of them. So perhaps the principle is that a root will
    be found for a given value of B provided that:

    1) the power smoothness of the p - 1 value next to the root
       does not exceed B
    2) a**M % p != 1 for any of the divisors of n.

    By trying more than one ``a`` it is possible that one of them
    will yield a factor.

    Examples
    ========

    With the default smoothness bound, this number can't be cracked:

        >>> from sympy.ntheory import pollard_pm1, primefactors
        >>> pollard_pm1(21477639576571)

    Increasing the smoothness bound helps:

        >>> pollard_pm1(21477639576571, B=2000)
        4410317

    Looking at the smoothness of the factors of this number we find:

        >>> from sympy.utilities import flatten
        >>> from sympy.ntheory.factor_ import smoothness_p, factorint
        >>> print smoothness_p(21477639576571, visual=1)
        p**i=4410317**1 has p-1 B=1787, B-pow=1787
        p**i=4869863**1 has p-1 B=2434931, B-pow=2434931

    The B and B-pow are the same for the p - 1 factorizations of the divisors
    because those factorizations had a very large prime factor:

        >>> factorint(4410317 - 1)
        {2: 2, 617: 1, 1787: 1}
        >>> factorint(4869863-1)
        {2: 1, 2434931: 1}

    Note that until B reaches the B-pow value of 1787, the number is not
    cracked;

        >>> pollard_pm1(21477639576571, B=1786)
        >>> pollard_pm1(21477639576571, B=1787)
        4410317

    The B value has to do with the factors of the number next to the divisor,
    not the divisors themselves. A worst case scenario is that the number next
    to the factor p has a large prime divisor or is a perfect power. If these
    conditions apply then the power-smoothness will be about p/2 or p. The more
    realistic is that there will be a large prime factor next to p requiring
    a B value on the order of p/2. Although primes may have been searched for
    up to this level, the p/2 is a factor of p - 1, something that we don't
    know. The modular.math reference below states that 15% of numbers in the
    range of 10**15 to 15**15 + 10**4 are 10**6 power smooth so a B of 10**6
    will fail 85% of the time in that range. From 10**8 to 10**8 + 10**3 the
    percentages are nearly reversed...but in that range the simple trial
    division is quite fast.

    References
    ==========

    - Richard Crandall & Carl Pomerance (2005), "Prime Numbers:
      A Computational Perspective", Springer, 2nd edition, 236-238
    - http://modular.math.washington.edu/edu/2007/spring/ent/ent-html/node81.html
    - http://www.cs.toronto.edu/~yuvalf/Factorization.pdf
    """

    n = int(n)
    if n < 4 or B < 3:
        raise ValueError('pollard_pm1 should receive n > 3 and B > 2')
    prng = random.Random(seed + B)

    # computing a**lcm(1,2,3,..B) % n for B > 2
    # it looks weird, but it's right: primes run [2, B]
    # and the answer's not right until the loop is done.
    for i in range(retries + 1):
        aM = a
        for p in sieve.primerange(2, B + 1):
            # Build the largest power of p that is <= B by repeated
            # multiplication. The previous int(math.log(B, p)) depended on
            # a float quotient that may fall just short of an integer when
            # B is an exact power of p, so the exponent could miss a
            # factor of p and no longer equal lcm(1..B).
            prime_power = p
            while prime_power*p <= B:
                prime_power *= p
            aM = pow(aM, prime_power, n)
        g = igcd(aM - 1, n)
        if 1 < g < n:
            return int(g)

        # get a new a:
        # since the exponent, lcm(1..B), is even, if we allow 'a' to be 'n-1'
        # then (n - 1)**even % n will be 1 which will give a g of 0 and 1 will
        # give a zero, too, so we set the range as [2, n-2]. Some references
        # say 'a' should be coprime to n, but either will detect factors.
        a = prng.randint(2, n - 2)
    # no attempt produced a proper divisor: None is returned implicitly
def factorint(n, limit=None, use_trial=True, use_rho=True, use_pm1=True,
              verbose=False):
    """
    Given a positive integer ``n``, ``factorint(n)`` returns a dict containing
    the prime factors of ``n`` as keys and their respective multiplicities
    as values. For example:

        >>> factorint(2000)    # 2000 = (2**4) * (5**3)
        {2: 4, 5: 3}
        >>> factorint(65537)   # This number is prime
        {65537: 1}

    For input less than 2, factorint behaves as follows:

      - ``factorint(1)`` returns the empty factorization, ``{}``
      - ``factorint(0)`` returns ``{0:1}``
      - ``factorint(-n)`` adds ``-1:1`` to the factors and then factors ``n``

    Algorithm
    =========

    The function switches between multiple algorithms. Trial division
    quickly finds small factors (of the order 1-5 digits), and finds
    all large factors if given enough time. The Pollard rho and p-1
    algorithms are used to find large factors ahead of time; they
    will often find factors of the order of 10 digits within a few
    seconds:

        >>> factors = factorint(12345678910111213141516)
        >>> for base, exp in sorted(factors.items()):
        ...     print("%s %s" % (base, exp))
        ...
        2 2
        2507191691 1
        1231026625769 1

    Any of these methods can optionally be disabled with the following
    boolean parameters:

      - ``use_trial``: Toggle use of trial division
      - ``use_rho``: Toggle use of Pollard's rho method
      - ``use_pm1``: Toggle use of Pollard's p-1 method

    At least one of the three methods must stay enabled; otherwise an
    ``AssertionError`` is raised.

    ``factorint`` also periodically checks if the remaining part is
    a prime number or a perfect power, and in those cases stops.

    Partial factorization
    =====================

    If ``limit`` is specified, the search is stopped after performing
    trial division up to the limit (or taking a corresponding number of
    rho/p-1 steps). This is useful if one has a large number and only
    is interested in finding small factors (if any). Note that setting
    a limit does not prevent larger factors from being found early; it
    simply means that any larger factors returned may be composite.

    This number, for example, has two small factors and a huge
    semiprime factor that cannot be reduced easily:

        >>> a = 1407633717262338957430697921446883
        >>> f = factorint(a, limit=10000)
        >>> f == {991: 1, 202916782076162456022877024859: 1, 7: 1}
        True
        >>> isprime(max(f))
        False

    Miscellaneous options
    =====================

    If ``verbose`` is set to ``True``, detailed progress is printed.
    """
    assert use_trial or use_rho or use_pm1

    n = int(n)
    if not n:
        return {0: 1}
    if n < 0:
        n = -n
        factors = {-1: 1}
    else:
        factors = {}

    # Power of two
    t = trailing(n)
    if t:
        factors[2] = t
        n >>= t
    if n == 1:
        return factors

    # It is sufficient to perform trial division up to sqrt(n)
    try:
        limit = limit or (int(n**0.5) + 2)
    except OverflowError:
        # n is too large for a float square root; 1e1000 evaluates to
        # float infinity, i.e. "no limit".
        limit = 1e1000

    low, high = 3, 250

    # Setting to True here forces _check_termination if first round of
    # trial division fails
    found_trial_previous = True

    if verbose and n < 1e300:
        # Single-argument print(...) gives the same output under both the
        # Python 2 print statement and the Python 3 print function.
        print("Factoring %s" % n)

    while True:
        # The helpers called below may raise StopIteration to signal that
        # the factorization held in ``factors`` is complete.
        try:
            high_ = min(high, limit)

            # Trial division
            if use_trial:
                if verbose:
                    print(trial_msg % (low, high_))
                ps = sieve.primerange(low, high_)
                n, found_trial = _trial(factors, n, ps, verbose)
            else:
                found_trial = False

            if high > limit:
                # The search limit has been reached: whatever remains is
                # recorded as a single (possibly composite) factor.
                factors[n] = 1
                return factors

            # Only used advanced (and more expensive) methods as long as
            # trial division fails to locate small factors
            if not found_trial:
                if found_trial_previous:
                    _check_termination(factors, n, verbose)

                # Pollard p-1
                if use_pm1:
                    if verbose:
                        print(pm1_msg % (high_, high_))
                    # The returned factor may be composite, so it is
                    # factored recursively; "or 1" covers a None result.
                    ps = factorint(
                        pollard_pm1(n, B=high_, seed=high_) or 1,
                        limit=limit, verbose=verbose)
                    n, found_pm1 = _trial(factors, n, ps, verbose)
                    if found_pm1:
                        _check_termination(factors, n, verbose)

                # Pollard rho
                if use_rho:
                    max_steps = int(high_**0.7)
                    if verbose:
                        print(rho_msg % (1, max_steps, high_))
                    ps = factorint(
                        pollard_rho(n, retries=1, max_steps=max_steps,
                                    seed=high_) or 1,
                        limit=limit, verbose=verbose)
                    n, found_rho = _trial(factors, n, ps, verbose)
                    if found_rho:
                        _check_termination(factors, n, verbose)

        except StopIteration:
            return factors

        found_trial_previous = found_trial
        low, high = high, high*2
def factorint(n, limit=None, use_trial=True, use_rho=True, use_pm1=True,
              verbose=False, visual=False):
    """
    Given a positive integer ``n``, ``factorint(n)`` returns a dict containing
    the prime factors of ``n`` as keys and their respective multiplicities
    as values. For example:

        >>> from sympy.ntheory import factorint
        >>> factorint(2000)    # 2000 = (2**4) * (5**3)
        {2: 4, 5: 3}
        >>> factorint(65537)   # This number is prime
        {65537: 1}

    For input less than 2, factorint behaves as follows:

      - ``factorint(1)`` returns the empty factorization, ``{}``
      - ``factorint(0)`` returns ``{0:1}``
      - ``factorint(-n)`` adds ``-1:1`` to the factors and then factors ``n``

    Algorithm
    =========

    The function switches between multiple algorithms. Trial division
    quickly finds small factors (of the order 1-5 digits), and finds
    all large factors if given enough time. The Pollard rho and p-1
    algorithms are used to find large factors ahead of time; they
    will often find factors of the order of 10 digits within a few
    seconds:

        >>> factors = factorint(12345678910111213141516)
        >>> for base, exp in sorted(factors.items()):
        ...     print("%s %s" % (base, exp))
        ...
        2 2
        2507191691 1
        1231026625769 1

    Any of these methods can optionally be disabled with the following
    boolean parameters:

      - ``use_trial``: Toggle use of trial division
      - ``use_rho``: Toggle use of Pollard's rho method
      - ``use_pm1``: Toggle use of Pollard's p-1 method

    At least one of the three methods must stay enabled; otherwise an
    ``AssertionError`` is raised.

    ``factorint`` also periodically checks if the remaining part is
    a prime number or a perfect power, and in those cases stops.

    Partial Factorization
    =====================

    If ``limit`` (> 2) is specified, the search is stopped after performing
    trial division up to (and including) the limit (or taking a
    corresponding number of rho/p-1 steps). This is useful if one has
    a large number and only is interested in finding small factors (if
    any). Note that setting a limit does not prevent larger factors
    from being found early; it simply means that the largest factor may
    be composite.

    This number, for example, has two small factors and a huge
    semi-prime factor that cannot be reduced easily:

        >>> from sympy.ntheory import isprime
        >>> a = 1407633717262338957430697921446883
        >>> f = factorint(a, limit=10000)
        >>> f == {991: 1, 202916782076162456022877024859: 1, 7: 1}
        True
        >>> isprime(max(f))
        False

    Visual Factorization
    ====================

    If ``visual`` is set to ``True``, then it will return a visual
    factorization of the integer.  For example:

        >>> from sympy import pprint
        >>> pprint(factorint(4200, visual=True))
         3  1  2  1
        2 *3 *5 *7

    Note that this is achieved by using the evaluate=False flag in Mul
    and Pow. If you do other manipulations with an expression where
    evaluate=False, it may evaluate.  Therefore, you should use the
    visual option only for visualization, and use the normal dictionary
    returned by visual=False if you want to perform operations on the
    factors.

    If you find that you want one from the other but you do not want to
    run expensive factorint again, you can easily switch between the two
    forms using the following list comprehensions:

        >>> from sympy import Mul, Pow
        >>> regular = factorint(1764); regular
        {2: 2, 3: 2, 7: 2}
        >>> pprint(Mul(*[Pow(*i, **{'evaluate':False}) for i in regular.items()],
        ... **{'evaluate':False}))
         2  2  2
        2 *3 *7

        >>> visual = factorint(1764, visual=True); pprint(visual)
         2  2  2
        2 *3 *7
        >>> dict([i.args for i in visual.args])
        {2: 2, 3: 2, 7: 2}

    Miscellaneous Options
    =====================

    If ``verbose`` is set to ``True``, detailed progress is printed.
    """
    if visual:
        # Compute the ordinary dict form, then wrap it in unevaluated
        # Pow/Mul objects so the factorization is displayed as written.
        factordict = factorint(n, limit=limit, use_trial=use_trial,
                               use_rho=use_rho, use_pm1=use_pm1,
                               verbose=verbose, visual=False)
        if not factordict:
            return S.One
        return Mul(*[Pow(*i, **{'evaluate': False})
                     for i in factordict.items()],
                   **{'evaluate': False})

    assert use_trial or use_rho or use_pm1

    n = int(n)
    if not n:
        return {0: 1}
    if n < 0:
        n = -n
        factors = {-1: 1}
    else:
        factors = {}

    # Power of two
    t = trailing(n)
    if t:
        factors[2] = t
        n >>= t
    if n == 1:
        return factors

    low, high = 3, 250

    # It is sufficient to perform trial division up to sqrt(n)
    try:
        # add 1 to sqrt in case there is round off; add 1 overall to make
        # sure that the limit is included
        limit = iff(limit, lambda: max(limit, low),
                    lambda: int(n**0.5) + 1) + 1
    except OverflowError:
        # 1e1000 evaluates to float infinity, i.e. "no limit".
        limit = 1e1000

    # Setting to True here forces _check_termination if first round of
    # trial division fails
    found_trial_previous = True

    if verbose and n < 1e300:
        # Single-argument print(...) gives the same output under both the
        # Python 2 print statement and the Python 3 print function.
        print("Factoring %s" % n)

    while True:
        # The helpers called below may raise StopIteration to signal that
        # the factorization held in ``factors`` is complete.
        try:
            high_ = min(high, limit)

            # Trial division
            if use_trial:
                if verbose:
                    print(trial_msg % (low, high_))
                ps = sieve.primerange(low, high_)
                n, found_trial = _trial(factors, n, ps, verbose)
            else:
                found_trial = False

            if high > limit:
                # The search limit has been reached: whatever remains is
                # recorded as a single (possibly composite) factor.
                factors[n] = 1
                return factors

            # Only used advanced (and more expensive) methods as long as
            # trial division fails to locate small factors
            if not found_trial:
                if found_trial_previous:
                    _check_termination(factors, n, verbose)

                # Pollard p-1
                if use_pm1:
                    if verbose:
                        print(pm1_msg % (high_, high_))
                    # limit was incremented above so that it is inclusive;
                    # undo that for the recursive call, which adds it back.
                    ps = factorint(
                        pollard_pm1(n, B=high_, seed=high_) or 1,
                        limit=limit - 1, verbose=verbose)
                    n, found_pm1 = _trial(factors, n, ps, verbose)
                    if found_pm1:
                        _check_termination(factors, n, verbose)

                # Pollard rho
                if use_rho:
                    max_steps = int(high_**0.7)
                    if verbose:
                        print(rho_msg % (1, max_steps, high_))
                    ps = factorint(
                        pollard_rho(n, retries=1, max_steps=max_steps,
                                    seed=high_) or 1,
                        limit=limit - 1, verbose=verbose)
                    n, found_rho = _trial(factors, n, ps, verbose)
                    if found_rho:
                        _check_termination(factors, n, verbose)

        except StopIteration:
            return factors

        found_trial_previous = found_trial
        low, high = high, high*2
def factorint(n, limit=None, use_trial=True, use_rho=True, use_pm1=True,
              verbose=False):
    """
    Given a positive integer ``n``, ``factorint(n)`` returns a dict containing
    the prime factors of ``n`` as keys and their respective multiplicities
    as values. For example:

        >>> factorint(2000)    # 2000 = (2**4) * (5**3)
        {2: 4, 5: 3}
        >>> factorint(65537)   # This number is prime
        {65537: 1}

    For input less than 2, factorint behaves as follows:

      - ``factorint(1)`` returns the empty factorization, ``{}``
      - ``factorint(0)`` returns ``{0:1}``
      - ``factorint(-n)`` adds ``-1:1`` to the factors and then factors ``n``

    Algorithm
    =========

    The function switches between multiple algorithms. Trial division
    quickly finds small factors (of the order 1-5 digits), and finds
    all large factors if given enough time. The Pollard rho and p-1
    algorithms are used to find large factors ahead of time; they
    will often find factors of the order of 10 digits within a few
    seconds:

        >>> factors = factorint(12345678910111213141516)
        >>> for base, exp in sorted(factors.items()):
        ...     print("%s %s" % (base, exp))
        ...
        2 2
        2507191691 1
        1231026625769 1

    Any of these methods can optionally be disabled with the following
    boolean parameters:

      - ``use_trial``: Toggle use of trial division
      - ``use_rho``: Toggle use of Pollard's rho method
      - ``use_pm1``: Toggle use of Pollard's p-1 method

    At least one of the three methods must stay enabled; otherwise an
    ``AssertionError`` is raised.

    ``factorint`` also periodically checks if the remaining part is
    a prime number or a perfect power, and in those cases stops.

    Partial factorization
    =====================

    If ``limit`` is specified, the search is stopped after performing
    trial division up to the limit (or taking a corresponding number of
    rho/p-1 steps). This is useful if one has a large number and only
    is interested in finding small factors (if any). Note that setting
    a limit does not prevent larger factors from being found early; it
    simply means that any larger factors returned may be composite.

    This number, for example, has two small factors and a huge
    semiprime factor that cannot be reduced easily:

        >>> a = 1407633717262338957430697921446883
        >>> f = factorint(a, limit=10000)
        >>> f == {991: 1, 202916782076162456022877024859: 1, 7: 1}
        True
        >>> isprime(max(f))
        False

    Miscellaneous options
    =====================

    If ``verbose`` is set to ``True``, detailed progress is printed.
    """
    assert use_trial or use_rho or use_pm1

    n = int(n)
    if not n:
        return {0: 1}
    if n < 0:
        n = -n
        factors = {-1: 1}
    else:
        factors = {}

    # Power of two
    t = trailing(n)
    if t:
        factors[2] = t
        n >>= t
    if n == 1:
        return factors

    # It is sufficient to perform trial division up to sqrt(n)
    try:
        limit = limit or (int(n**0.5) + 2)
    except OverflowError:
        # n is too large for a float square root; 1e1000 evaluates to
        # float infinity, i.e. "no limit".
        limit = 1e1000

    low, high = 3, 250

    # Setting to True here forces _check_termination if first round of
    # trial division fails
    found_trial_previous = True

    if verbose and n < 1e300:
        # Single-argument print(...) matches the pm1/rho messages below and
        # gives the same output on Python 2 and Python 3.
        print("Factoring %s" % n)

    while True:
        # The helpers called below may raise StopIteration to signal that
        # the factorization held in ``factors`` is complete.
        try:
            high_ = min(high, limit)

            # Trial division
            if use_trial:
                if verbose:
                    print(trial_msg % (low, high_))
                ps = sieve.primerange(low, high_)
                n, found_trial = _trial(factors, n, ps, verbose)
            else:
                found_trial = False

            if high > limit:
                # The search limit has been reached: whatever remains is
                # recorded as a single (possibly composite) factor.
                factors[n] = 1
                return factors

            # Only used advanced (and more expensive) methods as long as
            # trial division fails to locate small factors
            if not found_trial:
                if found_trial_previous:
                    _check_termination(factors, n, verbose)

                # Pollard p-1
                if use_pm1:
                    if verbose:
                        print(pm1_msg % (high_, high_))
                    # The returned factor may be composite, so it is
                    # factored recursively; "or 1" covers a None result.
                    ps = factorint(
                        pollard_pm1(n, B=high_, seed=high_) or 1,
                        limit=limit, verbose=verbose)
                    n, found_pm1 = _trial(factors, n, ps, verbose)
                    if found_pm1:
                        _check_termination(factors, n, verbose)

                # Pollard rho
                if use_rho:
                    max_steps = int(high_**0.7)
                    if verbose:
                        print(rho_msg % (1, max_steps, high_))
                    ps = factorint(
                        pollard_rho(n, retries=1, max_steps=max_steps,
                                    seed=high_) or 1,
                        limit=limit, verbose=verbose)
                    n, found_rho = _trial(factors, n, ps, verbose)
                    if found_rho:
                        _check_termination(factors, n, verbose)

        except StopIteration:
            return factors

        found_trial_previous = found_trial
        low, high = high, high * 2
def factorint(n, limit=None, verbose=False):
    """
    Given a positive integer n, factorint(n) returns a list
    [(p_1, m_1), (p_2, m_2), ...] with all p prime and n = p_1**m_1 *
    p_2**m_2 * ...

    Special cases: 1 factors as [], 0 factors as [(0, 1)], and negative
    integers factor as [(-1, 1), ...].

    The function uses a composite algorithm, switching between
    Pollard's p-1 method and looking for small factors through trial
    division.

    It is sometimes useful to look only for small factors. If 'limit'
    is specified, factorint will only perform trial division with
    candidate factors up to this limit (and p-1 search up to the same
    smoothness bound). As a result, the last 'prime' in the returned
    list may be composite.

    If 'verbose' is true, the result of every trial division and p-1
    round is printed.

    Example usage
    =============

    Here are some simple factorizations (with at most six digits in the
    second largest factor). They should all complete within a fraction
    of a second:

        >>> factorint(1)
        []

        >>> factorint(100)
        [(2, 2), (5, 2)]

        >>> factorint(17*19)
        [(17, 1), (19, 1)]

        >>> factorint(prime(100)*prime(1000)*prime(10000))
        [(541, 1), (7919, 1), (104729, 1)]

        >>> factors = factorint(2**(2**6) + 1)
        >>> for base, exp in factors: print("%s %s" % (base, exp))
        ...
        274177 1
        67280421310721 1

    Factors on the order of 10 digits can generally be found quickly.
    The following computations should complete within a few seconds:

        >>> factors = factorint(21477639576571)
        >>> for base, exp in factors: print("%s %s" % (base, exp))
        ...
        4410317 1
        4869863 1

        >>> factors = factorint(12345678910111213141516)
        >>> for base, exp in factors: print("%s %s" % (base, exp))
        ...
        2 2
        2507191691 1
        1231026625769 1

        >>> factors = factorint(5715365922033905625269)
        >>> for base, exp in factors: print("%s %s" % (base, exp))
        ...
        74358036521 1
        76862786989 1

    This number has an enormous semiprime factor that is better
    ignored:

        >>> a = 1407633717262338957430697921446883
        >>> factorint(a, limit=10000)
        [(7, 1), (991, 1), (202916782076162456022877024859L, 1)]
        >>> isprime(_[-1][0])
        False

    """
    n = int(n)
    if n < 0:
        # Forward verbose as well, so progress output is not silently lost
        # for negative input.
        return [(-1, 1)] + factorint(-n, limit, verbose)
    if n == 0:
        return [(0, 1)]
    if n == 1:
        return []
    if isprime(n):
        return [(n, 1)]
    if limit is None:
        limit = int(n**0.5) + 1
    factors = []
    low, high = 2, 50
    while True:
        # Trial divide for small factors first
        upper = min(high, limit)
        tfactors = trial(n, sieve.primerange(low, upper))
        if verbose:
            # Single-argument print(...) gives the same output under both
            # the Python 2 print statement and the Python 3 print function.
            print("trial division from %s to %s gave %s"
                  % (low, upper - 1, tfactors))
        # The last entry of tfactors holds what is left of n after the
        # trial division.
        remainder = tfactors[-1][0]
        # If all were primes, we're done
        if isprime(remainder):
            factors += tfactors
            break
        elif remainder == 1:
            factors += tfactors[:-1]
            break
        else:
            factors += tfactors[:-1]
            n = remainder
        # If we're lucky, Pollard's p-1 will extract a large factor
        w = pollard_pm1(n, high)
        if verbose:
            print("pollard p-1 with smoothness bound %s gave %s" % (high, w))
            print("")
        if w is not None:
            # w may be composite
            for f, _ in factorint(w, limit):
                # The exponent of f in n is its multiplicity in n itself.
                # Multiplying it by the exponent found in w would
                # over-count whenever w contains f more than once and
                # corrupt n in the floor division below.
                m = multiplicity(f, n)
                factors.append((f, m))
                n //= f**m
        if n == 1:
            break
        if isprime(n):
            factors.append((int(n), 1))
            break
        if high > limit:
            # Search limit reached; the remainder may be composite.
            factors.append((int(n), 1))
            break
        low, high = high, high*5
    return sorted(factors)
def factorint(n, limit=None, verbose=False):
    """
    Given a positive integer n, factorint(n) returns a list
    [(p_1, m_1), (p_2, m_2), ...] with all p prime and n = p_1**m_1 *
    p_2**m_2 * ...

    Special cases: 1 factors as [], 0 factors as [(0, 1)], and negative
    integers factor as [(-1, 1), ...].

    The function uses a composite algorithm, switching between
    Pollard's p-1 method and looking for small factors through trial
    division.

    It is sometimes useful to look only for small factors. If 'limit'
    is specified, factorint will only perform trial division with
    candidate factors up to this limit (and p-1 search up to the same
    smoothness bound). As a result, the last 'prime' in the returned
    list may be composite.

    If 'verbose' is true, the result of every trial division and p-1
    round is printed.

    Example usage
    =============

    Here are some simple factorizations (with at most six digits in the
    second largest factor). They should all complete within a fraction
    of a second:

        >>> factorint(1)
        []

        >>> factorint(100)
        [(2, 2), (5, 2)]

        >>> factorint(17*19)
        [(17, 1), (19, 1)]

        >>> factorint(prime(100)*prime(1000)*prime(10000))
        [(541, 1), (7919, 1), (104729, 1)]

        >>> factors = factorint(2**(2**6) + 1)
        >>> for base, exp in factors: print("%s %s" % (base, exp))
        ...
        274177 1
        67280421310721 1

    Factors on the order of 10 digits can generally be found quickly.
    The following computations should complete within a few seconds:

        >>> factors = factorint(21477639576571)
        >>> for base, exp in factors: print("%s %s" % (base, exp))
        ...
        4410317 1
        4869863 1

        >>> factors = factorint(12345678910111213141516)
        >>> for base, exp in factors: print("%s %s" % (base, exp))
        ...
        2 2
        2507191691 1
        1231026625769 1

        >>> factors = factorint(5715365922033905625269)
        >>> for base, exp in factors: print("%s %s" % (base, exp))
        ...
        74358036521 1
        76862786989 1

    This number has an enormous semiprime factor that is better
    ignored:

        >>> a = 1407633717262338957430697921446883
        >>> factorint(a, limit=10000)
        [(7, 1), (991, 1), (202916782076162456022877024859, 1)]
        >>> isprime(_[-1][0])
        False

    """
    n = int(n)
    if n < 0:
        # Forward verbose as well, so progress output is not silently lost
        # for negative input.
        return [(-1, 1)] + factorint(-n, limit, verbose)
    if n == 0:
        return [(0, 1)]
    if n == 1:
        return []
    if isprime(n):
        return [(n, 1)]
    if limit is None:
        limit = int(n**0.5) + 1
    factors = []
    low, high = 2, 50
    while True:
        # Trial divide for small factors first
        upper = min(high, limit)
        tfactors = trial(n, sieve.primerange(low, upper))
        if verbose:
            # Single-argument print(...) gives the same output under both
            # the Python 2 print statement and the Python 3 print function.
            print("trial division from %s to %s gave %s"
                  % (low, upper - 1, tfactors))
        # The last entry of tfactors holds what is left of n after the
        # trial division.
        remainder = tfactors[-1][0]
        # If all were primes, we're done
        if isprime(remainder):
            factors += tfactors
            break
        elif remainder == 1:
            factors += tfactors[:-1]
            break
        else:
            factors += tfactors[:-1]
            n = remainder
        # If we're lucky, Pollard's p-1 will extract a large factor
        w = pollard_pm1(n, high)
        if verbose:
            print("pollard p-1 with smoothness bound %s gave %s" % (high, w))
            print("")
        if w is not None:
            # w may be composite
            for f, _ in factorint(w, limit):
                # The exponent of f in n is its multiplicity in n itself.
                # Multiplying it by the exponent found in w would
                # over-count whenever w contains f more than once and
                # corrupt n in the floor division below.
                m = multiplicity(f, n)
                factors.append((f, m))
                n //= f**m
        if n == 1:
            break
        if isprime(n):
            factors.append((int(n), 1))
            break
        if high > limit:
            # Search limit reached; the remainder may be composite.
            factors.append((int(n), 1))
            break
        low, high = high, high * 5
    return sorted(factors)
def factorint(n, limit=None, use_trial=True, use_rho=True, use_pm1=True,
              verbose=False, visual=False):
    """
    Given a positive integer ``n``, ``factorint(n)`` returns a dict containing
    the prime factors of ``n`` as keys and their respective multiplicities
    as values. For example:

        >>> from sympy.ntheory import factorint
        >>> factorint(2000)    # 2000 = (2**4) * (5**3)
        {2: 4, 5: 3}
        >>> factorint(65537)   # This number is prime
        {65537: 1}

    For input less than 2, factorint behaves as follows:

      - ``factorint(1)`` returns the empty factorization, ``{}``
      - ``factorint(0)`` returns ``{0:1}``
      - ``factorint(-n)`` adds ``-1:1`` to the factors and then factors ``n``

    Algorithm
    =========

    The function switches between multiple algorithms. Trial division
    quickly finds small factors (of the order 1-5 digits), and finds
    all large factors if given enough time. The Pollard rho and p-1
    algorithms are used to find large factors ahead of time; they
    will often find factors of the order of 10 digits within a few
    seconds:

        >>> factors = factorint(12345678910111213141516)
        >>> for base, exp in sorted(factors.items()):
        ...     print("%s %s" % (base, exp))
        ...
        2 2
        2507191691 1
        1231026625769 1

    Any of these methods can optionally be disabled with the following
    boolean parameters:

      - ``use_trial``: Toggle use of trial division
      - ``use_rho``: Toggle use of Pollard's rho method
      - ``use_pm1``: Toggle use of Pollard's p-1 method

    At least one of the three methods must stay enabled; otherwise an
    ``AssertionError`` is raised.

    ``factorint`` also periodically checks if the remaining part is
    a prime number or a perfect power, and in those cases stops.

    Partial Factorization
    =====================

    If ``limit`` (> 2) is specified, the search is stopped after performing
    trial division up to (and including) the limit (or taking a
    corresponding number of rho/p-1 steps). This is useful if one has
    a large number and only is interested in finding small factors (if
    any). Note that setting a limit does not prevent larger factors
    from being found early; it simply means that the largest factor may
    be composite.

    This number, for example, has two small factors and a huge
    semi-prime factor that cannot be reduced easily:

        >>> from sympy.ntheory import isprime
        >>> a = 1407633717262338957430697921446883
        >>> f = factorint(a, limit=10000)
        >>> f == {991: 1, 202916782076162456022877024859: 1, 7: 1}
        True
        >>> isprime(max(f))
        False

    Visual Factorization
    ====================

    If ``visual`` is set to ``True``, then it will return a visual
    factorization of the integer.  For example:

        >>> from sympy import pprint
        >>> pprint(factorint(4200, visual=True))
         3  1  2  1
        2 *3 *5 *7

    Note that this is achieved by using the evaluate=False flag in Mul
    and Pow. If you do other manipulations with an expression where
    evaluate=False, it may evaluate.  Therefore, you should use the
    visual option only for visualization, and use the normal dictionary
    returned by visual=False if you want to perform operations on the
    factors.

    If you find that you want one from the other but you do not want to
    run expensive factorint again, you can easily switch between the two
    forms using the following list comprehensions:

        >>> from sympy import Mul, Pow
        >>> regular = factorint(1764); regular
        {2: 2, 3: 2, 7: 2}
        >>> pprint(Mul(*[Pow(*i, **{'evaluate':False}) for i in regular.items()],
        ... **{'evaluate':False}))
         2  2  2
        2 *3 *7

        >>> visual = factorint(1764, visual=True); pprint(visual)
         2  2  2
        2 *3 *7
        >>> dict([i.args for i in visual.args])
        {2: 2, 3: 2, 7: 2}

    Miscellaneous Options
    =====================

    If ``verbose`` is set to ``True``, detailed progress is printed.
    """
    if visual:
        # Compute the ordinary dict form, then wrap it in unevaluated
        # Pow/Mul objects so the factorization is displayed as written.
        factordict = factorint(n, limit=limit, use_trial=use_trial,
                               use_rho=use_rho, use_pm1=use_pm1,
                               verbose=verbose, visual=False)
        if not factordict:
            return S.One
        return Mul(
            *[Pow(*i, **{'evaluate': False}) for i in factordict.items()],
            **{'evaluate': False})

    assert use_trial or use_rho or use_pm1

    n = int(n)
    if not n:
        return {0: 1}
    if n < 0:
        n = -n
        factors = {-1: 1}
    else:
        factors = {}

    # Power of two
    t = trailing(n)
    if t:
        factors[2] = t
        n >>= t
    if n == 1:
        return factors

    low, high = 3, 250

    # It is sufficient to perform trial division up to sqrt(n)
    try:
        # add 1 to sqrt in case there is round off; add 1 overall to make
        # sure that the limit is included
        limit = iff(limit, lambda: max(limit, low),
                    lambda: int(n**0.5) + 1) + 1
    except OverflowError:
        # 1e1000 evaluates to float infinity, i.e. "no limit".
        limit = 1e1000

    # Setting to True here forces _check_termination if first round of
    # trial division fails
    found_trial_previous = True

    if verbose and n < 1e300:
        # Single-argument print(...) matches the pm1/rho messages below and
        # gives the same output on Python 2 and Python 3.
        print("Factoring %s" % n)

    while True:
        # The helpers called below may raise StopIteration to signal that
        # the factorization held in ``factors`` is complete.
        try:
            high_ = min(high, limit)

            # Trial division
            if use_trial:
                if verbose:
                    print(trial_msg % (low, high_))
                ps = sieve.primerange(low, high_)
                n, found_trial = _trial(factors, n, ps, verbose)
            else:
                found_trial = False

            if high > limit:
                # The search limit has been reached: whatever remains is
                # recorded as a single (possibly composite) factor.
                factors[n] = 1
                return factors

            # Only used advanced (and more expensive) methods as long as
            # trial division fails to locate small factors
            if not found_trial:
                if found_trial_previous:
                    _check_termination(factors, n, verbose)

                # Pollard p-1
                if use_pm1:
                    if verbose:
                        print(pm1_msg % (high_, high_))
                    # limit was incremented above so that it is inclusive;
                    # undo that for the recursive call, which adds it back.
                    ps = factorint(
                        pollard_pm1(n, B=high_, seed=high_) or 1,
                        limit=limit - 1, verbose=verbose)
                    n, found_pm1 = _trial(factors, n, ps, verbose)
                    if found_pm1:
                        _check_termination(factors, n, verbose)

                # Pollard rho
                if use_rho:
                    max_steps = int(high_**0.7)
                    if verbose:
                        print(rho_msg % (1, max_steps, high_))
                    ps = factorint(
                        pollard_rho(n, retries=1, max_steps=max_steps,
                                    seed=high_) or 1,
                        limit=limit - 1, verbose=verbose)
                    n, found_rho = _trial(factors, n, ps, verbose)
                    if found_rho:
                        _check_termination(factors, n, verbose)

        except StopIteration:
            return factors

        found_trial_previous = found_trial
        low, high = high, high * 2