Пример #1
0
def _get_similarity(doc1, doc2, vec1, vec2):
    """Score how similar two documents are.

    The score is the dot product of the two document vectors divided by the
    sum of ``_log10`` of the two document lengths, where the lengths are
    obtained from ``_get_doc_length``.

    Parameters
    ----------
    doc1 : list of (list of (tuple of int))
        First document.
    doc2 : list of (list of (tuple of int))
        Second document.
    vec1 : array
        Vector of the first document. Must provide ``dot``; the product is
        read through ``toarray()[0][0]``, so this is presumably a sparse
        1 x N row -- TODO confirm against the caller.
    vec2 : array
        Vector of the second document. Must provide ``transpose``.

    Returns
    -------
    float
        Similarity of the two documents, or 0 when either document length
        is not positive or the summed log-lengths equal zero.

    """
    overlap = vec1.dot(vec2.transpose()).toarray()[0][0]
    len_first = _get_doc_length(doc1)
    len_second = _get_doc_length(doc2)

    # The logarithm is only taken for strictly positive lengths.
    if len_first > 0 and len_second > 0:
        normaliser = _log10(len_first) + _log10(len_second)
    else:
        normaliser = 0

    # A zero normaliser would divide by zero; report "no similarity" instead.
    if normaliser == 0:
        return 0
    return overlap / normaliser
Пример #2
0
def get_similarity(str1, str2):
    """Return the overlap score of two sequences of '0'/'1' markers.

    The inputs are compared position by position, up to the length of the
    shorter one. A position counts for an input when its item equals '1',
    and counts as a match when both items equal '1'.

    Parameters
    ----------
    str1 : str
        First marker sequence.
    str2 : str
        Second marker sequence.

    Returns
    -------
    float
        ``match / (_log10(l1) + _log10(l2))`` where ``l1`` and ``l2`` are the
        numbers of '1' items seen in each input, or 0 when either count is
        zero or the summed logarithms equal zero.

    """
    match = 0
    l1 = 0
    l2 = 0
    # zip() stops at the shorter input, which replaces the manual minimum
    # computation (and avoids shadowing the builtin ``min``).
    for item1, item2 in zip(str1, str2):
        set1 = item1 == '1'
        set2 = item2 == '1'
        if set1 and set2:
            match += 1
        if set1:
            l1 += 1
        if set2:
            l2 += 1

    # The logarithm is undefined for a zero count.
    if l1 == 0 or l2 == 0:
        return 0

    denominator = _log10(l1) + _log10(l2)
    # Both counts equal to 1 give a zero denominator.
    if denominator == 0:
        return 0

    return match / denominator
Пример #3
0
def _get_similarity(doc1, doc2, vec1, vec2):
    """Return the similarity score of two documents.

    The dot product of `vec1` and `vec2` is divided by the sum of ``_log10``
    of the two document lengths reported by ``_get_doc_length``.

    Parameters
    ----------
    doc1, doc2
        The two documents; only passed on to ``_get_doc_length``.
    vec1, vec2
        Vectors of the first and second document. The product is read
        through ``toarray()[0][0]``, so these are presumably sparse 1 x N
        rows -- TODO confirm against the caller.

    Returns
    -------
    float
        The score, or 0 when either length is not positive or the summed
        log-lengths equal zero.

    """
    dot_product = vec1.dot(vec2.transpose()).toarray()[0][0]
    first_len = _get_doc_length(doc1)
    second_len = _get_doc_length(doc2)

    # Only take logarithms of strictly positive lengths.
    if first_len > 0 and second_len > 0:
        log_total = _log10(first_len) + _log10(second_len)
    else:
        log_total = 0

    # Guard against dividing by zero.
    if log_total == 0:
        return 0
    return dot_product / log_total
Пример #4
0
def _get_similarity(doc1, doc2, vec1, vec2):
    """Compute the similarity of two documents.

    The dot product of the document vectors is normalised by the sum of
    ``_log10`` of both document lengths (lengths come from
    ``_get_doc_length``).

    Parameters
    ----------
    doc1 : list of (list of (tuple of int))
        First document.
    doc2 : list of (list of (tuple of int))
        Second document.
    vec1 : array
        Vector of the first document. Must provide ``dot``; presumably a
        sparse 1 x N row, since the product is read via ``toarray()[0][0]``
        -- TODO confirm against the caller.
    vec2 : array
        Vector of the second document. Must provide ``transpose``.

    Returns
    -------
    float
        Similarity of the two documents; 0 when a document length is not
        positive or when the normalising term is zero.

    """
    shared_weight = vec1.dot(vec2.transpose()).toarray()[0][0]
    size_one = _get_doc_length(doc1)
    size_two = _get_doc_length(doc2)

    both_non_empty = size_one > 0 and size_two > 0
    if not both_non_empty:
        # No logarithm can be taken; the score is defined as 0.
        return 0

    scale = _log10(size_one) + _log10(size_two)
    if scale == 0:
        # Avoid a division by zero.
        return 0
    return shared_weight / scale
Пример #5
0
def _get_similarity(doc1, doc2, vec1, vec2):
    """Return the log-length-normalised dot product of two documents.

    Parameters
    ----------
    doc1, doc2
        The documents; their lengths are taken with ``_get_doc_length``.
    vec1, vec2
        Document vectors. ``vec1.dot(vec2.transpose())`` must expose
        ``toarray()``; presumably sparse 1 x N rows -- TODO confirm against
        the caller.

    Returns
    -------
    float
        ``dot / (_log10(len1) + _log10(len2))``, or 0 when either length is
        not positive or the denominator is zero.

    """
    product = vec1.dot(vec2.transpose()).toarray()[0][0]
    n_first = _get_doc_length(doc1)
    n_second = _get_doc_length(doc2)

    divisor = 0
    if n_first > 0 and n_second > 0:
        # Logarithms are only defined for positive lengths.
        divisor = _log10(n_first) + _log10(n_second)

    if divisor != 0:
        return product / divisor
    return 0
Пример #6
0
def _get_similarity(s1, s2):
    words_sentence_one = s1.split()
    words_sentence_two = s2.split()

    common_word_count = _count_common_words(words_sentence_one, words_sentence_two)

    log_s1 = _log10(len(words_sentence_one))
    log_s2 = _log10(len(words_sentence_two))

    if log_s1 + log_s2 == 0:
        return 0

    return common_word_count / (log_s1 + log_s2)
Пример #7
0
def _get_similarity(s1, s2):
    words_sentence_one = s1.split()
    words_sentence_two = s2.split()

    common_word_count = _count_common_words(words_sentence_one, words_sentence_two)

    log_s1 = _log10(len(words_sentence_one))
    log_s2 = _log10(len(words_sentence_two))

    if log_s1 + log_s2 == 0:
        return 0

    return common_word_count / (log_s1 + log_s2)
Пример #8
0
def expand(series, min=1.0, max=10.0E6):
    """Expand a single decade series over multiple decades.

    Every value of `series` is multiplied by successive powers of ten and
    kept when the product lies in the closed interval [min, max].

    series = single decade series to expand.
    min = minimum value of expanded series (inclusive).
    max = maximum value of expanded series (inclusive).

    Returns a list ordered by decade first, then by position in `series`.

    eg. expand(E12, 10.0, 100E3)
    """
    # Range of power-of-ten multipliers required.
    # -3/-1: Adjusted because series arrays normalized to 1E2.
    lowest_exp = int(_log10(min) - 3)
    highest_exp = int(_log10(max))

    expanded = []
    for exponent in range(lowest_exp, highest_exp):
        for base in series:
            candidate = base * 10**exponent
            # Keep only values inside the requested window.
            if candidate >= min and candidate <= max:
                expanded.append(candidate)
    return expanded
Пример #9
0
def expand(series, min=1.0, max=10.0E6):
    """Expand a single decade series over multiple decades.

    Each entry of `series` is scaled by a run of powers of ten; scaled
    values outside the closed interval [min, max] are dropped.

    series = single decade series to expand.
    min = minimum value of expanded series (inclusive).
    max = maximum value of expanded series (inclusive).

    Returns the surviving values as a list, lowest decade first.

    eg. expand(E12, 10.0, 100E3)
    """
    # Calc range of multipliers required.
    # -3/-1: Adjusted because series arrays normalized to 1E2.
    first_power = int(_log10(min) - 3)
    stop_power = int(_log10(max))

    result = []
    for power in range(first_power, stop_power):
        for entry in series:
            scaled = entry * 10**power
            if scaled >= min and scaled <= max:
                result.append(scaled)
    return result
def _number_profile(value, precision):
    '''
  returns:
    string of significant digits
    10s exponent to get the dot to the proper location in the significant digits
    bool that's true if value is less than zero else false

    created by William Rusnack
      github.com/BebeSparkelSparkel
      linkedin.com/in/williamrusnack/
      [email protected]
    contributions by Thomas Hladish
      github.com/tjhladish
      Issue: github.com/BebeSparkelSparkel/to-precision/issues/5
  '''
    value = float(value)
    is_neg = value < 0
    value = abs(value)

    if value == 0:
        sig_digits = '0' * precision
        power = -(1 - precision)

    else:
        if _math.isnan(value):
            return _math.nan

        power = -1 * _floor(_log10(value)) + precision - 1

        # issue soved by Thomas Haladish
        # github.com/BebeSparkelSparkel/to-precision/issues/5
        value_power = value * 10.0**power
        if value < 1 and \
            _floor(_log10(int(round(value_power)))) > \
            _floor(_log10(int(value_power))):
            power -= 1

        sig_digits = str(int(round(
            value *
            10.0**power)))  # cannot use value_power because power is changed

    return sig_digits, int(-power), is_neg
Пример #11
0
def _get_similarity(s1, s2):
    common_word_count = 0
    len_s1 = 0
    len_s2 = 0
    i = 0
    while i < len(s1):
        if s1[i] != '0' and s2[i] != '0':
            common_word_count = common_word_count + 1
        if s1[i] != '0':
            len_s1 = len_s1 + 1
        if s2[i] != '0':
            len_s2 = len_s2 + 1
        i = i + 1
    if len_s1 == 0 or len_s2 == 0:
        return 0
    log_s1 = _log10(len_s1)
    log_s2 = _log10(len_s2)
    if log_s1 + log_s2 == 0:
        return 0
    return common_word_count / (log_s1 + log_s2)
Пример #12
0
def _get_similarity(su1, su2):

    words1 = [ w for w in su1.processed if w[1] not in ['NNP', 'NNPS'] ]
    words2 = [ w for w in su2.processed if w[1] not in ['NNP', 'NNPS'] ]
    properNouns1 = [ w for w in su1.processed if w[1] in ['NNP', 'NNPS'] ]
    properNouns2 = [ w for w in su2.processed if w[1] in ['NNP', 'NNPS'] ]

    log_s1 = _log10(len(su1.processed)+1)
    log_s2 = _log10(len(su2.processed)+1)
    if log_s1 + log_s2 == 0:
        return 0

    eo = entail_overlap(words1, words2)
    no = noun_overlap(properNouns1, properNouns2)
    x = (eo + no) / (log_s1 + log_s2)

    # if su1.index <= 3 and su2.index <= 5:
    #     print "HELLO!!!!!!!"
    #     print su1.text
    #     print su2.text
    #     print _count_common_words(su1.text.split(), su2.text.split())
    #     print log_s1, log_s2
    #     print eo, no
    return x
Пример #13
0
def lg(x):
    """Return ``_log10(x)``; a short alias for that function."""
    result = _log10(x)
    return result
Пример #14
0
def lg(x):
    """Shorthand that forwards `x` to ``_log10`` and returns its result."""
    value = _log10(x)
    return value
Пример #15
0
MGPER  : number of angles with atoms completely in perturbed group
MDPER  : number of dihedrals with atoms completely in perturbed groups
IFBOX  : set to 1 if standard periodic box, 2 when truncated octahedral
NMXRS  : number of atoms in the largest residue
IFCAP  : set to 1 if the CAP option from edit was specified
NUMEXTRA: number of extra points
NCOPY  : Number of copies for advanced simulations
"""
# Named indices for the individual pointers, so callers can write e.g.
# pointers[NATOM] instead of pointers[0].  Most useful when loaded into the
# top-level namespace.
NATOM = 0
NTYPES = 1
NBONH = 2
MBONA = 3
NTHETH = 4
MTHETA = 5
NPHIH = 6
MPHIA = 7
NHPARM = 8
NPARM = 9
NEXT = 10
NRES = 11
NBONA = 12
NTHETA = 13
NPHIA = 14
NUMBND = 15
NUMANG = 16
NPTRA = 17
NATYP = 18
NPHB = 19
IFPERT = 20
NBPER = 21
NGPER = 22
NDPER = 23
MBPER = 24
MGPER = 25
MDPER = 26
IFBOX = 27
NMXRS = 28
IFCAP = 29
NUMEXTRA = 30
NCOPY = 31

# NNB is a second name for the same index as NEXT.
NNB = NEXT

# Angle unit conversion factors.
DEG_TO_RAD = _pi / 180.0
RAD_TO_DEG = 180.0 / _pi

# Tolerances for use in floating point comparisons.
TINY = 1.0e-8
SMALL = 1.0e-4
# NOTE(review): int() truncates toward zero, so if _log10 is math.log10
# these evaluate to -7 and -3 rather than -8 and -4 -- confirm that this is
# what the users of these constants expect.
TINY_DIGITS = int(_log10(TINY) + 0.5)
SMALL_DIGITS = int(_log10(SMALL) + 0.5)