Пример #1
0
def count_articles(cursor=None):
    '''
    Count the articles yielded by a cursor.

    Args:
        cursor: optional iterable of articles. When omitted, the cursor
            returned by ``mc.getData()`` is used.

    Returns:
        The number of items the cursor yields (0 when it yields nothing).
    '''
    if cursor is None:
        cursor = mc.getData()

    # Rely on the iterator protocol rather than calling ``.next()`` by hand,
    # so any iterable works, not only objects that expose ``.next()``.
    return sum(1 for _ in cursor)
Пример #2
0
def no_duplicates(cursor=None):
    '''
    Verify that the database does not have duplicate articles.

    Two articles count as duplicates when their "title" values are equal.

    Args:
        cursor: optional iterable of articles (mappings with a "title"
            key). When omitted, the cursor returned by ``mc.getData()``
            is used.

    Returns:
        True when every title is unique (including when there are no
        articles). False as soon as a repeated title is met; that title is
        printed before returning.

    Raises:
        KeyError: if an article has no "title" key.
    '''
    if cursor is None:
        cursor = mc.getData()

    seen_titles = set()
    # A plain ``for`` loop replaces the manual ``.next()`` / StopIteration
    # handling and works with any iterable.
    for article in cursor:
        title = article["title"]
        if title in seen_titles:
            print(title)
            return False
        seen_titles.add(title)
    return True
Пример #3
0
def get_duplicates(cursor=None):
    '''
    Return the titles of duplicate articles in the database.

    Args:
        cursor: optional iterable of articles (mappings with a "title"
            key). When omitted, the cursor returned by ``mc.getData()``
            is used.

    Returns:
        A list of titles, in iteration order, with one entry for every
        repeated occurrence: a title seen k times appears k - 1 times.
        The list is empty when there are no duplicates.

    Raises:
        KeyError: if an article has no "title" key.
    '''
    if cursor is None:
        cursor = mc.getData()

    seen_titles = set()
    duplicates = []
    # A plain ``for`` loop replaces the manual ``.next()`` / StopIteration
    # handling and works with any iterable.
    for article in cursor:
        title = article["title"]
        if title in seen_titles:
            duplicates.append(title)
        else:
            seen_titles.add(title)
    return duplicates
Пример #4
0
def count_true_and_false(cursor=None):
    '''
    Count the number of true and false articles in the database.

    An article is counted as true when its "truth" value is truthy and as
    false otherwise.

    Args:
        cursor: optional iterable of articles (mappings with a "truth"
            key). When omitted, the cursor returned by ``mc.getData()``
            is used.

    Returns:
        A ``(true_count, false_count)`` tuple; ``(0, 0)`` when there are
        no articles.

    Raises:
        KeyError: if an article has no "truth" key.
    '''
    if cursor is None:
        cursor = mc.getData()

    true_count = 0
    false_count = 0
    # A plain ``for`` loop replaces the manual ``.next()`` / StopIteration
    # handling and works with any iterable.
    for article in cursor:
        if article["truth"]:
            true_count += 1
        else:
            false_count += 1
    return true_count, false_count
Пример #5
0
def count_duplicates_by_truthiness(cursor=None):
    '''
    Count duplicate articles in the database, split by their truth value.

    The first article seen with a given title is not a duplicate; every
    later article with that title is, and it is classified by its own
    "truth" value (truthy or falsy).

    Args:
        cursor: optional iterable of articles (mappings with "title" and
            "truth" keys). When omitted, the cursor returned by
            ``mc.getData()`` is used.

    Returns:
        A ``(true_duplicate_count, false_duplicate_count)`` tuple;
        ``(0, 0)`` when there are no duplicates.

    Raises:
        KeyError: if an article has no "title" or no "truth" key.
    '''
    if cursor is None:
        cursor = mc.getData()

    seen_titles = set()
    true_duplicate_count = 0
    false_duplicate_count = 0
    # A plain ``for`` loop replaces the manual ``.next()`` / StopIteration
    # handling and works with any iterable.
    for article in cursor:
        # Both keys are read for every article, so a missing key raises
        # even on a first (non-duplicate) occurrence.
        title = article["title"]
        truth = article["truth"]
        if title not in seen_titles:
            seen_titles.add(title)
        elif truth:
            true_duplicate_count += 1
        else:
            false_duplicate_count += 1
    return true_duplicate_count, false_duplicate_count