Exemplo n.º 1
0
def get_user_bios(twitter, recent):
    """Collect tweetable bios from the accounts followed by 'verified'.

    Requests up to 200 friends of the ``verified`` screen name through
    ``twitter.get_friends_list`` and walks them in reverse order, keeping
    each cleaned description that passes every filter below.

    ``recent`` is handed to ``isTooSimilar`` so that bios resembling
    recent tweets are dropped. The surviving bios are shuffled in place
    and returned as a list.
    """
    response = twitter.get_friends_list(
        screen_name='verified',
        skip_status=True,
        include_user_entities=False,
        count=200)

    bios = []
    for account in reversed(response['users']):
        bio = clean_description(account['description'])

        # enforce length: at least 30 characters and 6 words
        if not bio or len(bio) < 30 or len(bio.split()) < 6:
            continue
        # respect privacy: only public, verified accounts
        if account['protected'] or not account['verified']:
            continue
        # no bad words in user name
        if blacklist.isOffensive(account['name']):
            continue
        # no bad words in description
        if blacklist.isOffensive(bio):
            continue
        # avoid non-english
        if 'en' not in account['lang'] or isNotEnglish(bio):
            continue
        # avoid repeating recent tweets
        if isTooSimilar(bio, recent):
            continue
        # avoid repeating bios already collected in this run
        if isTooSimilar(bio, bios):
            continue

        bios.append(bio)

    print("{} bios to tweet".format(len(bios)))
    random.shuffle(bios)
    return bios
Exemplo n.º 2
0
def hasAdultContent(data):
    """Return True if any gif in ``data['data']`` looks unsuitable.

    A gif is flagged when its lower-cased ``rating`` contains 'r', 'nc'
    or 'x', or when its optional ``source`` / ``username`` field is
    reported offensive by the blacklist. Returns False when no gif is
    flagged (including when the list is empty).
    """
    adult_markers = ('r', 'nc', 'x')
    for entry in data['data']:
        rating = entry['rating'].lower()
        flagged = (
            any(marker in rating for marker in adult_markers)
            or ('source' in entry and blacklist.isOffensive(entry['source']))
            or ('username' in entry and blacklist.isOffensive(entry['username']))
        )
        if flagged:
            return True
    return False
Exemplo n.º 3
0
def isValidTrack(track, max_length, recentTracks):
    """Decide whether ``track`` may be used.

    A track is rejected when its title or description is offensive, its
    title is longer than ``max_length``, its duration is below 60000,
    it is not embeddable by "all", its state is not "finished", or its
    title already appears in ``recentTracks``. Of the remaining tracks
    only those whose type is "original" or "remix" are accepted.
    """
    offensive = (blacklist.isOffensive(track.title)
                 or blacklist.isOffensive(track.description))
    if offensive:
        return False

    title_too_long = len(track.title) > max_length
    if title_too_long or track.duration < 60000:
        return False

    if track.embeddable_by != "all" or track.state != "finished":
        return False

    if track.title in recentTracks:
        return False

    return track.track_type in ("original", "remix")
Exemplo n.º 4
0
def getRandomWords(wordList=None):
    """Fetch random nouns from the Wordnik API, skipping offensive ones.

    Args:
        wordList: optional list to append the accepted words to. When
            omitted, a fresh list is created on every call. (The former
            ``[]`` default was a single list shared by all calls, so
            words from earlier calls leaked into later results.)

    Returns:
        ``wordList`` (or the new list) with each non-offensive word
        appended.

    Raises:
        AssertionError: if the API returns nothing. Raised explicitly
            rather than via ``assert`` so the check survives ``python -O``
            while callers still see the same exception type.
    """
    if wordList is None:
        wordList = []

    wordnik = swagger.ApiClient(config.wordnik_key, 'http://api.wordnik.com/v4')
    wordsApi = WordsApi.WordsApi(wordnik)
    # Named `results` (not `random`) to avoid shadowing the stdlib module name.
    results = wordsApi.getRandomWords(includePartOfSpeech='noun', minCorpusCount=2000,
        minDictionaryCount=12, hasDictionaryDef='true', maxLength=10)

    if not results:
        raise AssertionError("Wordnik API error")

    # filter out offensive words
    wordList.extend(
        r.word for r in results if not blacklist.isOffensive(r.word))

    return wordList
Exemplo n.º 5
0
def getHypernyms(wordnik, recent, word):
    """Return the usable hypernyms of ``word`` from the Wordnik API.

    A candidate is discarded when it overlaps ``word`` (either string
    contains the other), occurs inside any entry of ``recent``, is
    flagged by the blacklist, or fails the ``isNoun`` check. The result
    is a list, empty when the API returns nothing.
    """
    related = WordApi.WordApi(wordnik).getRelatedWords(
        word=word, relationshipTypes='hypernym')
    if not related:
        return []

    def is_rejected(candidate):
        # Checks run in this fixed order and stop at the first hit, so
        # isNoun (which receives the API client) is only reached when
        # the string and blacklist checks have passed.
        return (
            candidate in word
            or word in candidate
            or any(candidate in entry for entry in recent)
            or blacklist.isOffensive(candidate)
            or not isNoun(wordnik, candidate)
        )

    accepted = []
    for group in related:
        accepted.extend(c for c in group.words if not is_rejected(c))
    return accepted
Exemplo n.º 6
0
def getHypernyms(wordnik, recent, word):
    """Look up hypernyms of ``word`` and keep only the acceptable ones.

    Queries Wordnik for words related to ``word`` by the 'hypernym'
    relationship. Returns an empty list when the query yields nothing;
    otherwise returns every candidate that survives all filters.
    """
    api = WordApi.WordApi(wordnik)
    response = api.getRelatedWords(word=word, relationshipTypes='hypernym')

    kept = []
    if response:
        for relation in response:
            for candidate in relation.words:
                overlaps_word = candidate in word or word in candidate
                # The conditions short-circuit left to right, so the
                # blacklist and isNoun calls only happen when the
                # earlier checks did not already reject the candidate.
                if (overlaps_word
                        or any(candidate in used for used in recent)
                        or blacklist.isOffensive(candidate)
                        or not isNoun(wordnik, candidate)):
                    continue
                kept.append(candidate)
    return kept
Exemplo n.º 7
0
def getRandomWords(wordnik, recent):
    """Fetch random nouns from Wordnik and filter out unusable ones.

    A word is dropped when the blacklist flags it or when it occurs
    inside any entry of ``recent``. Asserts that the API returned a
    non-empty result ("Wordnik API error" otherwise).
    """
    api = WordsApi.WordsApi(wordnik)
    results = api.getRandomWords(
        includePartOfSpeech='noun',
        minCorpusCount=1000,
        minDictionaryCount=10,
        hasDictionaryDef='true',
        maxLength=10)

    assert results and len(results) > 0, "Wordnik API error"

    # keep words that are neither offensive nor part of a recent entry
    return [
        item.word
        for item in results
        if not blacklist.isOffensive(item.word)
        and not any(item.word in entry for entry in recent)
    ]
Exemplo n.º 8
0
def getRandomWords(wordnik, recent):
    """Return random Wordnik nouns that are safe and not recently used.

    Requests random nouns from the Wordnik words API, asserts that the
    response is non-empty, then keeps each word that is not offensive
    and does not occur inside any entry of ``recent``.
    """
    query_options = {
        'includePartOfSpeech': 'noun',
        'minCorpusCount': 1000,
        'minDictionaryCount': 10,
        'hasDictionaryDef': 'true',
        'maxLength': 10,
    }
    fetched = WordsApi.WordsApi(wordnik).getRandomWords(**query_options)

    assert fetched and len(fetched) > 0, "Wordnik API error"

    def usable(candidate):
        # The blacklist is consulted first; the recent-entries scan only
        # runs for words the blacklist did not reject.
        if blacklist.isOffensive(candidate):
            return False
        return not any(candidate in entry for entry in recent)

    words = []
    for item in fetched:
        if usable(item.word):
            words.append(item.word)
    return words
Exemplo n.º 9
0
def get_new_words(crossword_hints):
    """Append crossword entries built from random Wikipedia topics.

    Asks Wikipedia for 10 random page titles (none if the request times
    out), discards vague or offensive titles, and derives a short hint
    from each remaining page's summary. Every topic that yields an
    acceptable hint is appended to ``crossword_hints`` as a dict with
    the keys 'topic', 'hint' and 'crossword'.

    Returns the same ``crossword_hints`` list that was passed in.
    """
    bracketed = re.compile(r' \([^)]*\)')
    topic_head = re.compile(r'^([^,\(]*)')
    hint_head = re.compile(r'^([^.;:!?\(]*)')

    # Title fragments that mark a topic as too vague ('iowa': too many).
    vague_topic_markers = (' in ', 'list of ', 'iowa')

    # Hint fragments that make a clue too vague to solve.
    vague_hint_markers = (
        ' commune ', ' parish ', ' district ', ' town ', ' rayon of ',
        ' municipality in ', ' county ', ' actor', ' actress', ' singer',
        ' a swimmer ', 'football player', 'footballer', 'figure skater',
        'racing driver', 'soccer player', 'basketball player',
        'baseball player', 'tennis player', 'cricketer', 'politician',
        'common year', ' a year in ', ' a list of ',
    )

    try:
        print_safe("Getting random topics from Wikipedia...")
        random_topics = wikipedia.random(pages=10)
    except HTTPTimeoutError:
        random_topics = []

    for wiki_topic in random_topics:
        # reject some vague topics right away
        lowered_title = wiki_topic.lower()
        if any(marker in lowered_title for marker in vague_topic_markers):
            continue
        if blacklist.isOffensive(wiki_topic):
            continue

        # clean up the topic string: drop bracketed parts, then keep
        # only the text before the first comma or opening bracket
        topic = bracketed.sub('', wiki_topic)
        topic = topic_head.search(topic).group(0).strip()

        # reject too short or too long
        if not 3 <= len(topic) <= 20 or len(topic.split()) > 4:
            continue

        print_safe("Getting summary for " + wiki_topic + "...")
        summary = get_summary(wiki_topic)
        if not summary:
            continue

        # avoid disambiguation blurbs
        if 'this article is about' in summary.lower():
            continue

        # get the description of the topic
        hint = substring_after(summary,
                               [' is ', ' was ', ' are ', ' were '])

        # reject hints that are too vague to solve
        lowered_hint = hint.lower()
        if any(marker in lowered_hint for marker in vague_hint_markers):
            continue
        if blacklist.isOffensive(hint):
            continue

        # clean up the hint string
        hint = re.sub(r'U\.S\.', 'US', hint)
        hint = re.sub(r'D\.C\.', 'DC', hint)
        hint = bracketed.sub('', hint)
        hint = hint_head.search(hint).group(0).strip()

        # keep only hints of 5-36 characters with at least 4 words
        if 5 <= len(hint) <= 36 and len(hint.split()) >= 4:
            crossword_hints.append({
                'topic': topic,
                'hint': hint,
                'crossword': get_crossword_string(topic)
            })

    return crossword_hints