Example #1
def writeRhymes():
	countryInfo = util.readExamples("country")["country"]
	hiphoprapInfo = util.readExamples("hiphoprap")["hiphoprap"]
	popInfo = util.readExamples("pop")["pop"]

	wordMap = {}  # maps each word to itself: a dict used as a set to dedupe words

	lines = countryInfo["lines"]
	lines += hiphoprapInfo["lines"]
	lines += popInfo["lines"]

	for line in lines:
		line = line.split(" ")
		for word in line:
			word = word.lower()
			if word not in wordMap:
				wordMap[word] = word

	values = wordMap.values()

	fo = open("rhymes.txt", "w")

	ct = 0
	for word in values:
		# comma-joined entries hold several words; rhyme each part separately
		# ("word".split(",") is just ["word"], so single words take the same path)
		for splitWord in word.split(","):
			print "%i. finding rhyme for: %s" % (ct, splitWord)
			fo.write("%s:" % splitWord.encode('utf8'))
			splitWord = turnNumIntoString(splitWord)
			rhymes = util.findRhymes(splitWord)
			# comma-separate the rhymes, with no trailing comma after the last
			for i in range(len(rhymes)):
				text = "%s," % rhymes[i] if i != len(rhymes) - 1 else "%s" % rhymes[i]
				fo.write(text.encode('utf8'))
			fo.write("\n")
			ct += 1
	fo.close()
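
turnNumIntoString is called above but not shown. A minimal sketch of what it plausibly does, assuming it only spells out digit characters so util.findRhymes receives a pronounceable token (the digit map and pass-through behavior are assumptions, not the project's code):

# hypothetical stand-in for the turnNumIntoString helper used above:
# spell out digits so e.g. "2pac" becomes "twopac"
DIGIT_NAMES = {"0": "zero", "1": "one", "2": "two", "3": "three", "4": "four",
               "5": "five", "6": "six", "7": "seven", "8": "eight", "9": "nine"}

def turnNumIntoString(word):
	return "".join(DIGIT_NAMES.get(ch, ch) for ch in word)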
Example #2
def createGrammarTree(genre):
	def tree(): return defaultdict(tree)
	
	def add(t, path):
		for node in path:
			t = t[node]

	#genres = ["country", "hiphoprap", "pop"]
	genreDict = util.readExamples(genre)
	sentences = genreDict[genre]["lines"]
	grammarTree = tree()
	for i,sentence in enumerate(sentences):
		print i
		s = tagSentence(sentence)
		partsOfSpeech = [tup[1] for tup in s]
		add(grammarTree,partsOfSpeech)
	return grammarTree
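
The tree()/add() pair is the autovivifying defaultdict idiom: indexing a missing key creates a fresh subtree, so inserting each sentence's POS sequence builds a prefix tree of grammatical structures. A self-contained demonstration of the idiom:

from collections import defaultdict

def tree(): return defaultdict(tree)

def add(t, path):
	for node in path:
		t = t[node]  # a missing key autovivifies into a new subtree

t = tree()
add(t, ["NNP", "VBZ", "DT", "NN"])
add(t, ["NNP", "VBZ", "JJ"])
print sorted(t["NNP"]["VBZ"].keys())  # ['DT', 'JJ'] -- shared prefix, two branches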
Example #3
	def __init__(self, genre, startLyrics=None):
		self.genre = genre
		self.genre_db = util.readExamples(genre)
		self.blank_marker = '_'
		self.startLyrics = startLyrics
		self.ceiling = 1000000
		self.bigramCeiling = 100000
		self.trigramCeiling = 10000
		self.fourgramCeiling = 1000
		self.tagger = tagger.Tagger()		
		self.cache = {}
		self.rhymeCache = {}
		self.syllableCache = {}
		self.endingCache = {}
		self.numBigrams = 0
		self.numTrigrams = 0
		self.numFourgrams = 0
		self.sentenceEndingFloor = 50
Example #4
def createPOSMaps():
	wordsToPOS = {}
	PosToWords = {}
	genres = ["country","hiphoprap","pop"]
	for g,genre in enumerate(genres):
		genreDict = util.readExamples(genre)
		sentences = genreDict[genre]["lines"]
		for i,sentence in enumerate(sentences):
			print "genre:{}, line:{}".format(genres[g],i)
			s = tagSentence(sentence)
			for word, pos in s:
				if word not in wordsToPOS:
					wordsToPOS[word] = [pos]
				elif pos not in wordsToPOS[word]:
					wordsToPOS[word].append(pos)
				if pos not in PosToWords:
					PosToWords[pos] = [word]
				elif word not in PosToWords[pos]:
					PosToWords[pos].append(word)
	with open("wordsToPOS.json",'w+') as outfile1:
		outfile1.write(json.dumps(wordsToPOS,outfile1))
	with open("POSToWords.json",'w+') as outfile2:
		outfile2.write(json.dumps(PosToWords,outfile2))
	return
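
A short usage sketch (not part of the source) for loading the two maps back; the file names are the ones createPOSMaps writes:

import json

with open("wordsToPOS.json") as f:
	wordsToPOS = json.load(f)
with open("POSToWords.json") as f:
	POSToWords = json.load(f)

print wordsToPOS.get("love", [])    # every POS tag observed for "love"
print POSToWords.get("NN", [])[:5]  # a few words observed as singular nouns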
Example #5
def createPartsOfSpeechFile(fileName, genre):
	fo = open(fileName, "w")
	genreDict = util.readExamples(genre)
	lines = genreDict[genre]["lines"]
	
	print len(lines)
	ct = 0
	structureMap = {}
	for line in lines:
		if ct == 5:
			break
		st = getStructure(line)
		print "%s | %s" % (st, line)
		splitArr = st.split(",")
		lineArr = line.split(",")
		for i in range(len(splitArr)):
			s = splitArr[i]
			l = lineArr[i]
			if structureMap.get(s) is None:
				structureMap[s] = (s, [l], 1)
			else:
				# append the phrase and bump this structure's count; the count
				# lives in the tuple so the line counter ct stays untouched
				tup = structureMap[s]
				structureMap[s] = (tup[0], tup[1] + [l], tup[2] + 1)
		ct += 1  # one line processed
		

	for k,tup in structureMap.iteritems():
		fo.write("%s|%s|%s" % (tup[0],tup[1], tup[2]))
		fo.write("\n")

	fo.close()
Example #6
            num_punchlines += 1
    precision = 1.0 - float(num_punchlines) / len(examples)
    recall = 0.0
    fscore = 0.0
    print "\tPrecision:%f\n\tRecall:%f\n\tF1:%f" % (precision, recall, fscore)


def realtimePredict(vocabulary, freq_col_idx, regr):
    '''
        Predicts based on the punchline typed at the prompt
    '''
    x = raw_input('Give me a punchline: ')
    print x
    while (x):
        examples = []
        examples.append((x, 0))
        feature, _, _ = fitModel(examples,
                                 vocab=vocabulary,
                                 frequent_ngram_col_idx=freq_col_idx)
        predict = regr.predict(feature)
        print 'Your punchline was funny: ', predict[0]
        x = raw_input('Give me a punchline: ')


trainExamples = util.readExamples('switchboardsampleL.train')
valExamples = util.readExamples('switchboardsampleL.val')
testExamples = util.readExamples('switchboardsampleL.test')
vocabulary, freq_col_idx, regr = learnPredictor(trainExamples, valExamples,
                                                testExamples)
allPosNegBaseline(trainExamples, valExamples, testExamples)
realtimePredict(vocabulary, freq_col_idx, regr)
Example #7
    trainPredict = regr.predict(trainX)

    print("coefficient of acoustic", regr.coef_)
    print("TRAIN Mean squared error", mean_squared_error(trainY, trainPredict))
    print("TRAIN Variance score", r2_score(trainY, trainPredict))

    devX, _, _ = extractFeatures(devExamples,
                                 vocab=vocabulary,
                                 frequent_ngram_col_idx=frequent_ngram_col_idx)
    devY = [y for x, y in devExamples]

    devX = np.array(devX)
    devY = np.reshape(np.array(devY), (len(devY), 1))
    print('DEV X shape', devX.shape)
    print('DEV Y shape', devY.shape)
    pickle.dump(devX, open('devX_b.pkl', 'wb'))
    pickle.dump(devY, open('devY_b.pkl', 'wb'))

    devPredict = regr.predict(devX)
    print("DEV Mean squared error", mean_squared_error(devY, devPredict))
    print("DEV Variance score", r2_score(devY, devPredict))

    return vocabulary, frequent_ngram_col_idx, regr


trainExamples = util.readExamples('moviesS_b.train')
devExamples = util.readExamples('moviesS_b.dev')
testExamples = util.readExamples('moviesS_b.test')

trainPredictor(trainExamples, devExamples)
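
A quick round-trip check (not in the source) for the dev arrays pickled above:

import pickle

with open('devX_b.pkl', 'rb') as f:
    devX = pickle.load(f)
with open('devY_b.pkl', 'rb') as f:
    devY = pickle.load(f)
assert devX.shape[0] == devY.shape[0]  # one label row per feature row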
Example #8
            # main REPL loop
            response = raw_input("Press 's' to start: ")
            while response != 'q':
                print("press enter to stop recording")
                record_audio()
                print("audio recorded")
                transcript = get_transcript_from_file(credential)
                print("transcript: ", transcript)
                convert_audio_sample()

                test_dataset = load_dataset("laughbot_audio.test.pkl")
                feature_b, label_b, seqlens_b = make_batches(
                    test_dataset, batch_size=len(test_dataset[0]))
                feature_b = pad_all_batches(feature_b)
                batch_cost, summary, acc, predicted, acoustic = model.train_on_batch(
                    session,
                    feature_b[0],
                    label_b[0],
                    seqlens_b[0],
                    train=False)
                text = readExamples('laughbot_text.txt')
                prediction = predictLaughter(text, acoustic)
                if prediction[0] == 1:
                    play_laughtrack()
                else:
                    print('Not funny :(')
                response = raw_input("Press 'c' to continue, 'q' to quit: ")

            print('Thanks for talking to me')
Example #9
File: main.py  Project: braca51e/cs221_ai
import submission, util
from collections import defaultdict

# Read in examples
trainExamples = util.readExamples('names.train')
devExamples = util.readExamples('names.dev')


def featureExtractor(x):
    # x = "took Mauritius into"
    phi = defaultdict(float)
    #phi[x] = 1
    tokens = x.split()
    left, entity, right = tokens[0], tokens[1:-1], tokens[-1]
    phi['entity is ' + ' '.join(entity)] = 1
    phi['left is ' + left] = 1
    phi['right is ' + right] = 1
    for word in entity:
        phi['entity contains ' + word] = 1
        phi['entity contains prefix ' + word[:4]] = 1
        phi['entity contains suffix ' + word[-4:]] = 1
    return phi


# Learn a predictor
weights = submission.learnPredictor(trainExamples, devExamples,
                                    featureExtractor, 30, 0.05)
util.outputWeights(weights, 'weights')
util.outputErrorAnalysis(devExamples, featureExtractor, weights,
                         'error-analysis')
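
For the sample window in the comment, the extractor yields six indicator features (a worked check, not in the source):

phi = featureExtractor("took Mauritius into")
# defaultdict containing:
#   'entity is Mauritius': 1, 'left is took': 1, 'right is into': 1,
#   'entity contains Mauritius': 1,
#   'entity contains prefix Maur': 1, 'entity contains suffix tius': 1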
Example #10
        for x, y in trainExamples:
            phi = featureExtractor(x)
            # print phi
            # hinge loss: if the margin y * (w . phi) is below 1, take a
            # subgradient step and update the weights by stepSize * y * phi
            margin = y*util.dotProduct(weights, phi)
            if (1-margin) > 0:
                increment(weights, stepSize*y, phi)

        # use the learned weights to classify x
        def predictor(x):
            phi = featureExtractor(x)
            score = util.dotProduct(weights, phi)
            return 1 if score > 0 else -1

        # Print out training and test error for every iteration:
        # print 'TRAINING ERROR:', util.evaluatePredictor(trainExamples, predictor)
        # print 'TEST ERROR:', util.evaluatePredictor(testExamples, predictor)
    return weights

# Run
trainReviews = util.readExamples('reviews.train')
testReviews = util.readExamples('reviews.dev')
featureExtractor = extractWordFeatures
weights = learnPredictor(trainReviews, testReviews, featureExtractor)

print 'output weights:', weights
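
The update inside the training loop is the hinge-loss subgradient step. A standalone sketch with util.dotProduct and increment replaced by inline equivalents (the helper names and the toy feature vector are illustrative, not the project's):

def dot(w, phi):
    return sum(w.get(k, 0.0) * v for k, v in phi.items())

def hingeStep(weights, phi, y, stepSize):
    # subgradient of max(0, 1 - y * (w . phi)): step only when the margin < 1
    if 1 - y * dot(weights, phi) > 0:
        for k, v in phi.items():
            weights[k] = weights.get(k, 0.0) + stepSize * y * v

w = {}
hingeStep(w, {"great": 1.0, "movie": 1.0}, 1, 0.05)
print 'after one step:', w  # {'great': 0.05, 'movie': 0.05}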