示例#1
0
def main():
    """Expand hashtags in the training tweets and write one tweet per line.

    Reads the tweets from the SemEval-2016 task 6 training file via
    ``oldWork.readTweets``, passes every hashtag word (except the
    ``#semst`` marker) through ``ParseSentence`` together with the word
    list from ``InitializeWords``, and writes each resulting tweet, with
    whitespace collapsed to single spaces, as one line of
    ``../processedTweets.txt``.
    """
    wordlist = InitializeWords()
    tweetFile = '../dataset_raw/semeval2016-task6-trainingdata.txt'
    tweetTags = '../processedTweets.txt'

    tweets = oldWork.readTweets(tweetFile)
    # The last element returned by readTweets is not a tweet (elsewhere in
    # this file it is kept as the tweet classes), so drop it before looping.
    tweets = tweets[:len(tweets) - 1]

    # Open the output only after the input was read successfully, and let the
    # context manager flush and close it even if processing raises.
    with open(tweetTags, 'w') as tweetWriter:
        for tweet in tweets:
            words = []
            for word in tweet:
                # Only hashtags are rewritten; '#semst' is left untouched.
                if len(word) > 0 and word[0] == '#' and word != '#semst':
                    # presumably splits the hashtag into separate words --
                    # confirm against ParseSentence
                    word = ParseSentence(word, wordlist)
                words.append(word)
            # split()/join collapses any run of whitespace (including what
            # ParseSentence may have introduced) into single spaces.
            sentence = ' '.join(' '.join(words).split())
            tweetWriter.write(sentence + '\n')
def main():
	"""Expand hashtags in the training tweets and write one tweet per line.

	Reads the tweets from the SemEval-2016 task 6 training file via
	``oldWork.readTweets``, passes every hashtag word (except the
	``#semst`` marker) through ``ParseSentence`` together with the word
	list from ``InitializeWords``, and writes each resulting tweet, with
	whitespace collapsed to single spaces, as one line of
	``../processedTweets.txt``.
	"""
	wordlist = InitializeWords()
	tweetFile = '../dataset_raw/semeval2016-task6-trainingdata.txt'
	tweetTags = '../processedTweets.txt'

	tweets = oldWork.readTweets(tweetFile)
	# The last element returned by readTweets is not a tweet (elsewhere in
	# this file it is kept as the tweet classes), so drop it before looping.
	tweets = tweets[:len(tweets) - 1]

	# Open the output only after the input was read successfully, and let the
	# context manager flush and close it even if processing raises.
	with open(tweetTags, 'w') as tweetWriter:
		for tweet in tweets:
			words = []
			for word in tweet:
				# Only hashtags are rewritten; '#semst' is left untouched.
				if len(word) > 0 and word[0] == '#' and word != '#semst':
					# presumably splits the hashtag into separate words --
					# confirm against ParseSentence
					word = ParseSentence(word, wordlist)
				words.append(word)
			# split()/join collapses any run of whitespace (including what
			# ParseSentence may have introduced) into single spaces.
			sentence = ' '.join(' '.join(words).split())
			tweetWriter.write(sentence + '\n')
import os
import sys
import IntermediateProjectWork as oldWork

# Script setup: load the original training tweets, open the raw training
# file, the tagged-tweet file and the output file, and copy the first line
# of the raw file to the output before the per-tweet loop starts.
origTweetFile = '../dataset_raw/semeval2016-task6-trainingdata.txt'

# readTweets returns a sequence whose last element is split off below as
# origTweetClasses; everything before it is kept as origTweets.
# NOTE(review): presumably the last element holds the class labels --
# confirm against IntermediateProjectWork.readTweets.
origTweets = oldWork.readTweets(origTweetFile)
origTweetClasses = origTweets[len(origTweets) - 1]
origTweets = origTweets[:len(origTweets) - 1]

# The same raw file is opened a second time so it can be read line by line.
origTweetReader = open(origTweetFile)

taggedTweetFile = '../tweetsTagged.txt'
taggedTweetReader = open(taggedTweetFile)

outputFile = '../dataset_raw/semeval2016-task6-edited-trainingdata.txt'
outputWriter = open(outputFile, 'w')

# Copy the first line of the raw file to the output unchanged.
# NOTE(review): presumably this is the header row -- confirm.
origLine = origTweetReader.readline()
outputWriter.write(origLine)
# print origTweets[0]
for taggedTweet in taggedTweetReader:
	print taggedTweet
	origLine = origTweetReader.readline()
	# print 'yo'
	origLineSplit = origLine.split('\t')
	# print origLineSplit
	outputWriter.write(origLineSplit[0] + '\t' + origLineSplit[1] + '\t')
	# print taggedTweet
	line = taggedTweet.strip().split('\t')
	tweetString = line[0]