def __init__(self):
    """Set up three independent Markov chains for melody, rhythm and harmony."""
    # Last note fed to add(); None until the first note arrives.
    self._previous_note = None
    # Chain over the 12 chromatic pitch classes (sharps only).
    self._markovNote = MarkovChain(
        ["a", "a#", "b", "c", "c#", "d", "d#", "e", "f", "f#", "g", "g#"])
    # Chain over note duration values (1 = whole ... 16 = sixteenth).
    self._timings = MarkovChain([1, 2, 4, 8, 16])
    # Chain over chord roots; same 12 pitch classes as the melody chain.
    self._markovChord = MarkovChain(
        ["a", "a#", "b", "c", "c#", "d", "d#", "e", "f", "f#", "g", "g#"])
class PoetryGenerator:
    """Generate pseudo-poetry with an order-k Markov chain over word tokens."""

    def __init__(self, k, n):
        self.mc = MarkovChain()  # transition table: k-word tuple -> next word
        self.k = k               # order of the chain (state size in words)
        self.n = n               # number of words to generate

    def train(self, filename):
        """Tokenize *filename* and record every k-gram -> next-word transition."""
        # Tokens are word-like runs (incl. punctuation/apostrophes) or newlines.
        # Compiled once instead of once per line.
        pattern = re.compile(r"([\w.,!?;']+|[\n])")
        words = []
        # 'with' guarantees the file is closed (the original leaked the handle).
        with open(filename) as file:
            for line in file:
                words += pattern.findall(line)
        print(words)
        # The original range stopped at len - k - 1, silently dropping the
        # final k-gram/next-word pair; len - k covers every transition.
        for i in range(len(words) - self.k):
            state = tuple(words[i:i + self.k])
            next_word = words[i + self.k]  # renamed: don't shadow builtin next()
            self.mc.add_state(state, next_word)

    def generate(self):
        """Print a random seed state, then n words sampled from the chain."""
        state = list(self.mc.get_random_tuple())
        for w in state:
            print("{} ".format(w), end='')
        for _ in range(self.n):
            gen_word = self.mc.get_state(tuple(state))
            print("{} ".format(gen_word), end='')
            # Slide the window: drop the oldest word, append the new one.
            # The original hard-coded index 2, which only worked for k == 3.
            state = self.shift(state, 1)
            state[self.k - 1] = gen_word

    def shift(self, l, n):
        """Rotate list *l* left by *n* positions and return the new list."""
        return l[n:] + l[:n]
class TestMarkovChain(unittest.TestCase):
    """Smoke tests for MarkovChain built from a corpus file."""

    def setUp(self):
        # Fresh chain per test, built from an (expected) empty corpus file.
        self.mc = MarkovChain("empty.txt")

    def test_empty(self):
        # printMatrix() is expected to return None for an empty model.
        self.assertEqual(self.mc.printMatrix(), None)

    def test_updateModelSimple(self):
        # No assertion: only checks updateModel() accepts a plain sentence
        # without raising.
        self.mc.updateModel("Hello, my name is Andy!")
class MusicMatrix:
    """Markov model of a melody: parallel chains for pitch, timing and chord."""

    def __init__(self):
        # No note seen yet; the very first add() only primes this field.
        self._previous_note = None
        chromatic = ["a", "a#", "b", "c", "c#", "d",
                     "d#", "e", "f", "f#", "g", "g#"]
        self._markovNote = MarkovChain(list(chromatic))
        # Note duration values: whole, half, quarter, eighth, sixteenth.
        self._timings = MarkovChain([1, 2, 4, 8, 16])
        self._markovChord = MarkovChain(list(chromatic))

    def add(self, to_note):
        """Add a path from a note to another note.

        Re-adding a path between notes will increase the associated weight.
        """
        previous = self._previous_note
        if previous is None:
            # First note ever: nothing to link from yet, just remember it.
            self._previous_note = to_note
            return
        # A note is a (pitch, timing, chord) triple; feed each component
        # to its dedicated chain.
        self._markovNote.add(previous[0], to_note[0])
        self._timings.add(previous[1], to_note[1])
        self._markovChord.add(previous[2], to_note[2])
        self._previous_note = to_note

    def next_note(self, from_note):
        """Sample the successor [pitch, timing, chord] of *from_note*."""
        return [
            self._markovNote.next_value(from_note[0]),
            self._timings.next_value(from_note[1]),
            self._markovChord.next_value(from_note[2]),
        ]
def jsonToMarkovChain(path, encoding=None):
    """Load a MarkovChain from the JSON file at *path*.

    encoding: optional text encoding; when given, io.open is used with it.
    Returns a MarkovChain built either from the three split arrays
    (when every key in ARRAY_KEYS is present) or from the raw dict.
    """
    if encoding:
        opened = io.open(path, 'r', encoding=encoding)
    else:
        opened = open(path, 'r')
    # 'with' ensures the handle is closed (the original leaked it).
    with opened as file:
        data = json.load(file)
    # A dict carrying all of ARRAY_KEYS is the "split arrays" layout:
    # state space, initial distribution, transition matrix.
    if all(k in data for k in ARRAY_KEYS):
        return MarkovChain(data[SS], data[ID], data[TM])
    return MarkovChain(data)
def __init__(self, user, filename, repliesFilename):
    """
    user: The id of the user that you want to collect data from and that
    will be used in the app.

    filename: the name of the file that you want to store user's tweets
    (excluding replies) e.g: "data_tweets.json". Please insert {} in this
    file otherwise it'll crash.

    repliesFilename: the name of the file that you want to store user's
    tweets (only replies to other users) ex: "data_replies.json". Please
    insert {} in this file otherwise it'll crash.
    """
    self._user = user
    # Timeline list, filled later by a collect call; None until then.
    self._stream = None
    self._filename = filename
    # Text generator trained on the stored-tweets file.
    self._MarkovChain = MarkovChain(self._filename)
    self._repliesFilename = repliesFilename
    # user_id -> reply stats; populated elsewhere.
    self._repliedUsers = {}
def parse_porn_csv(csv_file, title_index):
    """Build (or load a cached) MarkovChain trained on the titles in *csv_file*.

    title_index is unused; kept for backward compatibility with callers.
    Returns a MarkovChain whose database is persisted next to the CSV.
    """
    markov_db_filename = './{csv}_markov_db'.format(csv=csv_file)
    # Reuse the name computed above instead of re-formatting the same string.
    if os.path.isfile(markov_db_filename):
        return MarkovChain(markov_db_filename)
    # NOTE(review): StringIO.StringIO is Python 2 — on Python 3 this must
    # become io.StringIO.
    porn_titles = StringIO.StringIO()
    with open(csv_file) as porn_csv:
        reader = csv.DictReader(porn_csv, delimiter='|', fieldnames=[
            'iframe', 'thumbnail', 'samples', 'title', 'tags', 'more_tags',
            'unknown', 'length', 'views', 'likes', 'dislikes'
        ])
        # One trailing '.' per title so the chain learns sentence boundaries.
        for row in reader:
            porn_titles.write(row['title'] + '.')
    mc = MarkovChain(markov_db_filename)
    mc.generateDatabase(porn_titles.getvalue())
    mc.dumpdb()  # persist so the isfile() fast path hits next time
    return mc
def initLeftRightMC(self, nStates, stateDuration=None):
    """Build a left-right (Bakis) MarkovChain with *nStates* states.

    stateDuration: expected duration per state — a scalar, a length-1
    sequence (broadcast to all states) or a length-nStates sequence.
    Returns a MarkovChain with an (nStates x nStates+1) transition matrix;
    the extra column is the exit state.
    """
    defaultDuration = 10.0
    if nStates <= 1:
        # NOTE(review): the original only printed and carried on; consider
        # raising ValueError here instead.
        print('Number of states must be > 1')  # py3 print() (was py2 statement)
    if stateDuration is None:
        stateDuration = defaultDuration
    # Broadcast a scalar/singleton duration to one entry per state.
    if type(stateDuration) == float or type(
            stateDuration) == np.float64 or len(stateDuration) == 1:
        stateDuration = np.tile(stateDuration, (nStates, 1))
    elif len(stateDuration) != nStates:
        print('Incompatible length of state durations')
    minDiagProb = 0.1
    # Expected duration D -> self-loop prob aii = (D-1)/D, floored at minDiagProb.
    D = np.maximum(np.ones((stateDuration.shape)), stateDuration)
    aii = np.maximum(
        np.ones((D.shape)) * minDiagProb, np.divide((D - 1), D))
    aij = (1 - aii)
    # Forward transitions live on the first superdiagonal (left-right topology).
    aij = np.diagflat(aij, 1)
    aij = aij[0:nStates, :]
    # Append a zero column for the exit state, then add the forward mass.
    A = np.concatenate((np.diagflat(aii), np.zeros(
        (nStates, 1))), axis=1) + aij
    # Always start in the first state.
    p0 = np.concatenate((np.matrix([1]), np.zeros((nStates - 1, 1))), axis=0)
    mc = MarkovChain(p0, A)
    return mc
def __init__(self, soundfont_name=None):
    """Initialise fluidsynth (ALSA driver) and the chord-progression chain."""
    if soundfont_name is None:
        # Fall back to the bundled soundfont.
        fluidsynth.init('../soundfonts/soundfont.sf2', 'alsa')
    else:
        fluidsynth.init(soundfont_name, 'alsa')
    # Chain over chord progressions harvested from the corpus.
    self.m_chain = MarkovChain(get_all_progressions())
    # Endless iterator of chords to play next.
    self.sim = self.m_chain.infinite_progression()
    # Probability triples per eighth-note slot driving the bass/drum helpers
    # — exact semantics defined by bassline()/drum_beat(); TODO confirm.
    down1 = (0.7, 0.05, 0.2)
    down2 = (0.2, 0.05, 0.7)
    off = (0.0, 0.4, 0.1)
    self.bassproba = [down1, off, down2, off, down1, off, down2, off]
    self.current = self.m_chain.START
def test_backward():
    """Exercise MarkovChain.backward() against hand-computed expected values."""
    # Case 1: left-right chain with an exit column (finite-duration HMM).
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.9, 0.1, 0], [0, 0.9, 0.1]])
    x = np.matrix([-0.2, 2.6, 1.3])
    c = np.matrix([1.0, 0.1625, 0.8266, 0.0581])  # scale factors from forward pass
    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))
    pX, logS = g1.prob(np.matrix([[g1], [g2]]), x)
    betaHat = mc.backward(mc, pX, c)
    # Python 3 print() calls (original used Python 2 print statements,
    # which do not parse under Python 3).
    print('betaHat:', betaHat)
    print('expected: [1.0003 1.0393 0; 8.4182 9.3536 2.0822]')
    # Case 2: ergodic two-state chain (no exit column) — compute the scale
    # factors with forward() first, then run backward().
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.9, 0.1], [0.1, 0.9]])
    x = np.matrix([-0.2, 2.6, 1.3])
    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))
    pX, logS = g1.prob(np.matrix([g1, g2]), x)
    alphaHat, c = mc.forward(mc, pX)
    betaHat = mc.backward(mc, pX, c)
    print('betaHat:', betaHat)
    print('expected: [1.0 6.798238264 1.125986646; 5.223087455 5.75095566 1.125986646]')
def generate_words():
    """Train a word-level MarkovChain on frost.txt and return a 10-word walk."""
    # 'with' closes the file (the original opened it and never closed it).
    with open("./frost.txt", "r") as my_file:
        lines = my_file.readlines()
    # Flatten every line into one list of whitespace-separated words.
    words_list = [word for line in lines for word in line.split()]
    markovchain = MarkovChain(words_list)
    return markovchain.walk(10)
def multi_dim_observation():
    """Draw (X, S): 100 two-dimensional observations plus their hidden states."""
    chain = MarkovChain(np.matrix([[0.75], [0.25]]),
                        np.matrix([[0.99, 0.01], [0.03, 0.97]]))
    # Two bivariate Gaussian emission densities with identical covariance.
    g1 = GaussD(mean=np.matrix([[0], [0]]), cov=np.matrix([[2, 1], [1, 4]]))
    g2 = GaussD(mean=np.matrix([[3], [3]]), cov=np.matrix([[2, 1], [1, 4]]))
    model = HMM(chain, np.matrix([[g1], [g2]]))
    X, S = model.rand(model, 100)
    return (X, S)
def finite_duration():
    """Sample 100 steps from a finite-duration HMM (transition matrix carries
    an extra exit column)."""
    chain = MarkovChain(np.matrix([[0.75], [0.25]]),
                        np.matrix([[0.4, 0.4, 0.2], [0.1, 0.6, 0.3]]))
    # Scalar Gaussian emissions: N(0, 1) and N(3, 2).
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))
    model = HMM(chain, np.matrix([[g1], [g2]]))
    X, S = model.rand(model, 100)
    return (X, S)
def generate_dict(api, user_id):
    """Generates Dictionary, no RT no @"""
    kept = list()
    # Walk the user's full timeline via the paging cursor.
    for tweet in tweepy.Cursor(api.user_timeline, id=user_id).items():
        text = tweet._json['text'].encode('utf-8')
        #tweet_text = json.dumps(tweet).encode('utf-8')
        # Drop retweets and @-replies; keep everything else.
        if not text.startswith('RT') and not text.startswith('@'):
            kept.append(text)
    return MarkovChain(kept)
class Snooki(object):
    """Endless accompaniment: chords from a Markov chain over progressions,
    plus generated bass and drum bars, played through fluidsynth."""

    def __init__(self, soundfont_name=None):
        """Initialise fluidsynth (ALSA) and the chord-progression chain."""
        if soundfont_name is None:
            # Fall back to the bundled soundfont.
            fluidsynth.init('../soundfonts/soundfont.sf2', 'alsa')
        else:
            fluidsynth.init(soundfont_name, 'alsa')
        self.m_chain = MarkovChain(get_all_progressions())
        # Endless iterator of chords to play next.
        self.sim = self.m_chain.infinite_progression()
        # Probability triples per eighth-note slot consumed by bassline() /
        # drum_beat(); exact semantics defined by those helpers — not visible here.
        down1 = (0.7, 0.05, 0.2)
        down2 = (0.2, 0.05, 0.7)
        off = (0.0, 0.4, 0.1)
        self.bassproba = [down1, off, down2, off, down1, off, down2, off]
        self.current = self.m_chain.START

    def _next_bar(self):
        """Yield (chord bar, bass bar, drum bar) tuples forever."""
        prev = None
        while True:
            chord_bar = Bar()
            nxt_chord = next(self.sim)
            # Voice-lead from the previous voicing for smoother transitions.
            nxt_voiced = voice(prev, nxt_chord)
            prev = nxt_voiced
            chord_bar + NoteContainer(nxt_voiced)
            # Set the chord's duration value to 1 (a whole note in mingus).
            chord_bar[0][1] = 1
            self.current = nxt_chord
            yield (chord_bar, bassline(nxt_chord, self.bassproba),
                   drum_beat(self.bassproba))

    def play(self):
        """Generator: play bars on channels 1-3 forever, yielding the
        currently sounding chord after each bar."""
        fluidsynth.set_instrument(1, 73)  # chords
        fluidsynth.set_instrument(2, 32)  # bass
        fluidsynth.set_instrument(3, 1, 128)  # drums
        fluidsynth.main_volume(1, 50)
        fluidsynth.main_volume(2, 100)
        fluidsynth.main_volume(3, 30)
        for bars in self._next_bar():
            fluidsynth.play_Bars(bars, [1, 2, 3], 110)
            yield self.current
def txtToMarkovChain(path, encoding=None):
    """Build a MarkovChain whose states are per-character positions of a text file.

    Transition weights are exact empirical frequencies (Fractions) between
    consecutive characters, normalised per source state.
    """
    if encoding:
        opened = io.open(path, 'r', encoding=encoding)
    else:
        opened = open(path, 'r')
    builder = MarkovChain.builder()
    prec = None        # previously visited state; None before the first char
    transitions = {}   # state -> {successor state -> raw count}
    # 'with' closes the file (the original leaked the handle).
    with opened as file:
        for i, line in enumerate(file):
            for j in range(len(line)):
                s = _state(i, j, line)
                if s not in transitions:  # idiomatic 'not in' (was 'not s in')
                    transitions[s] = {}
                    builder.addState(s)
                if i == j == 0:
                    # The very first character is the initial state.
                    builder.setInitial(s, 1)
                if prec:
                    if s not in transitions[prec]:
                        transitions[prec][s] = 1
                    else:
                        transitions[prec][s] += 1
                prec = s
    # Normalise raw counts into exact fractions per source state.
    for s1 in transitions:
        cases = sum(transitions[s1].values())
        for s2 in transitions[s1]:
            builder.addTransition(s1, s2, Fraction(transitions[s1][s2], cases))
    return builder.build()
def parse_porn_csv(csv_file, title_index):
    """Build (or load a cached) MarkovChain trained on the titles in *csv_file*.

    title_index is unused; kept for backward compatibility with callers.
    Returns a MarkovChain whose database is persisted next to the CSV.
    """
    markov_db_filename = './{csv}_markov_db'.format(csv=csv_file)
    # Reuse the name computed above instead of re-formatting the same string.
    if os.path.isfile(markov_db_filename):
        return MarkovChain(markov_db_filename)
    # NOTE(review): StringIO.StringIO is Python 2 — on Python 3 this must
    # become io.StringIO.
    porn_titles = StringIO.StringIO()
    with open(csv_file) as porn_csv:
        reader = csv.DictReader(porn_csv, delimiter='|', fieldnames=[
            'iframe', 'thumbnail', 'samples', 'title', 'tags', 'more_tags',
            'unknown', 'length', 'views', 'likes', 'dislikes'
        ])
        # One trailing '.' per title so the chain learns sentence boundaries.
        for row in reader:
            porn_titles.write(row['title'] + '.')
    mc = MarkovChain(markov_db_filename)
    mc.generateDatabase(porn_titles.getvalue())
    mc.dumpdb()  # persist so the isfile() fast path hits next time
    return mc
def main(arg=None):
    """Main Function: train a MarkovChain on input lines and print samples.

    arg: optional argv-style list; defaults to sys.argv. (The original
    unconditionally overwrote the parameter with sys.argv, making it dead.)
    """
    if arg is None:
        arg = sys.argv
    if len(arg) < 2:
        lines = get_default_lines()
    else:
        # 'with' closes the input file (the original leaked the handle).
        with open(arg[1], 'r') as input_file:
            raw_lines = input_file.read()
        lines = raw_lines.strip().split('\n')
    # Python 3 print() calls (original used Python 2 print statements).
    print(lines)
    markov_chain = MarkovChain(lines)
    markov_dict = markov_chain.get_dictionary()
    print(markov_dict)
    print(markov_chain.generate_line())
    print(markov_chain.generate_line())
    print(markov_chain.generate_line())
def test_forward():
    """Exercise MarkovChain.forward() and HMM.logprob() against expected values."""
    # Case 1: left-right chain with an exit column.
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.9, 0.1, 0], [0, 0.9, 0.1]])
    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))
    # output sequence
    x = np.matrix([-0.2, 2.6, 1.3])
    pX, logS = g1.prob(np.matrix([g1, g2]), x)
    alphaHat, c = mc.forward(mc, pX)
    # Python 3 print() calls (original used Python 2 print statements,
    # which do not parse under Python 3).
    print('alphaHat:', alphaHat, 'expected: [1 0.3847 0.4189; 0 0.6153 0.5811]')
    print('c:', c, 'expected: [1 0.1625 0.8266 0.0581]')
    h = HMM(mc, np.matrix([[g1], [g2]]))
    # logP = P(X|h)
    logP = h.logprob(h, x)
    print('logP: ', logP, 'expected: -9.1877')
    # Case 2: score the same observations against two different HMMs at once.
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.0, 1.0, 0.0], [0.0, 0.7, 0.3]])
    x = np.matrix([-0.2, 2.6, 1.3])
    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([1]))
    h1 = HMM(mc, np.matrix([[g1], [g2]]))
    transitionMatrix = np.matrix([[0.5, 0.5, 0.0], [0.0, 0.5, 0.5]])
    mc2 = MarkovChain(initMatrix, transitionMatrix)
    g3 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g4 = GaussD(mean=np.matrix([3]), stdev=np.matrix([1]))
    h2 = HMM(mc2, np.matrix([[g3], [g4]]))
    logP = h1.logprob(np.matrix([h1, h2]), x)
    print('logP:', logP, 'expected: [-5.562463348 -6.345037882]')
nb_train = len(train_instances) # print(nb_train) test_data_path = data_dir + 'test_lines.txt' test_instances = MC_utils.read_instances_lines_from_file(test_data_path) nb_test = len(test_instances) # print(nb_test) print("---------------------@Build knowledge-------------------------------") MAX_SEQ_LENGTH, item_dict, reversed_item_dict, item_probs, item_freq_dict, user_dict = MC_utils.build_knowledge(train_instances+test_instances) if not os.path.exists(o_dir): os.makedirs(o_dir) saved_file = os.path.join(o_dir, 'transition_matrix_MC.npz') # print("Save model in ", saved_file) transition_matrix = sp.load_npz(saved_file) mc_model = MarkovChain(item_dict, reversed_item_dict, item_freq_dict, transition_matrix, mc_order) if ex_file is not None: ex_instances = MC_utils.read_instances_lines_from_file(ex_file) else : ex_instances = test_instances for i in random.sample(ex_instances, nb_predict): elements = i.split('|') b_seq = elements[1:] # prev_basket = [item for item in re.split('[\\s]+',b_seq[-2].strip())] prev_item = [] for prev_basket in b_seq[:-1]: prev_item += re.split('[\\s]+', prev_basket.strip()) target_basket = [item for item in re.split('[\\s]+',b_seq[-1].strip())] topk_item = mc_model.top_predicted_item(prev_item, topk) correct_set = set(topk_item).intersection(set(target_basket))
class Bot:
    """Twitter bot: collects a user's tweets, trains a Markov chain on them,
    and posts generated tweets/replies through the module-level `api` client."""

    def __init__(self, user, filename, repliesFilename):
        """
        user: The id of the user that you want to collect data from and that
        will be used in the app.

        filename: the name of the file that you want to store user's tweets
        (excluding replies) e.g: "data_tweets.json". Please insert {} in this
        file otherwise it'll crash.

        repliesFilename: the name of the file that you want to store user's
        tweets (only replies to other users) ex: "data_replies.json". Please
        insert {} in this file otherwise it'll crash.
        """
        self._user = user
        # Timeline list, filled by CollectingUsersTweets(); None until then.
        self._stream = None
        self._filename = filename
        # Text generator trained on the stored-tweets file.
        self._MarkovChain = MarkovChain(self._filename)
        self._repliesFilename = repliesFilename
        # user_id -> {"occurences": n, "screen_name": ...};
        # see updateUserRepliedDictionnary().
        self._repliedUsers = {}

    # SETTER
    def CollectingUsersTweets(self, noReplies, count):
        """
        Collects user's stream.
        noReplies: a boolean, True if you don't want replies, False if you
        want replies.
        count: an int between 1 and 200, this is the number of tweets that
        you want to load.
        Doesn't return anything.
        """
        self._stream = api.GetUserTimeline(user_id=self._user,
                                           screen_name='',
                                           since_id='',
                                           max_id='',
                                           count=count,
                                           include_rts=False,
                                           trim_user=True,
                                           exclude_replies=noReplies)

    def StoringIntoJSON(self):
        """
        Store user's stream (consider calling CollectingUsersTweets() before
        calling this one) into filename.
        Doesn't return anything.
        """
        with open(self._filename, "r", encoding="utf-8") as read_file:
            tweets = json.load(read_file)
        for post in self._stream:
            text = post.full_text
            filteredtext = ''
            link = False
            # Only store tweets not already present (keys are tweet ids).
            if str(post.id) not in tweets.keys():
                for i in range(len(text)):
                    # Character scan that drops "https://..." runs: once a
                    # link start is seen, skip characters until whitespace.
                    # NOTE(review): the guard checks >= 7 but indexes up to
                    # text[i + 7]; a tweet ending in "https:/" would raise
                    # IndexError — confirm and consider >= 8.
                    if (len(text)) - i >= 7:
                        if text[i] == 'h' and text[i + 1] == 't' and text[
                                i + 2] == 't' and text[i + 3] == 'p' and text[
                                    i + 4] == 's' and text[i + 5] == ':' and text[
                                        i + 6] == '/' and text[i + 7] == '/':
                            link = True
                    if not link:
                        filteredtext += text[i]
                    if link:
                        if text[i] == ' ' or text[i] == '\n':
                            link = False
                if filteredtext != "":
                    tweets[post.id] = {
                        "text": filteredtext,
                        "date": post.created_at
                    }
        with open(self._filename, "w", encoding="utf-8") as write_file:
            json.dump(tweets, write_file)

    def test(self):
        # Debug helper: dump the link-stripped text of every collected tweet
        # to Data.txt, blank-line separated.
        File = open('Data.txt', 'w', encoding="utf-8")
        for post in self._stream:
            text = post.full_text
            filteredtext = ''
            link = False
            for i in range(len(text)):
                # Same "https://" stripping scan as StoringIntoJSON().
                if (len(text)) - i >= 7:
                    if text[i] == 'h' and text[i + 1] == 't' and text[
                            i + 2] == 't' and text[i + 3] == 'p' and text[
                                i + 4] == 's' and text[i + 5] == ':' and text[
                                    i + 6] == '/' and text[i + 7] == '/':
                        link = True
                if not link:
                    filteredtext += text[i]
                if link:
                    if text[i] == ' ' or text[i] == '\n':
                        link = False
            File.write(filteredtext + '\n')
            File.write('\n')
        File.close()

    def StoringIntoJSONOnlyReplies(self):
        """
        Store user's stream (consider calling CollectingUsersTweets() before
        calling this one) into filename.
        Doesn't return anything.
        """
        with open(self._repliesFilename, "r", encoding="utf-8") as read_file:
            tweets = json.load(read_file)
        for post in self._stream:
            text = post.full_text
            filteredtext = ''
            link = False
            mention = False
            # Keep only unseen tweets that are replies to someone else.
            if str(post.id) not in tweets.keys(
            ) and post.in_reply_to_user_id != self._user and post.in_reply_to_user_id is not None:
                for i in range(len(text)):
                    # Strip "https://..." runs and @mentions (until whitespace).
                    if (len(text)) - i >= 7:
                        if text[i] == 'h' and text[i + 1] == 't' and text[
                                i + 2] == 't' and text[i + 3] == 'p' and text[
                                    i + 4] == 's' and text[i + 5] == ':' and text[
                                        i + 6] == '/' and text[i + 7] == '/':
                            link = True
                    if text[i] == '@':
                        mention = True
                    if not link and not mention:
                        filteredtext += text[i]
                    if link:
                        if text[i] == ' ' or text[i] == '\n':
                            link = False
                    if mention:
                        if text[i] == ' ' or text[i] == '\n':
                            mention = False
                tweets[post.id] = {
                    "text": filteredtext,
                    "in_reply_to_user_id": str(post.in_reply_to_user_id)
                }
        with open(self._repliesFilename, "w", encoding="utf-8") as write_file:
            json.dump(tweets, write_file)

    def updateUserRepliedDictionnary(self):
        """
        Update the dictionnary that contains the users that the user replied to.
        Doesn't return anything.
        """
        with open(self._repliesFilename, "r", encoding="utf-8") as read_file:
            tweets = json.load(read_file)
        res = {}
        # Count replies per replied-to user id.
        for tweet in tweets:
            if tweets[tweet]['in_reply_to_user_id'] in res:
                res[tweets[tweet]['in_reply_to_user_id']]["occurences"] += 1
            else:
                res[tweets[tweet]['in_reply_to_user_id']] = {"occurences": 1}
        # Drop users replied to only once.
        userToRemove = []
        for User in res.keys():
            if res[User]["occurences"] == 1:
                userToRemove.append(User)
        for User in userToRemove:
            res.pop(User)
        i = 0
        # Resolve screen names (one API call per user) and render a crude
        # progress bar in the terminal.
        for User in res.keys():
            res[User]["screen_name"] = api.GetUser(User).screen_name
            os.system('cls' if os.name == 'nt' else 'clear')
            progressTable = Table(show_header=True, header_style="Green")
            progressTable.add_column("Progress: " +
                                     str(round(i / len(res) * 100)) + "%")
            progressTable.add_row(((round(i / len(res) * 100)) // 2) * '■' + ((
                (round(i / len(res) * 100) - 100) // 2) * -1) * '□')
            console.print(progressTable)
            i += 1
        self._repliedUsers = res

    def refreshPorfilePicture(self):
        # Download the target user's full-size avatar ('_normal' suffix
        # removed) and set it as the bot's own profile image.
        image_url = self.getUser(self._user).profile_image_url_https.replace(
            '_normal', '')
        img_data = requests.get(image_url).content
        with open('profile_picture.jpg', 'wb') as handler:
            handler.write(img_data)
        api.UpdateImage('profile_picture.jpg')

    def AjustCoef(self):
        # Tune the chain's length coefficient in 0.01 steps until the average
        # generated-tweet length is within 2 chars of the user's real average.
        with open(self._filename, "r", encoding="utf-8") as read_file:
            tweets = json.load(read_file)
        condition = True
        while condition:
            moyPoster = 0
            moyBot = 0
            for tweet in tweets:
                moyPoster += len(tweets[tweet]["text"])
            moyPoster = moyPoster / len(tweets.keys())
            # Average length over 100 generated tweets.
            for _ in range(100):
                moyBot += len(self._MarkovChain.generateTweet())
            moyBot = moyBot / 100
            if moyPoster > moyBot:
                self._MarkovChain.setCoef(self._MarkovChain.getCoef() + 0.01)
            elif moyPoster < moyBot:
                self._MarkovChain.setCoef(self._MarkovChain.getCoef() - 0.01)
            if abs(moyBot - moyPoster) < 2 and abs(moyPoster - moyBot) < 2:
                condition = False
            '''
            print(moyPoster)
            print(moyBot)
            print(self._MarkovChain.getCoef())
            '''

    # GETTER
    def CollectLastTweetFromUser(self, user):
        """
        user: the id of the user (int).

        return the tweet of the last user.
        """
        lastTweet = api.GetUserTimeline(user_id=user,
                                        screen_name='',
                                        since_id='',
                                        max_id='',
                                        count=1,
                                        include_rts=False,
                                        trim_user=True,
                                        exclude_replies=True)
        return lastTweet[0]

    def CollectLastTweetFromSelf(self):
        """
        Return the bot's own most recent tweet.
        """
        lastTweet = api.GetUserTimeline(user_id=self._user,
                                        screen_name='',
                                        since_id='',
                                        max_id='',
                                        count=1,
                                        include_rts=False,
                                        trim_user=True,
                                        exclude_replies=True)
        return lastTweet[0]

    def getRepliedUserDictionnary(self):
        """
        return the dictionnary that contains all users that has been replied
        by the user.
        """
        return self._repliedUsers

    @staticmethod
    def getUser(Id):
        """
        Id: the id of the user (int).

        return the user data.
        """
        return api.GetUser(user_id=Id)

    def GenerateTweet(self):
        """
        return a tweet generated by the Markov chain (str).
        """
        return self._MarkovChain.generateTweet()

    def GenerateTweetWithKey(self, Key):
        """
        return a tweet generated by the Markov chain (str).
        """
        # Not implemented yet.
        pass

    def isKeyInData(self, Key):
        # True if *Key* occurs in any stored tweet's text.
        with open(self._filename, "r", encoding="utf-8") as read_file:
            tweets = json.load(read_file)
        for tweet in tweets:
            if Key in tweets[tweet]["text"]:
                return True
        return False

    def getWaitingTime(self):
        # Estimate how long (seconds) to wait before posting: whole-day gap
        # between the user's two newest tweets, plus an hour-of-day offset
        # and 0-30 minutes of random jitter.
        tweets = api.GetUserTimeline(user_id=self._user,
                                     screen_name='',
                                     since_id='',
                                     max_id='',
                                     count=200,
                                     include_rts=False,
                                     trim_user=True,
                                     exclude_replies=True)
        a = self.strToDate(tweets[1].created_at)
        b = self.strToDate(tweets[0].created_at)
        d = 0
        if (b - a).days > 1:
            d = (b - a).days
        return d * 86400 + (abs(b.hour - dt.datetime.now().hour) * 60 *
                            60) + randint(0, 1800)

    @staticmethod
    def strToDate(dateStr):
        # Parse Twitter's created_at string (e.g. "Mon Jan 01 12:00:00 +0000 2021")
        # by fixed character offsets into a datetime.
        months = {
            "Jan": 1,
            "Feb": 2,
            "Mar": 3,
            "Apr": 4,
            "May": 5,
            "Jun": 6,
            "Jul": 7,
            "Aug": 8,
            "Sep": 9,
            "Oct": 10,
            "Nov": 11,
            "Dec": 12
        }
        year = int(dateStr[-4:])
        month = months[dateStr[4:7]]
        day = int(dateStr[8:10])
        # NOTE(review): the +2 looks like a hard-coded UTC+2 offset — confirm.
        hour = int(dateStr[10:13]) + 2
        minute = int(dateStr[14:16])
        second = int(dateStr[17:19])
        return dt.datetime(year, month, day, hour, minute, second)

    # POSTER
    def PostTweet(self, tweet):
        """
        tweet: str.

        post a tweet.
        """
        api.PostUpdate(tweet)

    def PostReply(self, tweet, tweetToReplyID):
        """
        tweet: a str with the @ of the user that you want to reply to at the
        beginning of the str.
        tweetToReply: tweet's id that you want to reply.

        post a reply to a tweet.
        """
        api.PostUpdate(tweet, in_reply_to_status_id=tweetToReplyID)
from MarkovChain import MarkovChain

if __name__ == "__main__":
    # 3x3 row-stochastic transition matrix.
    # NOTE(review): the matrix has 3 rows/columns but 64 states are declared
    # below — these look inconsistent; confirm MarkovChain's expected shapes.
    transition_matrix = [[0.8, 0.19, 0.01], [0.2, 0.7, 0.1], [0.1, 0.2, 0.7]]
    weather_chain = MarkovChain(
        transition_matrix=transition_matrix,
        states=[
            'GGG_GGG', 'GGG_BGG', 'GGG_GBG', 'GGG_GGB', 'GGG_GBB', 'GGG_BBG',
            'GGG_BGB', 'GGG_BBB', 'GGB_GGG', 'GGB_BGG', 'GGB_GBG', 'GGB_GGB',
            'GGB_GBB', 'GGB_BGB', 'GGB_BBG', 'GGB_BBB', 'GBG_GGG', 'GBG_BGG',
            'GBG_GBG', 'GBG_GGB', 'GBG_GBB', 'BGG_BBG', 'BGG_BGB', 'BGG_BBB',
            'BGG_GGG', 'BGG_BGG', 'BGG_GBG', 'BGG_GGB', 'BGG_GBB', 'BGG_BBG',
            'BGG_BGB', 'BGG_BBB', 'GBB_GGG', 'GBB_BGG', 'GBB_GBG', 'GBB_GGB',
            'GBB_GBB', 'GBB_BBG', 'GBB_BGB', 'GBB_BBB', 'BBG_GGG', 'BBG_BGG',
            'BBG_GBG', 'BBG_GGB', 'BBG_GBB', 'BBG_BBG', 'BBG_BGB', 'BBG_BBB',
            'BGB_GGG', 'BGB_BGG', 'BGB_GBG', 'BGB_GGB', 'BGB_GBB', 'BGB_BBG',
            'BGB_BGB', 'BGB_BBB', 'BBB_GGG', 'BBB_BGG', 'BBB_GBG', 'BBB_GGB',
            'BBB_GBB', 'BBB_BBG', 'BBB_BGB', 'BBB_BBB'
        ])
    print(weather_chain.next_state(current_state='GGG_GGG'))
    # NOTE(review): 'Snowy' is not in the states list above — likely a
    # leftover from a weather example; expect a lookup failure here.
    print(weather_chain.next_state(current_state='Snowy'))
    weather_chain.generate_states(current_state='Snowy', no=10)
    #env = MarkovChain()
    #RL = QLearningTable(actions=list(range(env.n_actions)))
    #update()
def get_markov_chain():
    """Return the chain pickled at FILE if it exists, else a fresh MarkovChain."""
    if not os.path.isfile(FILE):
        # No cached model on disk yet: start from an empty chain.
        return MarkovChain()
    with open(FILE, 'rb') as pickle_file:
        return pickle.load(pickle_file)
def __init__(self, k, n):
    # k: order of the chain (words per state); n: number of words to generate.
    self.mc = MarkovChain()  # empty chain, populated later by training
    self.k = k
    self.n = n
# NOTE(review): this chunk begins mid-statement — the opening of the
# MC_utils.build_knowledge(...) call (and the assignment's left-hand side)
# lies outside the visible region; only the trailing argument appears here.
train_instances + test_instances)
print("Number item: ", len(item_dict))
# Estimate the order-`mc_order` transition matrix from the training baskets.
transition_matrix = MC_utils.calculate_transition_matrix(
    train_instances, item_dict, item_freq_dict, reversed_item_dict, mc_order)
print('Build knowledge done')
sp_matrix_path = model_name + '_transition_matrix_MC.npz'
# nb_item = len(item_dict)
# print('Density : %.6f' % (transition_matrix.nnz * 1.0 / nb_item / nb_item))
if not os.path.exists(o_dir):
    os.makedirs(o_dir)
# Persist the sparse matrix so later runs can skip training.
saved_file = os.path.join(o_dir, sp_matrix_path)
print("Save model in ", saved_file)
sp.save_npz(saved_file, transition_matrix)
mc_model = MarkovChain(item_dict, reversed_item_dict, item_freq_dict,
                       transition_matrix, mc_order)
topk = 50
print('Predict to outfile')
predict_file = os.path.join(o_dir, 'predict_' + model_name + '.txt')
MC_utils.write_predict(predict_file, test_instances, topk, mc_model)
print('Predict done')
ground_truth, predict = MC_utils.read_predict(predict_file)
# Evaluate hit-ratio and recall at several cutoffs.
for topk in [5, 10, 15, 20]:
    print("Top : ", topk)
    # hit_rate = MC_hit_ratio(test_instances, topk, mc_model)
    # recall = MC_recall(test_instances, topk, mc_model)
    hit_rate = MC_utils.hit_ratio(ground_truth, predict, topk)
    recall = MC_utils.recall(ground_truth, predict, topk)
    print("hit ratio: ", hit_rate)
    print("recall: ", recall)
def main():
    """Load the player-name dataset and run the Markov chain process over it."""
    # Load dataset with given txt file
    names = load_dataset(txt_name="player_name.txt")
    MarkovChain(dataset=names).markov_chain_process()
def setUp(self):
    # Fresh chain per test, built from an (expected) empty corpus file.
    self.mc = MarkovChain("empty.txt")
"GreatEspectations.txt", "TaleOfTwoCities.txt" ] for i in range(len(bookArray)): bookArray[i] = "/home/team2/Project/" + bookArray[i] #enter number of sentences here numSentences = 12 #enter word you would like to start with startWord = 'a' markovModel = DFMaker(bookArray) edgeDF = markovModel.createEdgeDF(sc) verticiesDF = markovModel.getVerticiesDF(sc) graphFrame = GraphFrame(verticiesDF, edgeDF) sentences = "" if verticiesDF[verticiesDF["id"] == startWord].collect() == []: print("The word you entered does not appear in any of the texts!") else: markovChain = MarkovChain(graphFrame, startWord) while numSentences > 0: currentState = markovChain.getState() if currentState in string.punctuation: numSentences -= 1 sentences += markovChain.getState() + " " markovChain.nextState() print(sentences) print(markovModel.sortedLikeliness("the"))
#!/usr/bin/env python3
from sys import argv
import sqlite3
from datetime import datetime
from MarkovChain import MarkovChain
from config import COMMENT_FILE, MARKOV_DB, MIN_LEN, LOG_FILE

# Module-level chain backed by the on-disk Markov database.
mc = MarkovChain(MARKOV_DB)


def makeDatabase():
    """(Re)build and persist the Markov database from the comment corpus."""
    with open(COMMENT_FILE, 'r') as f:
        mc.generateDatabase(f.read())
    mc.dumpdb()


def printComments(qty):
    """Print *qty* generated comments (>= MIN_LEN chars, capped at 140)."""
    for index in range(qty):
        comment = mc.generateString()
        # Re-roll until the comment is long enough.
        while len(comment) < MIN_LEN:
            comment = mc.generateString()
        print(str(index) + " - " + comment[:140])


def singleComment():
    """Return one generated comment of at least MIN_LEN characters."""
    while True:
        comment = mc.generateString()
        if len(comment) >= MIN_LEN:
            return comment