示例#1
0
 def __init__(self):
     """Set up three parallel Markov chains: pitch, timing and chord."""
     # No note seen yet; first add() only records the starting note.
     self._previous_note = None
     self._markovNote = MarkovChain(
         ["a", "a#", "b", "c", "c#", "d", "d#", "e", "f", "f#", "g", "g#"])
     # Note durations in standard subdivisions (whole .. sixteenth).
     self._timings = MarkovChain([1, 2, 4, 8, 16])
     self._markovChord = MarkovChain(
         ["a", "a#", "b", "c", "c#", "d", "d#", "e", "f", "f#", "g", "g#"])
示例#2
0
class PoetryGenerator:
    """Generate text with a k-th order Markov chain over word tokens.

    k: number of words per chain state (context length).
    n: number of words to generate after the seed state.
    """

    def __init__(self, k, n):
        self.mc = MarkovChain()
        self.k = k
        self.n = n

    def train(self, filename):
        """Tokenize *filename* and feed every (k-word state -> next word)
        transition into the Markov chain."""
        # Tokens are runs of word chars/punctuation, or a bare newline.
        # Hoisted out of the loop so the pattern is compiled once.
        token_re = re.compile("([\w.,!?;']+|[\n])")
        words = []
        # FIX: close the file deterministically (the original leaked it).
        with open(filename) as file:
            for line in file:
                words += token_re.findall(line)

        print(words)  # debug output, kept from the original
        for i in range(len(words) - self.k - 1):
            state = tuple(words[i:i + self.k])
            # Renamed from `next`, which shadowed the builtin.
            successor = words[i + self.k]
            self.mc.add_state(state, successor)

    def generate(self):
        """Print a random seed state followed by n generated words."""
        state = list(self.mc.get_random_tuple())
        for w in state:
            print("{} ".format(w), end='')

        for _ in range(0, self.n):
            gen_word = self.mc.get_state(tuple(state))
            print("{} ".format(gen_word), end='')
            state = self.shift(state, 1)
            # BUG FIX: the original wrote state[2], which is only correct
            # for k == 3; the freshly generated word belongs in the last slot.
            state[self.k - 1] = gen_word

    def shift(self, l, n):
        """Rotate list *l* left by *n* positions."""
        return l[n:] + l[:n]
示例#3
0
class TestMarkovChain(unittest.TestCase):
    """Unit tests for a MarkovChain constructed from a text file."""

    def setUp(self):
        # Fresh chain over an empty corpus before every test.
        self.mc = MarkovChain("empty.txt")

    def test_empty(self):
        # Printing the matrix of an empty chain returns None.
        self.assertEqual(self.mc.printMatrix(), None)

    def test_updateModelSimple(self):
        # Feeding a single sentence should not raise.
        self.mc.updateModel("Hello, my name is Andy!")
示例#4
0
class MusicMatrix:
    """Markov model of a melody: parallel chains for pitch, timing and chord."""

    def __init__(self):
        chromatic = ["a", "a#", "b", "c", "c#", "d", "d#", "e", "f", "f#",
                     "g", "g#"]
        self._previous_note = None
        # Distinct list copies, mirroring the original's separate literals.
        self._markovNote = MarkovChain(list(chromatic))
        self._timings = MarkovChain([1, 2, 4, 8, 16])
        self._markovChord = MarkovChain(list(chromatic))

    def add(self, to_note):
        """Add a path from a note to another note. Re-adding a path between
        notes will increase the associated weight."""
        if self._previous_note is None:
            # Very first note: just remember it, nothing to link from yet.
            self._previous_note = to_note
            return
        prior = self._previous_note
        # Slot 0 = pitch, 1 = timing, 2 = chord.
        self._markovNote.add(prior[0], to_note[0])
        self._timings.add(prior[1], to_note[1])
        self._markovChord.add(prior[2], to_note[2])
        self._previous_note = to_note

    def next_note(self, from_note):
        """Sample the successor [pitch, timing, chord] for *from_note*."""
        pitch, timing, chord = from_note[0], from_note[1], from_note[2]
        return [
            self._markovNote.next_value(pitch),
            self._timings.next_value(timing),
            self._markovChord.next_value(chord),
        ]
示例#5
0
def jsonToMarkovChain(path, encoding=None):
    """Build a MarkovChain from a JSON document.

    path: path to the JSON file.
    encoding: optional text encoding; when given, the file is opened via
        io.open with that encoding.

    If the document carries every key in ARRAY_KEYS it is treated as the
    serialized (SS, ID, TM) form; otherwise the raw parsed object is handed
    to MarkovChain directly.
    """
    # BUG FIX: the original never closed the file; a context manager
    # guarantees the handle is released even if json.load raises.
    if encoding:
        handle = io.open(path, 'r', encoding=encoding)
    else:
        handle = open(path, 'r')
    with handle as file:
        data = json.load(file)

    if all(k in data for k in ARRAY_KEYS):
        return MarkovChain(data[SS], data[ID], data[TM])
    else:
        return MarkovChain(data)
示例#6
0
 def __init__(self, user, filename, repliesFilename):
     """
     user: The id of the user that you want to collect data from and that will be used in the app.
     filename: the name of the file that you want to store user's tweets (excluding replies) e.g: "data_tweets.json". Please insert {} in this file otherwise it'll crash.
     repliesFilename: the name of the file that you want to store user's tweets (only replies to other users) ex: "data_replies.json". Please insert {} in this file otherwise it'll crash.
     """
     self._user = user
     # Most recently fetched timeline; None until a collection runs.
     self._stream = None
     self._filename = filename
     # Markov chain built from the stored tweets file.
     self._MarkovChain = MarkovChain(self._filename)
     self._repliesFilename = repliesFilename
     # Maps replied-to user ids to occurrence info.
     self._repliedUsers = {}
示例#7
0
def parse_porn_csv(csv_file, title_index):
    """Return a MarkovChain over video titles parsed from *csv_file*.

    A per-CSV database file acts as a cache: when it already exists the
    chain is loaded from it instead of re-parsing the CSV.
    NOTE(review): *title_index* is unused; kept for interface compatibility.
    """
    markov_db_filename = './{csv}_markov_db'.format(csv=csv_file)
    # FIX: reuse the computed filename instead of re-formatting the string.
    if os.path.isfile(markov_db_filename):
        return MarkovChain(markov_db_filename)
    porn_titles = StringIO.StringIO()
    with open(csv_file) as porn_csv:
        reader = csv.DictReader(porn_csv, delimiter='|', fieldnames=[
            'iframe', 'thumbnail', 'samples', 'title', 'tags', 'more_tags', 'unknown', 'length','views','likes','dislikes'
        ])
        for row in reader:
            # Titles are joined with '.' so sentences stay separable.
            porn_titles.write(row['title'] + '.')
    mc = MarkovChain(markov_db_filename)
    mc.generateDatabase(porn_titles.getvalue())
    mc.dumpdb()
    return mc
示例#8
0
    def initLeftRightMC(self, nStates, stateDuration=None):
        """Build a left-right (Bakis) MarkovChain with nStates states.

        stateDuration: expected duration per state, either a scalar or a
        length-nStates sequence; defaults to 10 per state.
        Returns the constructed MarkovChain.
        """
        defaultDuration = 10.0
        if nStates <= 1:
            # NOTE(review): this only warns and keeps going -- confirm intended.
            print 'Number of states must be > 1'
        if stateDuration is None:
            stateDuration = defaultDuration
        # A scalar duration is tiled so every state shares the same value.
        if type(stateDuration) == float or type(
                stateDuration) == np.float64 or len(stateDuration) == 1:
            stateDuration = np.tile(stateDuration, (nStates, 1))
        elif len(stateDuration) != nStates:
            print 'Incompatible length of state durations'

        minDiagProb = 0.1
        # Self-loop probability aii = (D-1)/D, floored at minDiagProb,
        # where D is the duration clamped to at least 1.
        D = np.maximum(np.ones((stateDuration.shape)), stateDuration)
        aii = np.maximum(
            np.ones((D.shape)) * minDiagProb, np.divide((D - 1), D))
        # Remaining probability mass moves to the next state (left-right).
        aij = (1 - aii)
        aij = np.diagflat(aij, 1)
        aij = aij[0:nStates, :]
        # nStates x (nStates+1) transition matrix; last column is the exit.
        A = np.concatenate((np.diagflat(aii), np.zeros(
            (nStates, 1))), axis=1) + aij
        # Always start in the first state.
        p0 = np.concatenate((np.matrix([1]), np.zeros((nStates - 1, 1))),
                            axis=0)

        mc = MarkovChain(p0, A)

        return mc
示例#9
0
    def __init__(self, soundfont_name=None):
        """Initialize fluidsynth (alsa driver) and the chord-progression chain.

        soundfont_name: path to a .sf2 soundfont; defaults to the bundled
        ../soundfonts/soundfont.sf2.
        """
        if soundfont_name is None:
            fluidsynth.init('../soundfonts/soundfont.sf2', 'alsa')
        else:
            fluidsynth.init(soundfont_name, 'alsa')

        self.m_chain = MarkovChain(get_all_progressions())
        # Infinite generator of successive chords.
        self.sim = self.m_chain.infinite_progression()

        # Probability triples consumed by bassline()/drum_beat().
        # NOTE(review): exact slot semantics live in those helpers -- confirm.
        down1 = (0.7, 0.05, 0.2)
        down2 = (0.2, 0.05, 0.7)
        off = (0.0, 0.4, 0.1)

        # One triple per slot of the 8-slot bar pattern.
        self.bassproba = [down1, off, down2, off, down1, off, down2, off]

        self.current = self.m_chain.START
示例#10
0
def test_backward():
    """Smoke-test MarkovChain.backward against hand-computed betaHat values.

    Results are printed next to the expected matrices for manual
    comparison -- nothing is asserted.
    """
    # Finite-duration chain: transition matrix has an extra exit column.
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.9, 0.1, 0], [0, 0.9, 0.1]])
    x = np.matrix([-0.2, 2.6, 1.3])
    # Scaling factors as produced by the forward pass for this sequence.
    c = np.matrix([1.0, 0.1625, 0.8266, 0.0581])

    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))

    pX, logS = g1.prob(np.matrix([[g1], [g2]]), x)
    betaHat = mc.backward(mc, pX, c)
    print 'betaHat:', betaHat
    print 'expected: [1.0003 1.0393 0; 8.4182 9.3536 2.0822]'

    # Infinite-duration chain: square matrix, c comes from forward().
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.9, 0.1], [0.1, 0.9]])
    x = np.matrix([-0.2, 2.6, 1.3])

    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))

    pX, logS = g1.prob(np.matrix([g1, g2]), x)
    alphaHat, c = mc.forward(mc, pX)
    betaHat = mc.backward(mc, pX, c)
    print 'betaHat:', betaHat
    print 'expected: [1.0 6.798238264 1.125986646; 5.223087455 5.75095566 1.125986646]'
示例#11
0
def generate_words():
    """Read ./frost.txt, build a MarkovChain over its words, and return a
    10-word random walk."""
    # FIX: the original never closed the file; use a context manager.
    with open("./frost.txt", "r") as my_file:
        lines = my_file.readlines()
    # Flatten every line into a single word list.
    words_list = [word for line in lines for word in line.split()]
    markovchain = MarkovChain(words_list)
    return markovchain.walk(10)
示例#12
0
def multi_dim_observation():
    """Sample 100 steps (observations, states) from a 2-state HMM whose
    outputs are 2-D Gaussians."""
    start = np.matrix([[0.75], [0.25]])
    trans = np.matrix([[0.99, 0.01], [0.03, 0.97]])
    chain = MarkovChain(start, trans)
    g1 = GaussD(mean=np.matrix([[0], [0]]), cov=np.matrix([[2, 1], [1, 4]]))
    g2 = GaussD(mean=np.matrix([[3], [3]]), cov=np.matrix([[2, 1], [1, 4]]))
    model = HMM(chain, np.matrix([[g1], [g2]]))
    [observations, states] = model.rand(model, 100)

    return (observations, states)
示例#13
0
def finite_duration():
    """Sample 100 steps (observations, states) from a finite-duration HMM
    (transition matrix carries an extra exit column)."""
    start = np.matrix([[0.75], [0.25]])
    trans = np.matrix([[0.4, 0.4, 0.2], [0.1, 0.6, 0.3]])
    chain = MarkovChain(start, trans)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))
    model = HMM(chain, np.matrix([[g1], [g2]]))
    [observations, states] = model.rand(model, 100)

    return (observations, states)
示例#14
0
def generate_dict(api, user_id):
    """Generates Dictionary, no RT no @"""
    tweets = list()
    for tweet in tweepy.Cursor(api.user_timeline, id=user_id).items():
        tweet_text = tweet._json['text'].encode('utf-8')
        # FIX: call startswith as a method with a tuple of prefixes instead
        # of two separate unbound str.startswith(...) calls.
        if not tweet_text.startswith(('RT', '@')):
            tweets.append(tweet_text)

    markov_chain = MarkovChain(tweets)
    return markov_chain
示例#15
0
class Snooki(object):
    """Endless chord/bass/drum jam driven by a chord-progression Markov chain."""

    def __init__(self, soundfont_name=None):
        # soundfont_name: optional .sf2 path; defaults to the bundled one.
        if soundfont_name is None:
            fluidsynth.init('../soundfonts/soundfont.sf2', 'alsa')
        else:
            fluidsynth.init(soundfont_name, 'alsa')

        self.m_chain = MarkovChain(get_all_progressions())
        # Infinite generator of successive chords.
        self.sim = self.m_chain.infinite_progression()

        # Probability triples consumed by bassline()/drum_beat().
        down1 = (0.7, 0.05, 0.2)
        down2 = (0.2, 0.05, 0.7)
        off = (0.0, 0.4, 0.1)

        # One triple per slot of the 8-slot bar pattern.
        self.bassproba = [down1, off, down2, off, down1, off, down2, off]

        self.current = self.m_chain.START

    def _next_bar(self):
        """Yield (chord bar, bass bar, drum bar) triples forever."""
        prev = None
        while True:
            chord_bar = Bar()

            nxt_chord = next(self.sim)

            # Voice-lead the new chord relative to the previous voicing.
            nxt_voiced = voice(prev, nxt_chord)

            prev = nxt_voiced

            # NOTE(review): the result of + is discarded; presumably
            # Bar.__add__ mutates chord_bar in place -- confirm.
            chord_bar + NoteContainer(nxt_voiced)

            chord_bar[0][1] = 1

            self.current = nxt_chord

            yield (chord_bar, bassline(nxt_chord, self.bassproba), drum_beat(self.bassproba))


    def play(self):
        """Generator: play bars through fluidsynth and yield the current chord."""
        fluidsynth.set_instrument(1, 73) # chords
        fluidsynth.set_instrument(2, 32) # bass
        fluidsynth.set_instrument(3, 1, 128) # drums
        fluidsynth.main_volume(1, 50)
        fluidsynth.main_volume(2, 100)
        fluidsynth.main_volume(3, 30)
        for bars in self._next_bar():
            fluidsynth.play_Bars(bars, [1, 2, 3], 110)
            yield self.current
示例#16
0
class Snooki(object):
    """Endless chord/bass/drum jam driven by a chord-progression Markov chain."""

    def __init__(self, soundfont_name=None):
        # soundfont_name: optional .sf2 path; defaults to the bundled one.
        if soundfont_name is None:
            fluidsynth.init('../soundfonts/soundfont.sf2', 'alsa')
        else:
            fluidsynth.init(soundfont_name, 'alsa')

        self.m_chain = MarkovChain(get_all_progressions())
        # Infinite generator of successive chords.
        self.sim = self.m_chain.infinite_progression()

        # Probability triples consumed by bassline()/drum_beat().
        down1 = (0.7, 0.05, 0.2)
        down2 = (0.2, 0.05, 0.7)
        off = (0.0, 0.4, 0.1)

        # One triple per slot of the 8-slot bar pattern.
        self.bassproba = [down1, off, down2, off, down1, off, down2, off]

        self.current = self.m_chain.START

    def _next_bar(self):
        """Yield (chord bar, bass bar, drum bar) triples forever."""
        prev = None
        while True:
            chord_bar = Bar()

            nxt_chord = next(self.sim)

            # Voice-lead the new chord relative to the previous voicing.
            nxt_voiced = voice(prev, nxt_chord)

            prev = nxt_voiced

            # NOTE(review): the result of + is discarded; presumably
            # Bar.__add__ mutates chord_bar in place -- confirm.
            chord_bar + NoteContainer(nxt_voiced)

            chord_bar[0][1] = 1

            self.current = nxt_chord

            yield (chord_bar, bassline(nxt_chord, self.bassproba),
                   drum_beat(self.bassproba))

    def play(self):
        """Generator: play bars through fluidsynth and yield the current chord."""
        fluidsynth.set_instrument(1, 73)  # chords
        fluidsynth.set_instrument(2, 32)  # bass
        fluidsynth.set_instrument(3, 1, 128)  # drums
        fluidsynth.main_volume(1, 50)
        fluidsynth.main_volume(2, 100)
        fluidsynth.main_volume(3, 30)
        for bars in self._next_bar():
            fluidsynth.play_Bars(bars, [1, 2, 3], 110)
            yield self.current
示例#17
0
    def __init__(self, soundfont_name=None):
        """Initialize fluidsynth (alsa driver) and the chord-progression chain.

        soundfont_name: path to a .sf2 soundfont; defaults to the bundled
        ../soundfonts/soundfont.sf2.
        """
        if soundfont_name is None:
            fluidsynth.init('../soundfonts/soundfont.sf2', 'alsa')
        else:
            fluidsynth.init(soundfont_name, 'alsa')

        self.m_chain = MarkovChain(get_all_progressions())
        # Infinite generator of successive chords.
        self.sim = self.m_chain.infinite_progression()

        # Probability triples consumed by bassline()/drum_beat().
        down1 = (0.7, 0.05, 0.2)
        down2 = (0.2, 0.05, 0.7)
        off = (0.0, 0.4, 0.1)

        # One triple per slot of the 8-slot bar pattern.
        self.bassproba = [down1, off, down2, off, down1, off, down2, off]

        self.current = self.m_chain.START
示例#18
0
def txtToMarkovChain(path, encoding=None):
    """Build a MarkovChain whose states are character positions of a text file.

    Each (line, column) position is mapped to a state via _state();
    consecutive positions are counted as transitions, then normalized
    to exact Fractions.
    """
    # FIX: close the file deterministically; the original leaked the handle.
    if encoding:
        handle = io.open(path, 'r', encoding=encoding)
    else:
        handle = open(path, 'r')

    builder = MarkovChain.builder()
    # FIX: dropped the unused `states` list from the original.
    prec = None

    # transitions[a][b] counts observed a -> b steps.
    transitions = {}

    with handle as file:
        for i, line in enumerate(file):
            for j in range(len(line)):

                s = _state(i, j, line)

                if s not in transitions:
                    transitions[s] = {}
                    builder.addState(s)

                # The very first character is the initial state.
                if i == j == 0:
                    builder.setInitial(s, 1)

                if prec:
                    if s not in transitions[prec]:
                        transitions[prec][s] = 1
                    else:
                        transitions[prec][s] += 1

                prec = s

    for s1 in transitions:
        # Normalize the outgoing counts of s1 into exact probabilities.
        cases = 0

        for s2 in transitions[s1]:
            cases += transitions[s1][s2]

        for s2 in transitions[s1]:
            builder.addTransition(s1, s2, Fraction(transitions[s1][s2], cases))

    return builder.build()
示例#19
0
def parse_porn_csv(csv_file, title_index):
    """Return a MarkovChain over video titles parsed from *csv_file*.

    A per-CSV database file acts as a cache: when it already exists the
    chain is loaded from it instead of re-parsing the CSV.
    NOTE(review): *title_index* is unused; kept for interface compatibility.
    """
    markov_db_filename = './{csv}_markov_db'.format(csv=csv_file)
    # FIX: reuse the computed filename instead of re-formatting the string.
    if os.path.isfile(markov_db_filename):
        return MarkovChain(markov_db_filename)
    porn_titles = StringIO.StringIO()
    with open(csv_file) as porn_csv:
        reader = csv.DictReader(porn_csv,
                                delimiter='|',
                                fieldnames=[
                                    'iframe', 'thumbnail', 'samples', 'title',
                                    'tags', 'more_tags', 'unknown', 'length',
                                    'views', 'likes', 'dislikes'
                                ])
        for row in reader:
            # Titles are joined with '.' so sentences stay separable.
            porn_titles.write(row['title'] + '.')
    mc = MarkovChain(markov_db_filename)
    mc.generateDatabase(porn_titles.getvalue())
    mc.dumpdb()
    return mc
示例#20
0
def main(arg=None):
    """Main Function"""
    # NOTE(review): `arg` is immediately overwritten by sys.argv, so the
    # parameter is effectively unused -- confirm before relying on it.
    lines = []
    arg = sys.argv

    if len(arg) < 2:
        # No input file given: fall back to the built-in sample lines.
        lines = get_default_lines()
    else:
        input_file = open(arg[1], 'r')
        raw_lines = input_file.read()
        lines = raw_lines.strip().split('\n')

    print lines

    # Train on the lines and emit three generated lines.
    markov_chain = MarkovChain(lines)
    markov_dict = markov_chain.get_dictionary()
    print markov_dict
    print markov_chain.generate_line()
    print markov_chain.generate_line()
    print markov_chain.generate_line()
示例#21
0
def test_forward():
    """Smoke-test MarkovChain.forward and HMM.logprob.

    Results are printed next to hand-computed expected values for manual
    comparison -- nothing is asserted.
    """
    # Finite-duration chain (extra exit column in the transition matrix).
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.9, 0.1, 0], [0, 0.9, 0.1]])
    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))

    # output sequence
    x = np.matrix([-0.2, 2.6, 1.3])
    pX, logS = g1.prob(np.matrix([g1, g2]), x)
    alphaHat, c = mc.forward(mc, pX)
    print 'alphaHat:', alphaHat, 'expected: [1 0.3847 0.4189; 0 0.6153 0.5811]'
    print 'c:', c, 'expected: [1 0.1625 0.8266 0.0581]'

    h = HMM(mc, np.matrix([[g1], [g2]]))
    # logP = P(X|h)
    logP = h.logprob(h, x)
    print 'logP: ', logP, 'expected: -9.1877'

    # Second scenario: score the same sequence under two different HMMs.
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.0, 1.0, 0.0], [0.0, 0.7, 0.3]])
    x = np.matrix([-0.2, 2.6, 1.3])
    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([1]))
    h1 = HMM(mc, np.matrix([[g1], [g2]]))

    transitionMatrix = np.matrix([[0.5, 0.5, 0.0], [0.0, 0.5, 0.5]])
    mc2 = MarkovChain(initMatrix, transitionMatrix)
    g3 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g4 = GaussD(mean=np.matrix([3]), stdev=np.matrix([1]))
    h2 = HMM(mc2, np.matrix([[g3], [g4]]))

    logP = h1.logprob(np.matrix([h1, h2]), x)
    print 'logP:', logP, 'expected: [-5.562463348 -6.345037882]'
    nb_train = len(train_instances)
    # print(nb_train)

    test_data_path = data_dir + 'test_lines.txt'
    test_instances = MC_utils.read_instances_lines_from_file(test_data_path)
    nb_test = len(test_instances)
    # print(nb_test)
    print("---------------------@Build knowledge-------------------------------")
    MAX_SEQ_LENGTH, item_dict, reversed_item_dict, item_probs, item_freq_dict, user_dict = MC_utils.build_knowledge(train_instances+test_instances)

    if not os.path.exists(o_dir):
        os.makedirs(o_dir)
    saved_file = os.path.join(o_dir, 'transition_matrix_MC.npz')
    # print("Save model in ", saved_file)
    transition_matrix = sp.load_npz(saved_file)
    mc_model = MarkovChain(item_dict, reversed_item_dict, item_freq_dict, transition_matrix, mc_order)

    if ex_file is not None:
        ex_instances = MC_utils.read_instances_lines_from_file(ex_file)
    else :
        ex_instances = test_instances
    for i in random.sample(ex_instances, nb_predict):
        elements = i.split('|')
        b_seq = elements[1:]
        # prev_basket = [item for item in re.split('[\\s]+',b_seq[-2].strip())]
        prev_item = []
        for prev_basket in b_seq[:-1]:
            prev_item += re.split('[\\s]+', prev_basket.strip())
        target_basket = [item for item in re.split('[\\s]+',b_seq[-1].strip())]
        topk_item = mc_model.top_predicted_item(prev_item, topk)
        correct_set = set(topk_item).intersection(set(target_basket))
示例#23
0
class Bot:
    def __init__(self, user, filename, repliesFilename):
        """
        user: The id of the user that you want to collect data from and that will be used in the app.
        filename: the name of the file that you want to store user's tweets (excluding replies) e.g: "data_tweets.json". Please insert {} in this file otherwise it'll crash.
        repliesFilename: the name of the file that you want to store user's tweets (only replies to other users) ex: "data_replies.json". Please insert {} in this file otherwise it'll crash.
        """
        self._user = user
        # Most recent timeline fetched via CollectingUsersTweets(); None until then.
        self._stream = None
        self._filename = filename
        # Markov chain built from the stored tweets file.
        self._MarkovChain = MarkovChain(self._filename)
        self._repliesFilename = repliesFilename
        # Filled by updateUserRepliedDictionnary(): replied-to user id -> info.
        self._repliedUsers = {}

    # SETTER
    def CollectingUsersTweets(self, noReplies, count):
        """
        Collects user's stream.
        noReplies: a boolean, True if you don't want replies, False if you want replies.
        count: a int between 1 and 200, this is the number of tweets that you want to load.
        Doesn't return anything.
        """
        self._stream = api.GetUserTimeline(user_id=self._user,
                                           screen_name='',
                                           since_id='',
                                           max_id='',
                                           count=count,
                                           include_rts=False,
                                           trim_user=True,
                                           exclude_replies=noReplies)

    def StoringIntoJSON(self):
        """
        Store user's stream (consider calling CollectingUsersTweets() before calling this one) into filename.
        Doesn't return anything.
        """
        with open(self._filename, "r", encoding="utf-8") as read_file:
            tweets = json.load(read_file)
        for post in self._stream:
            text = post.full_text
            filteredtext = ''
            link = False
            if str(post.id) not in tweets.keys():
                for i in range(len(text)):
                    if (len(text)) - i >= 7:
                        if text[i] == 'h' and text[i + 1] == 't' and text[
                                i + 2] == 't' and text[i + 3] == 'p' and text[
                                    i +
                                    4] == 's' and text[i + 5] == ':' and text[
                                        i + 6] == '/' and text[i + 7] == '/':
                            link = True
                    if not link:
                        filteredtext += text[i]
                    if link:
                        if text[i] == ' ' or text[i] == '\n':
                            link = False
                if filteredtext != "":
                    tweets[post.id] = {
                        "text": filteredtext,
                        "date": post.created_at
                    }
        with open(self._filename, "w", encoding="utf-8") as write_file:
            json.dump(tweets, write_file)

    def test(self):
        File = open('Data.txt', 'w', encoding="utf-8")
        for post in self._stream:
            text = post.full_text
            filteredtext = ''
            link = False
            for i in range(len(text)):
                if (len(text)) - i >= 7:
                    if text[i] == 'h' and text[i + 1] == 't' and text[
                            i + 2] == 't' and text[i + 3] == 'p' and text[
                                i + 4] == 's' and text[i + 5] == ':' and text[
                                    i + 6] == '/' and text[i + 7] == '/':
                        link = True
                if not link:
                    filteredtext += text[i]
                if link:
                    if text[i] == ' ' or text[i] == '\n':
                        link = False
            File.write(filteredtext + '\n')
        File.write('\n')
        File.close()

    def StoringIntoJSONOnlyReplies(self):
        """
        Store user's stream (consider calling CollectingUsersTweets() before calling this one) into filename.
        Doesn't return anything.
        """
        with open(self._repliesFilename, "r", encoding="utf-8") as read_file:
            tweets = json.load(read_file)
        for post in self._stream:
            text = post.full_text
            filteredtext = ''
            link = False
            mention = False
            if str(post.id) not in tweets.keys(
            ) and post.in_reply_to_user_id != self._user and post.in_reply_to_user_id is not None:
                for i in range(len(text)):
                    if (len(text)) - i >= 7:
                        if text[i] == 'h' and text[i + 1] == 't' and text[
                                i + 2] == 't' and text[i + 3] == 'p' and text[
                                    i +
                                    4] == 's' and text[i + 5] == ':' and text[
                                        i + 6] == '/' and text[i + 7] == '/':
                            link = True
                    if text[i] == '@':
                        mention = True
                    if not link and not mention:
                        filteredtext += text[i]
                    if link:
                        if text[i] == ' ' or text[i] == '\n':
                            link = False
                    if mention:
                        if text[i] == ' ' or text[i] == '\n':
                            mention = False
                tweets[post.id] = {
                    "text": filteredtext,
                    "in_reply_to_user_id": str(post.in_reply_to_user_id)
                }
        with open(self._repliesFilename, "w", encoding="utf-8") as write_file:
            json.dump(tweets, write_file)

    def updateUserRepliedDictionnary(self):
        """
        Update the dictionnary that contains the users that the user replied to.
        Doesn't return anything.
        """
        with open(self._repliesFilename, "r", encoding="utf-8") as read_file:
            tweets = json.load(read_file)
        res = {}
        for tweet in tweets:
            if tweets[tweet]['in_reply_to_user_id'] in res:
                res[tweets[tweet]['in_reply_to_user_id']]["occurences"] += 1
            else:
                res[tweets[tweet]['in_reply_to_user_id']] = {"occurences": 1}
        userToRemove = []
        for User in res.keys():
            if res[User]["occurences"] == 1:
                userToRemove.append(User)
        for User in userToRemove:
            res.pop(User)

        i = 0
        for User in res.keys():
            res[User]["screen_name"] = api.GetUser(User).screen_name
            os.system('cls' if os.name == 'nt' else 'clear')
            progressTable = Table(show_header=True, header_style="Green")
            progressTable.add_column("Progress: " +
                                     str(round(i / len(res) * 100)) + "%")
            progressTable.add_row(((round(i / len(res) * 100)) // 2) * '■' + ((
                (round(i / len(res) * 100) - 100) // 2) * -1) * '□')
            console.print(progressTable)
            i += 1
        self._repliedUsers = res

    def refreshPorfilePicture(self):
        image_url = self.getUser(self._user).profile_image_url_https.replace(
            '_normal', '')
        img_data = requests.get(image_url).content
        with open('profile_picture.jpg', 'wb') as handler:
            handler.write(img_data)
        api.UpdateImage('profile_picture.jpg')

    def AjustCoef(self):
        """Tune the Markov chain's length coefficient until the average
        generated-tweet length is within 2 characters of the user's average
        stored-tweet length (estimated over 100 generations per round)."""
        with open(self._filename, "r", encoding="utf-8") as read_file:
            tweets = json.load(read_file)
        condition = True
        while condition:
            moyPoster = 0
            moyBot = 0
            # Average length of the user's stored tweets.
            for tweet in tweets:
                moyPoster += len(tweets[tweet]["text"])
            moyPoster = moyPoster / len(tweets.keys())
            # Average length of 100 freshly generated tweets.
            for _ in range(100):
                moyBot += len(self._MarkovChain.generateTweet())
            moyBot = moyBot / 100
            # Nudge the coefficient toward the user's average.
            if moyPoster > moyBot:
                self._MarkovChain.setCoef(self._MarkovChain.getCoef() + 0.01)
            elif moyPoster < moyBot:
                self._MarkovChain.setCoef(self._MarkovChain.getCoef() - 0.01)
            # FIX: |a-b| == |b-a|, so the original's duplicated second abs()
            # test was redundant; also removed the dead commented-out prints.
            if abs(moyBot - moyPoster) < 2:
                condition = False

    # GETTER
    def CollectLastTweetFromUser(self, user):
        """
        user: the id of the user (int).
        return the tweet of the last user.
        """
        lastTweet = api.GetUserTimeline(user_id=user,
                                        screen_name='',
                                        since_id='',
                                        max_id='',
                                        count=1,
                                        include_rts=False,
                                        trim_user=True,
                                        exclude_replies=True)
        return lastTweet[0]

    def CollectLastTweetFromSelf(self):
        """
        user: the id of the user (int).
        return the tweet of the last user.
        """
        lastTweet = api.GetUserTimeline(user_id=self._user,
                                        screen_name='',
                                        since_id='',
                                        max_id='',
                                        count=1,
                                        include_rts=False,
                                        trim_user=True,
                                        exclude_replies=True)
        return lastTweet[0]

    def getRepliedUserDictionnary(self):
        """
        return the dictionnary that contains all users that has been replied by the user.
        """
        return self._repliedUsers

    @staticmethod
    def getUser(Id):
        """
        Id: the id of the user (int).
        return the user data.
        """
        return api.GetUser(user_id=Id)

    def GenerateTweet(self):
        """
        filename: the name of the file that you want to gather data ex:"data_tweets.json".
        MaxSententences: a int between 0 and infinite (you are limited to 140 char so putting 99999 musn't be a good idea).
        return a tweet generated by the Makov chain (str).
        """
        return self._MarkovChain.generateTweet()

    def GenerateTweetWithKey(self, Key):
        """
        return a tweet generated by the Makov chain (str).
        """
        pass

    def isKeyInData(self, Key):
        with open(self._filename, "r", encoding="utf-8") as read_file:
            tweets = json.load(read_file)
        for tweet in tweets:
            if Key in tweets[tweet]["text"]:
                return True
        return False

    def getWaitingTime(self):
        tweets = api.GetUserTimeline(user_id=self._user,
                                     screen_name='',
                                     since_id='',
                                     max_id='',
                                     count=200,
                                     include_rts=False,
                                     trim_user=True,
                                     exclude_replies=True)
        a = self.strToDate(tweets[1].created_at)
        b = self.strToDate(tweets[0].created_at)
        d = 0
        if (b - a).days > 1:
            d = (b - a).days
        return d * 86400 + (abs(b.hour - dt.datetime.now().hour) * 60 *
                            60) + randint(0, 1800)

    @staticmethod
    def strToDate(dateStr):
        months = {
            "Jan": 1,
            "Feb": 2,
            "Mar": 3,
            "Apr": 4,
            "May": 5,
            "Jun": 6,
            "Jul": 7,
            "Aug": 8,
            "Sep": 9,
            "Oct": 10,
            "Nov": 11,
            "Dec": 12
        }
        year = int(dateStr[-4:])
        month = months[dateStr[4:7]]
        day = int(dateStr[8:10])
        hour = int(dateStr[10:13]) + 2
        minute = int(dateStr[14:16])
        second = int(dateStr[17:19])
        return dt.datetime(year, month, day, hour, minute, second)

        # POSTER

    def PostTweet(self, tweet):
        """
        tweet: str.
        post a tweet.
        """
        api.PostUpdate(tweet)

    def PostReply(self, tweet, tweetToReplyID):
        """
        tweet: a str with the @ of the user that you want to reply to at the beginning of the str.
        tweetToReply: tweet's id that you want to reply.
        post a reply to a tweet.
        """
        api.PostUpdate(tweet, in_reply_to_status_id=tweetToReplyID)
示例#24
0
from MarkovChain import MarkovChain

if __name__ == "__main__":
    # Row-stochastic 3x3 transition matrix.
    # NOTE(review): a 3x3 matrix is paired with 64 states below -- the
    # dimensions don't match; confirm how MarkovChain reconciles them.
    transition_matrix = [[0.8, 0.19, 0.01], [0.2, 0.7, 0.1], [0.1, 0.2, 0.7]]

    # States encode two consecutive Good/Bad triples, e.g. "GGG_GGB".
    weather_chain = MarkovChain(
        transition_matrix=transition_matrix,
        states=[
            'GGG_GGG', 'GGG_BGG', 'GGG_GBG', 'GGG_GGB', 'GGG_GBB', 'GGG_BBG',
            'GGG_BGB', 'GGG_BBB', 'GGB_GGG', 'GGB_BGG', 'GGB_GBG', 'GGB_GGB',
            'GGB_GBB', 'GGB_BGB', 'GGB_BBG', 'GGB_BBB', 'GBG_GGG', 'GBG_BGG',
            'GBG_GBG', 'GBG_GGB', 'GBG_GBB', 'BGG_BBG', 'BGG_BGB', 'BGG_BBB',
            'BGG_GGG', 'BGG_BGG', 'BGG_GBG', 'BGG_GGB', 'BGG_GBB', 'BGG_BBG',
            'BGG_BGB', 'BGG_BBB', 'GBB_GGG', 'GBB_BGG', 'GBB_GBG', 'GBB_GGB',
            'GBB_GBB', 'GBB_BBG', 'GBB_BGB', 'GBB_BBB', 'BBG_GGG', 'BBG_BGG',
            'BBG_GBG', 'BBG_GGB', 'BBG_GBB', 'BBG_BBG', 'BBG_BGB', 'BBG_BBB',
            'BGB_GGG', 'BGB_BGG', 'BGB_GBG', 'BGB_GGB', 'BGB_GBB', 'BGB_BBG',
            'BGB_BGB', 'BGB_BBB', 'BBB_GGG', 'BBB_BGG', 'BBB_GBG', 'BBB_GGB',
            'BBB_GBB', 'BBB_BBG', 'BBB_BGB', 'BBB_BBB'
        ])

    print(weather_chain.next_state(current_state='GGG_GGG'))

    # NOTE(review): 'Snowy' is not among the declared states -- this looks
    # like leftover from an earlier weather example; confirm.
    print(weather_chain.next_state(current_state='Snowy'))

    weather_chain.generate_states(current_state='Snowy', no=10)

    #env = MarkovChain()

    #RL = QLearningTable(actions=list(range(env.n_actions)))
    #update()
def get_markov_chain():
    """Load the pickled chain from FILE if it exists, else build a fresh one.

    NOTE: pickle.load on an attacker-controlled file can execute arbitrary
    code; FILE is assumed to be trusted local state.
    """
    if not os.path.isfile(FILE):
        return MarkovChain()
    with open(FILE, 'rb') as pickle_file:
        return pickle.load(pickle_file)
示例#26
0
 def __init__(self, k, n):
     """k: words per chain state (context length); n: words to generate."""
     self.mc = MarkovChain()
     self.k = k
     self.n = n
示例#27
0
        train_instances + test_instances)
    print("Number item: ", len(item_dict))
    transition_matrix = MC_utils.calculate_transition_matrix(
        train_instances, item_dict, item_freq_dict, reversed_item_dict,
        mc_order)
    print('Build knowledge done')
    sp_matrix_path = model_name + '_transition_matrix_MC.npz'
    # nb_item = len(item_dict)
    # print('Density : %.6f' % (transition_matrix.nnz * 1.0 / nb_item / nb_item))
    if not os.path.exists(o_dir):
        os.makedirs(o_dir)
    saved_file = os.path.join(o_dir, sp_matrix_path)
    print("Save model in ", saved_file)
    sp.save_npz(saved_file, transition_matrix)

    mc_model = MarkovChain(item_dict, reversed_item_dict, item_freq_dict,
                           transition_matrix, mc_order)
    topk = 50
    print('Predict to outfile')
    predict_file = os.path.join(o_dir, 'predict_' + model_name + '.txt')
    MC_utils.write_predict(predict_file, test_instances, topk, mc_model)
    print('Predict done')
    ground_truth, predict = MC_utils.read_predict(predict_file)
    for topk in [5, 10, 15, 20]:
        print("Top : ", topk)
        # hit_rate = MC_hit_ratio(test_instances, topk, mc_model)
        # recall = MC_recall(test_instances, topk, mc_model)
        hit_rate = MC_utils.hit_ratio(ground_truth, predict, topk)
        recall = MC_utils.recall(ground_truth, predict, topk)
        print("hit ratio: ", hit_rate)
        print("recall: ", recall)
def main():
    """Load the player-name dataset and run the Markov chain process on it."""
    dataset = load_dataset(txt_name="player_name.txt")
    chain = MarkovChain(dataset=dataset)
    chain.markov_chain_process()
示例#29
0
 def setUp(self):
     # Build a fresh chain from an empty corpus before each test.
     self.mc = MarkovChain("empty.txt")
示例#30
0
    "GreatEspectations.txt", "TaleOfTwoCities.txt"
]
# Prefix every book with its location on the cluster filesystem.
for i in range(len(bookArray)):
    bookArray[i] = "/home/team2/Project/" + bookArray[i]

#enter number of sentences here
numSentences = 12

#enter word you would like to start with
startWord = 'a'

# Build the word graph: vertices from the texts, edges from observed
# word-to-word transitions.
markovModel = DFMaker(bookArray)
edgeDF = markovModel.createEdgeDF(sc)
verticiesDF = markovModel.getVerticiesDF(sc)
graphFrame = GraphFrame(verticiesDF, edgeDF)

sentences = ""
if verticiesDF[verticiesDF["id"] == startWord].collect() == []:
    print("The word you entered does not appear in any of the texts!")
else:
    # Random-walk the graph, counting a finished sentence each time the
    # current state is a punctuation character.
    markovChain = MarkovChain(graphFrame, startWord)
    while numSentences > 0:
        currentState = markovChain.getState()
        if currentState in string.punctuation:
            numSentences -= 1
        sentences += markovChain.getState() + " "
        markovChain.nextState()

print(sentences)
print(markovModel.sortedLikeliness("the"))
#!/usr/bin/env python3
from sys import argv
import sqlite3
from datetime import datetime
from MarkovChain import MarkovChain
from config import COMMENT_FILE, MARKOV_DB, MIN_LEN, LOG_FILE

mc = MarkovChain(MARKOV_DB)


def makeDatabase():
    """Train the module-level chain on COMMENT_FILE and persist it to disk."""
    with open(COMMENT_FILE, 'r') as f:
        corpus = f.read()
    mc.generateDatabase(corpus)
    mc.dumpdb()


def printComments(qty):
    """Print *qty* generated comments, each at least MIN_LEN characters
    long and truncated to 140 characters.

    FIX: reuses singleComment() instead of duplicating its
    regenerate-until-long-enough loop.
    """
    for i in range(qty):
        print(str(i) + " - " + singleComment()[:140])


def singleComment():
    """Return one generated comment of at least MIN_LEN characters."""
    while True:
        candidate = mc.generateString()
        if len(candidate) >= MIN_LEN:
            return candidate