Пример #1
0
    def from_json(cls, json_thing):
        """
        Rebuild a model from its JSON representation.

        To load from file, use `POSMarkovModel.from_file`.

        Args:
            json_thing: Either a JSON string or an already-decoded object.
                The decoded object must be a list of ``[key, value]`` pairs
                (each key is converted to a tuple) or a dict.

        Returns:
            ``cls(state_size, pos_bind_char, model)`` where ``model`` is the
            rehydrated ``Chain`` and ``state_size`` is read from it.

        Raises:
            ValueError: If the decoded object is neither a list nor a dict.
            KeyError: If the ``('_POSMarkovModel_pos_bind_char',)`` entry is
                not present.
        """
        if isinstance(json_thing, str):
            obj = json.loads(json_thing)
        else:
            obj = json_thing

        # We cannot simply call the Chain classmethod because of the extra
        # attribute we added.
        if isinstance(obj, list):
            rehydrated = {tuple(item[0]): item[1] for item in obj}
        elif isinstance(obj, dict):
            # Shallow copy: the pop() below must not remove the entry from
            # the dict the caller handed us.
            rehydrated = dict(obj)
        else:
            raise ValueError('Object should be dict or list')

        # Pop bind_char for instantiating model
        pos_bind_char = rehydrated.pop(('_POSMarkovModel_pos_bind_char',))

        # Let Chain rebuild itself from the remaining entries.
        model = Chain.from_json(rehydrated)
        state_size = model.state_size

        return cls(state_size, pos_bind_char, model)
Пример #2
0
def main(input_file, output_file):
    """Makes things happen."""
    # NOTE(review): the docstring above is left untouched on purpose; this
    # function appears to be invoked as a CLI command elsewhere, in which
    # case the docstring may double as user-visible help text -- confirm.
    #
    # Every line of input_file is stripped, lower-cased and handed to
    # rusyll.word_to_syllables_safe_wd. Results are collected into a corpus,
    # a Chain is built from it (second argument 1) and its JSON form is
    # written to output_file. A summary is printed at the end.
    syllabified = []
    total_lines = 0

    for total_lines, raw_line in enumerate(input_file, start=1):
        word = raw_line.strip().lower()
        try:
            syllables = rusyll.word_to_syllables_safe_wd(word)
        except AssertionError:
            # Lines the helper rejects are skipped, but still counted above.
            continue
        syllabified.append(syllables)

    output_file.write(Chain(syllabified, 1).to_json())

    summary = "{} lines processed. {} words added."
    print(summary.format(total_lines, len(syllabified)))
Пример #3
0
def make_model(model_file, state_size, key):
    """Train a ``Chain`` on chorale data and write it to ``model_file`` as JSON.

    Args:
        model_file: Path of the file that receives ``chain.to_json()``.
        state_size: Forwarded to ``Chain`` as ``state_size``.
        key: When truthy, the chorales are additionally narrowed with
            ``filter_by_key(chorales, key)``.
    """
    init_settings()

    chorales = filter_by_time_signature(bach_chorales())
    if key:
        chorales = filter_by_key(chorales, key)

    # Voices are always processed in this order for every chorale.
    voice_order = (Voice.Soprano, Voice.Alto, Voice.Tenor, Voice.Bass)

    training = []
    for chorale in chorales:
        voice_lines = [notes_and_durations(chorale, voice) for voice in voice_order]
        # Each state is stored in its string form.
        training.append([str(state) for state in gen_states(*voice_lines)])

    chain = Chain(training, state_size=state_size)
    with open(model_file, 'w') as out:
        out.write(chain.to_json())
Пример #4
0
def make_music(model_file):
    """Walk a saved chain and render the result as a four-part score.

    The JSON in ``model_file`` is loaded into a ``Chain``. Each state yielded
    by ``chain.walk()`` is parsed with ``make_tuple`` into one
    ``(pitch, duration)`` pair per voice. For every state, only the voices
    whose accumulated duration equals the current minimum receive a new note
    or rest. The finished score is displayed with ``score.show()``.

    Args:
        model_file: Path of the JSON file to load the chain from.
    """
    with open(model_file) as handle:
        model = json.load(handle)
    chain = Chain.from_json(model)

    score = stream.Score()

    # One part per voice, each opened with its matching instrument.
    voice_instruments = (
        (Voice.Soprano, instrument.Soprano),
        (Voice.Alto, instrument.Alto),
        (Voice.Tenor, instrument.Tenor),
        (Voice.Bass, instrument.Bass),
    )
    parts = {}
    for voice, make_instrument in voice_instruments:
        part = stream.Part()
        part.insert(0, make_instrument())
        parts[voice] = part

    # Total duration written so far, per voice.
    elapsed = {voice: Decimal(0.) for voice in parts}

    for state in chain.walk():
        sop, alt, ten, bas = make_tuple(state)
        current = {
            Voice.Soprano: sop,
            Voice.Alto: alt,
            Voice.Tenor: ten,
            Voice.Bass: bas,
        }

        # Only the voices that are furthest behind get an event from this
        # state; the list is fixed before any counter is advanced.
        earliest = min(elapsed.values())
        behind = [voice for voice, total in elapsed.items() if total == earliest]

        for voice in behind:
            pitch, length = current[voice]
            if pitch == 'rest':
                event = note.Rest(duration=duration.Duration(length))
            else:
                event = note.Note(pitch, duration=duration.Duration(length))
            parts[voice].append(event)
            elapsed[voice] += Decimal(length)

    for voice, part in parts.items():
        score.insert(Voice.order(voice), part)
    score.show()
Пример #5
0
def test_main(tmpdir):
    """Check that the CLI output matches a chain built directly.

    A small word list is written to a temporary file and
    ``corpus_parser.main`` is invoked on it through ``CliRunner``. The JSON
    it writes must equal ``to_json()`` of a ``Chain`` built from the three
    syllable lists below, which implies the other three input lines are
    expected to be dropped.
    """
    workdir = tmpdir.mkdir("sub")
    input_file = workdir.join("test_dict_1.txt")
    output_file = workdir.join("test_json_1.json")

    raw_lines = ["Аба\n", "аБаба \n", "аб-ба\n", "zвеве\n", "ве ве\n", "ве.\n"]
    input_file.write("".join(raw_lines))

    expected_corpus = [
        ["а", "ба"],
        ["а", "ба", "ба"],
        ["аб", "-", "ба"],
    ]
    expected_chain = Chain(expected_corpus, 1)

    cli_args = [str(input_file), str(output_file)]
    result = CliRunner().invoke(corpus_parser.main, cli_args)

    assert "processed" in result.output
    assert result.exit_code == 0

    with open(str(output_file), "r") as written_file:
        written = written_file.read()
        assert written == expected_chain.to_json()
Пример #6
0
class MarkovChain(object):
    """Thin wrapper around ``Chain`` that renders walks as plain sentences.

    Tokens may carry a suffix introduced by ``pos_bind_char``; everything
    from the first occurrence of that separator onwards is removed when a
    walk is rendered.
    """

    def __init__(self, state_size=3, pos_bind_char='::'):
        """Store the settings; ``self.model`` is not set until training."""
        self.pos_bind_char = pos_bind_char
        self.state_size = state_size

    def train_model(self, corpus):
        """Build a ``Chain`` from ``corpus`` and keep it as ``self.model``.

        Returns ``self``.
        """
        trained = Chain(corpus, self.state_size)
        self.model = trained

        return self

    def update(self, corpus, contribution=1):
        """Combine the current model with a chain built from ``corpus``.

        ``combine`` receives ``[self.model, new_chain]`` together with
        ``[1, contribution]``. Returns ``self``.
        """
        addition = Chain(corpus, self.state_size)
        models = [self.model, addition]
        self.model = combine(models, [1, contribution])

        return self

    def make_response(self, init_state=None, n_sentences=100):
        """Return a list of ``n_sentences`` rendered walks.

        Walks for which ``self.model.walk`` returns ``None`` are retried and
        do not count towards ``n_sentences``.
        """
        collected = []
        while len(collected) < n_sentences:
            tokens = self.model.walk(init_state=init_state)
            if tokens is not None:
                rendered = self._prune_pos_tags(tokens, self.pos_bind_char)
                collected.append(rendered)

        return collected

    def _prune_pos_tags(self, sentence, pos_bind_char):
        """Strip tag suffixes from ``sentence`` and join it into one string.

        The first token, and any token found in ``string.punctuation``, is
        appended as-is; every other token is preceded by a single space.

        Note: the ``pos_bind_char`` argument is accepted but not used; the
        separator actually applied is ``self.pos_bind_char``.
        """
        pieces = []
        for index, token in enumerate(sentence):
            if self.pos_bind_char in token:
                token = token.split(self.pos_bind_char)[0]
            needs_space = index != 0 and token not in string.punctuation
            pieces.append(' ' + token if needs_space else token)

        return ''.join(pieces)
Пример #7
0
    def update(self, corpus, contribution=1):
        """Combine the current model with a chain built from ``corpus``.

        A new ``Chain`` is created from ``corpus`` with this object's
        ``state_size``. ``combine`` is then called with
        ``[self.model, new_chain]`` and ``[1, contribution]``, and its result
        replaces ``self.model``. Returns ``self``.
        """
        addition = Chain(corpus, self.state_size)
        models = [self.model, addition]
        self.model = combine(models, [1, contribution])

        return self
Пример #8
0
    def train_model(self, corpus):
        """Build a ``Chain`` from ``corpus`` and keep it as ``self.model``.

        The chain is created with this object's ``state_size``.
        Returns ``self``.
        """
        trained = Chain(corpus, self.state_size)
        self.model = trained

        return self
Пример #9
0
 def create_markov_chain(self):
     """Build a ``Chain`` from ``self.tokens`` and store it on the instance.

     The chain is created with ``state_size=2`` and assigned to
     ``self.markov_chain``.
     """
     chain = Chain(self.tokens, state_size=2)
     self.markov_chain = chain
Пример #10
0
class POSMarkovModel(object):
    """Markov model wrapper around ``Chain`` with JSON (de)serialization.

    Tokens may carry a suffix introduced by ``pos_bind_char``; everything
    from the first occurrence of that separator onwards is removed when a
    walk is rendered as text.
    """

    # Extra entry stored next to the chain's own entries in the JSON dump so
    # that ``pos_bind_char`` survives a round trip.
    _BIND_CHAR_KEY = ('_POSMarkovModel_pos_bind_char',)

    def __init__(self, state_size=3, pos_bind_char='::', model=None):
        """Store the settings and, optionally, an already-built chain."""
        self.state_size = state_size
        self.pos_bind_char = pos_bind_char
        # Allow for model to already have been fit
        self.model = model

    def fit(self, corpus):
        """Build a ``Chain`` from ``corpus`` and keep it as ``self.model``.

        Returns ``self``.
        """
        self.model = Chain(corpus, self.state_size)
        return self

    def update(self, corpus, contribution=1):
        """Combine the current model with a chain built from ``corpus``.

        ``combine`` receives ``[self.model, new_chain]`` together with
        ``[1, contribution]``. Returns ``self``.
        """
        # train a new model and merge it with old
        new_model = Chain(corpus, self.state_size)
        self.model = combine([self.model, new_model], [1, contribution])
        return self

    def make_response(self, init_state=None, n_sentences=1):
        """Generate ``n_sentences`` walks and return them as one string.

        Sentences are separated by a single space. With ``n_sentences=0``
        an empty string is returned.
        """
        sentences = [
            self._prune_pos_tags(self.model.walk(init_state), self.pos_bind_char)
            for _ in range(n_sentences)
        ]
        # str.join covers the zero- and one-sentence cases as well, so no
        # special-casing (and no IndexError on an empty list) is needed.
        return ' '.join(sentences)

    def _prune_pos_tags(self, sentence, pos_bind_char):
        """Strip tag suffixes from ``sentence`` and join it into one string.

        The first token, and any token found in ``string.punctuation``, is
        appended as-is; every other token is preceded by a single space.

        Note: the ``pos_bind_char`` argument is accepted but not used; the
        separator actually applied is ``self.pos_bind_char``.
        """
        untagged_sentence = []
        for i, token in enumerate(sentence):
            if self.pos_bind_char in token:
                token = token.split(self.pos_bind_char)[0]
            if i == 0 or token in string.punctuation:
                untagged_sentence.append(token)
            else:
                untagged_sentence.append(' ' + token)
        return ''.join(untagged_sentence)

    def to_json(self, filename=None):
        """
        Dump underlying model as json.  If no filename, return str.

        Args:
            filename: Optional path. When given, the JSON is written there
                and ``None`` is returned.

        Raises:
            RuntimeError: If no model has been set yet.
        """
        if self.model is None:
            raise RuntimeError('Cannot dump to json before fitting model')

        # self.model is Chain object, self.model.model is underlying dict.
        # Work on a shallow copy: adding the bind-char entry to the live
        # dict would leave an extra, non-chain entry inside the model.
        modeldict = dict(self.model.model)

        # add pos_bind_char attribute
        modeldict[self._BIND_CHAR_KEY] = self.pos_bind_char

        payload = list(modeldict.items())
        if not filename:
            return json.dumps(payload)

        with open(filename, 'w') as f:
            json.dump(payload, f)
        return None

    @classmethod
    def from_json(cls, json_thing):
        """
        Rebuild a model from its JSON representation.

        To load from file, use `POSMarkovModel.from_file`.

        Args:
            json_thing: Either a JSON string or an already-decoded object.
                The decoded object must be a list of ``[key, value]`` pairs
                (each key is converted to a tuple) or a dict.

        Returns:
            ``cls(state_size, pos_bind_char, model)`` where ``model`` is the
            rehydrated ``Chain`` and ``state_size`` is read from it.

        Raises:
            ValueError: If the decoded object is neither a list nor a dict.
            KeyError: If the bind-char entry is not present.
        """
        if isinstance(json_thing, str):
            obj = json.loads(json_thing)
        else:
            obj = json_thing

        # We cannot simply call the Chain classmethod because of the extra
        # attribute we added.
        if isinstance(obj, list):
            rehydrated = {tuple(item[0]): item[1] for item in obj}
        elif isinstance(obj, dict):
            # Shallow copy: the pop() below must not remove the entry from
            # the dict the caller handed us.
            rehydrated = dict(obj)
        else:
            raise ValueError('Object should be dict or list')

        # Pop bind_char for instantiating model
        pos_bind_char = rehydrated.pop(cls._BIND_CHAR_KEY)

        # Let Chain rebuild itself from the remaining entries.
        model = Chain.from_json(rehydrated)
        state_size = model.state_size

        return cls(state_size, pos_bind_char, model)

    @classmethod
    def from_file(cls, jsonfile):
        """
        Load model from JSON file

        Args:
            jsonfile: Path of a file containing JSON accepted by
                ``from_json``.
        """
        with open(jsonfile, 'r') as f:
            json_thing = json.load(f)
        # Go through cls so that subclasses get an instance of themselves.
        return cls.from_json(json_thing)
Пример #11
0
 def fit(self, corpus):
     """Build a ``Chain`` from ``corpus`` and keep it as ``self.model``.

     The chain is created with this object's ``state_size``.
     Returns ``self``.
     """
     fitted = Chain(corpus, self.state_size)
     self.model = fitted
     return self