def main(input_file, output_file):
    """Build a syllable-level Markov chain from input_file and write it
    to output_file as JSON."""
    corpus = []
    lines_processed = 0
    for line in input_file:
        lines_processed += 1
        try:
            # Split each word into syllables; skip words rusyll rejects.
            corpus.append(
                rusyll.word_to_syllables_safe_wd(line.strip().lower()))
        except AssertionError:
            pass
    chain = Chain(corpus, 1)
    output_file.write(chain.to_json())
    print("{} lines processed. {} words added.".format(
        lines_processed, len(corpus)))
def make_model(model_file, state_size, key):
    init_settings()
    # Collect Bach chorales, optionally restricted to a single key.
    chorales = bach_chorales()
    chorales = filter_by_time_signature(chorales)
    if key:
        chorales = filter_by_key(chorales, key)
    training = []
    for chorale in chorales:
        # Build string states from the notes and durations of all four voices.
        sop = notes_and_durations(chorale, Voice.Soprano)
        alt = notes_and_durations(chorale, Voice.Alto)
        ten = notes_and_durations(chorale, Voice.Tenor)
        bas = notes_and_durations(chorale, Voice.Bass)
        states = [str(state) for state in gen_states(sop, alt, ten, bas)]
        training.append(states)
    chain = Chain(training, state_size=state_size)
    with open(model_file, 'w') as f:
        f.write(chain.to_json())
def make_music(model_file):
    with open(model_file) as f:
        model = json.load(f)
    chain = Chain.from_json(model)

    # One music21 part per voice.
    score = stream.Score()
    soprano_part = stream.Part()
    soprano_part.insert(0, instrument.Soprano())
    alto_part = stream.Part()
    alto_part.insert(0, instrument.Alto())
    tenor_part = stream.Part()
    tenor_part.insert(0, instrument.Tenor())
    bass_part = stream.Part()
    bass_part.insert(0, instrument.Bass())

    # Accumulated duration per voice, used to keep the voices in step.
    counter = {
        Voice.Soprano: Decimal(0.),
        Voice.Alto: Decimal(0.),
        Voice.Tenor: Decimal(0.),
        Voice.Bass: Decimal(0.),
    }
    current_state = {
        Voice.Soprano: None,
        Voice.Alto: None,
        Voice.Tenor: None,
        Voice.Bass: None,
    }
    parts = {
        Voice.Soprano: soprano_part,
        Voice.Alto: alto_part,
        Voice.Tenor: tenor_part,
        Voice.Bass: bass_part,
    }

    for state in chain.walk():
        S, A, T, B = make_tuple(state)
        current_state[Voice.Soprano] = S
        current_state[Voice.Alto] = A
        current_state[Voice.Tenor] = T
        current_state[Voice.Bass] = B

        # Only the voice(s) with the least accumulated duration receive a
        # new note or rest from the current state.
        min_value = min(counter.values())
        min_voices = [k for k in counter if counter[k] == min_value]
        for voice in min_voices:
            pitch, d = current_state[voice]
            if pitch == 'rest':
                n = note.Rest(duration=duration.Duration(d))
            else:
                n = note.Note(pitch, duration=duration.Duration(d))
            parts[voice].append(n)
            counter[voice] += Decimal(d)

    for k, v in parts.items():
        score.insert(Voice.order(k), v)
    score.show()
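# Assumed usage of the two chorale helpers above; the filename, state size,
# and key filter are illustrative values, not ones prescribed by the
# original scripts.
make_model('chorale_chain.json', state_size=2, key='C')
make_music('chorale_chain.json')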
def test_main(tmpdir):
    path = tmpdir.mkdir("sub")
    input_file = path.join("test_dict_1.txt")
    output_file = path.join("test_json_1.json")
    input_file.write("Аба\n"
                     "аБаба \n"
                     "аб-ба\n"
                     "zвеве\n"
                     "ве ве\n"
                     "ве.\n")
    # Expected corpus: only the lines rusyll can syllabify survive.
    corpus = [
        ["а", "ба"],
        ["а", "ба", "ба"],
        ["аб", "-", "ба"],
    ]
    chain = Chain(corpus, 1)
    runner = CliRunner()
    result = runner.invoke(corpus_parser.main,
                           [str(input_file), str(output_file)])
    assert "processed" in result.output
    assert result.exit_code == 0
    with open(str(output_file), "r") as test_json_1:
        assert test_json_1.read() == chain.to_json()
class MarkovChain(object):

    def __init__(self, state_size=3, pos_bind_char='::'):
        self.state_size = state_size
        self.pos_bind_char = pos_bind_char

    def train_model(self, corpus):
        self.model = Chain(corpus, self.state_size)
        return self

    def update(self, corpus, contribution=1):
        # Train a new chain on the fresh corpus and merge it with the old one.
        new_model = Chain(corpus, self.state_size)
        self.model = combine([self.model, new_model], [1, contribution])
        return self

    def make_response(self, init_state=None, n_sentences=100):
        sentences = []
        while len(sentences) < n_sentences:
            sentence_i = self.model.walk(init_state=init_state)
            if sentence_i is None:
                continue
            sentence_i = self._prune_pos_tags(sentence_i, self.pos_bind_char)
            sentences.append(sentence_i)
        return sentences

    def _prune_pos_tags(self, sentence, pos_bind_char):
        # Strip the POS tag from each token and rejoin with spaces,
        # keeping punctuation attached to the preceding word.
        untagged_sentence = list()
        for i, token in enumerate(sentence):
            if self.pos_bind_char in token:
                token = token.split(self.pos_bind_char)[0]
            if i == 0 or token in string.punctuation:
                untagged_sentence.append(token)
            else:
                untagged_sentence.append(' ' + token)
        return ''.join(untagged_sentence)
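# A minimal sketch of how MarkovChain might be used. The corpus format is
# assumed: a list of token runs where each token carries its POS tag after
# the bind char, which is what _prune_pos_tags strips back out.
corpus = [
    ['the::DT', 'cat::NN', 'sat::VBD', '.::.'],
    ['the::DT', 'dog::NN', 'ran::VBD', '.::.'],
]
mc = MarkovChain(state_size=1).train_model(corpus)
print(mc.make_response(n_sentences=2))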
def create_markov_chain(self):
    # Set the markov chain
    self.markov_chain = Chain(self.tokens, state_size=2)
class POSMarkovModel(object):

    def __init__(self, state_size=3, pos_bind_char='::', model=None):
        self.state_size = state_size
        self.pos_bind_char = pos_bind_char
        # Allow a previously fit model to be passed in
        self.model = model

    def fit(self, corpus):
        self.model = Chain(corpus, self.state_size)
        return self

    def update(self, corpus, contribution=1):
        # Train a new model and merge it with the old one
        new_model = Chain(corpus, self.state_size)
        self.model = combine([self.model, new_model], [1, contribution])
        return self

    def make_response(self, init_state=None, n_sentences=1):
        sentences = list()
        for _ in range(n_sentences):
            words = self.model.walk(init_state)
            sentence_i = self._prune_pos_tags(words, self.pos_bind_char)
            sentences.append(sentence_i)
        if len(sentences) > 1:
            response = ' '.join(sentences)
        else:
            response = sentences[0]
        return response

    def _prune_pos_tags(self, sentence, pos_bind_char):
        # Strip the POS tag from each token and rejoin with spaces,
        # keeping punctuation attached to the preceding word.
        untagged_sentence = list()
        for i, token in enumerate(sentence):
            if self.pos_bind_char in token:
                token = token.split(self.pos_bind_char)[0]
            if i == 0 or token in string.punctuation:
                untagged_sentence.append(token)
            else:
                untagged_sentence.append(' ' + token)
        return ''.join(untagged_sentence)

    def to_json(self, filename=None):
        """
        Dump the underlying model as JSON. If no filename is given,
        return a JSON string.
        """
        if self.model is None:
            raise RuntimeError('Cannot dump to json before fitting model')
        # self.model is a Chain object; self.model.model is the underlying dict
        modeldict = self.model.model
        # Add the pos_bind_char attribute so it survives serialization
        modeldict[('_POSMarkovModel_pos_bind_char',)] = self.pos_bind_char
        if not filename:
            return json.dumps(list(modeldict.items()))
        else:
            with open(filename, 'w') as f:
                json.dump(list(modeldict.items()), f)
            return

    @classmethod
    def from_json(cls, json_thing):
        """
        Given a JSON string or object, deserialize the Chain.
        To load from a file, use `POSMarkovModel.from_file`.
        """
        if isinstance(json_thing, str):
            obj = json.loads(json_thing)
        else:
            obj = json_thing
        # We cannot simply call the Chain classmethod because of the extra
        # attribute we added
        if isinstance(obj, list):
            rehydrated = dict((tuple(item[0]), item[1]) for item in obj)
        elif isinstance(obj, dict):
            rehydrated = obj
        else:
            raise ValueError('Object should be dict or list')
        # Pop the bind char back out before instantiating the model
        pos_bind_char = rehydrated.pop(('_POSMarkovModel_pos_bind_char',))
        # Let markovify rehydrate its own Chain
        model = Chain.from_json(rehydrated)
        state_size = model.state_size
        return cls(state_size, pos_bind_char, model)

    @classmethod
    def from_file(cls, jsonfile):
        """
        Load a model from a JSON file.
        """
        with open(jsonfile, 'r') as f:
            json_thing = json.load(f)
        return cls.from_json(json_thing)
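# An assumed round trip for POSMarkovModel: fit on a small POS-tagged corpus,
# persist the chain (and its bind char) to JSON, reload it with from_file,
# and generate a sentence. The corpus and filename are illustrative, not
# taken from the original code.
corpus = [
    ['the::DT', 'cat::NN', 'sat::VBD', '.::.'],
    ['a::DT', 'cat::NN', 'slept::VBD', '.::.'],
]
model = POSMarkovModel(state_size=1).fit(corpus)
model.to_json('pos_chain.json')

restored = POSMarkovModel.from_file('pos_chain.json')
print(restored.make_response())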