Example #1
def run(model, filename=None, train_filename=None, predict_filename=None, line_function=None, train_line_function=None, predict_line_function=None, evaluate_function=None, split=0.8, header=True):
    if train_line_function is None and line_function is not None:
        train_line_function = line_function
    if predict_line_function is None and line_function is not None:
        predict_line_function = line_function
    if train_filename is None and filename is not None:
        train_filename = filename
    if predict_filename is None and filename is not None:
        predict_filename = filename
    # Use one model per core when a list of models is given, otherwise run on a single core.
    num_cores = len(model) if isinstance(model, collections.abc.Sequence) else 1
    if num_cores > 1:
        os.system("spanning_tree")
        if header:
            num_lines = sum(1 for line in open(train_filename))
            os.system('tail -n {} {} > {}'.format(num_lines - 1, train_filename, train_filename + '_'))
            if predict_filename != train_filename:
                num_lines = sum(1 for line in open(predict_filename))
                os.system('tail -n {} {} > {}'.format(num_lines - 1, predict_filename, predict_filename + '_'))
            train_filename = train_filename + '_'
            predict_filename = predict_filename + '_'
            header = False
        split_file(train_filename, num_cores)
        if predict_filename != train_filename:
            split_file(predict_filename, num_cores)
        pool = Pool(num_cores)
        train_filenames = [train_filename + (str(n) if n >= 10 else '0' + str(n)) for n in range(num_cores)]
        predict_filenames = [predict_filename + (str(n) if n >= 10 else '0' + str(n)) for n in range(num_cores)]
        args = []
        for i in range(num_cores):
            args.append({'model': model[i],
                         'train_filename': train_filenames[i],
                         'predict_filename': predict_filenames[i],
                         'train_line_function': train_line_function,
                         'predict_line_function': predict_line_function,
                         'evaluate_function': evaluate_function,
                         'split': split,
                         'quiet': model[i].params.get('quiet'),
                         'multicore': True,
                         'header': header})
        results = sum(pool.map(run_model, args), [])
        if evaluate_function:
            print(evaluate_function(results))
        for f in train_filenames + predict_filenames:
            safe_remove(f)
        os.system('killall spanning_tree')
        return results
    else:
        return run_(model,
                    train_filename=train_filename,
                    predict_filename=predict_filename,
                    train_line_function=train_line_function,
                    predict_line_function=predict_line_function,
                    evaluate_function=evaluate_function,
                    split=split,
                    quiet=model.params.get('quiet'),
                    multicore=False,
                    header=header)
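Example #1 relies on a split_file(path, n) helper that is not shown here. Judging by the part names the code later reconstructs (two-digit suffixes '00', '01', ... appended to the filename), it presumably splits a text file into n roughly equal pieces on disk, much like the Unix split utility. A minimal sketch under that assumption (the real project may implement it differently, e.g. by shelling out to split):

import math

def split_file(path, num_parts):
    # Hypothetical helper, inferred from Example #1: split a text file into
    # num_parts pieces named path00, path01, ... and leave them on disk.
    with open(path) as f:
        lines = f.readlines()
    chunk = math.ceil(len(lines) / num_parts)
    for i in range(num_parts):
        with open('{}{:02d}'.format(path, i), 'w') as out:
            out.writelines(lines[i * chunk:(i + 1) * chunk])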
Example #2
def read_bytes_as_segments(segment_byte_data: bytes) -> Iterator[Segment]:
    byte_file = BytesIO(segment_byte_data)
    segment_bytes = split_file(byte_file, 12)
    counted_segment_bytes = enumerate(segment_bytes)
    casted_counted_segment_bytes = (get_segment(
        i, current_byte) for i, current_byte in counted_segment_bytes)
    yield from casted_counted_segment_bytes
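In Examples #2, #4, and #12 the helper is instead called with a file-like object and a fixed chunk size, and its result is enumerated, so in those snippets split_file apparently yields successive fixed-size byte chunks. A minimal sketch, assuming that interface:

def split_file(file_obj, chunk_size):
    # Hypothetical generator, inferred from Examples #2, #4 and #12:
    # yield successive chunk_size-byte blocks until the stream is exhausted.
    while True:
        chunk = file_obj.read(chunk_size)
        if not chunk:
            return
        yield chunk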
Example #3
def index_worker(index, ipath):
    """Indexing.

    Parameters
    ----------
    index : str
        Name of index.
    ipath : str
        Path to raw json files.

    """

    opath = 'tmp.json'
    for chunk in split_file(ipath, 10000):
        with open(opath, 'w') as ofp:
            for doc in chunk:
                if 'title' not in doc or 'abstract' not in doc:
                    global drop_count
                    drop_count += 1
                    continue
                doc['title'] = doc['title'].lower().strip()
                doc['abstract'] = doc['abstract'].lower()
                doc['keywords'] = [e.lower().strip()
                                   for e in doc.get('keywords', [])]
                doc['fos'] = [e.lower().strip() for e in doc.get('fos', [])]
                json.dump({'index': {'_index': index}}, ofp)
                ofp.write('\n')
                json.dump(doc, ofp)
                ofp.write('\n')
        bulk_insert(index, opath)
    refresh(index)
    os.remove(opath)
Example #4
def get_file_index_info(header_data):
    with open("doom1.wad", 'rb') as wadfile:
        wadfile.seek(header_data.info_table_offset)
        file_chunks = split_file(wadfile, 16)
        enumerated_file_parts = enumerate(file_chunks)
        yield from ((position, get_index_entry(file_chunk))
                    for position, file_chunk in enumerated_file_parts)
Example #5
    def create_rhymescheme(self):
        """
        Creates a rhyme scheme list for the lyrics and writes it to a file.
        """
        lyrics = split_file(self.lyrics_filename)
        schemes = []
        for i, line in enumerate(lyrics):
            schemes.append(self.line_rhymescheme(line))
        with open(self.scheme_filename, "w", encoding='utf-8') as f:
            f.write('\n'.join(schemes) + '\n')
Example #6
def evaluate(ident, syllable_rhyme=True, generated=False):
    if generated:
        gen = '_generated'
    else:
        gen = ''
    if syllable_rhyme:
        syl = '.syl'
    else:
        syl = ''
    endings = split_file(f'schemes/{ident}{gen}{syl}.schemes')
    return rhyme_score(endings)
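The lyrics examples (#5, #6, #8, #9, and #11) call split_file with only a filename and then index and iterate the result line by line, so there the helper presumably just returns the file's lines. A sketch under that assumption:

def split_file(filename):
    # Hypothetical reader, inferred from the lyrics examples: return the
    # file's lines with trailing newlines stripped and blank lines dropped.
    with open(filename, encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]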
Example #7
    def run(self) -> List[subprocess.CompletedProcess]:
        # Record file information to the database:
        self.record_file()

        # Split file into smaller files for parallel transfer:
        split_files = split_file(file=self.file,
                                 file_split_size=self.file_split_size,
                                 file_split_chunk=self.cores)

        # Spawn a pool of workers to process transfer:
        with Pool(self.cores) as pool:
            results = pool.map(self.run_sub_experiment, split_files)

        return results
Example #8
def average_syllables(target_list):
    """
    Counts the average number of syllables in the lyrics for the specified artists and/or genres.

    :param target_list: list of identifiers (artist and/or genres), str
    :return: dictionary {identifier: average number of syllables}, dict
    """
    syllables = {}
    for ident in target_list:
        original_bars = split_file(f'data/{ident}.txt')
        count = 0
        excluded = 0
        for line in original_bars:
            syls = count_syllables(line)
            if syls > 3:
                count += syls
            else:
                excluded += 1
        syllables[ident] = count / (len(original_bars) - excluded)
    return syllables
Example #9
    def __init__(self, identifier, params, syllable_rhyme=False):
        """
        Initializes the LyricsGenerator class, which provides methods for generating lyrics based on
        Markov chains and an RNN: it generates Markov sequences from word1->word2 transition
        probabilities (markovify library) and uses an LSTM network to pick the most suitable
        sequence (LyricsNN class).
        :param identifier: current artist or genre, str
        :param params: contains the depth, max_syllables, max_overlap and num_lines parameters, dict
        :param syllable_rhyme: True to use morpheme-based rhyme, False to use rhyme based on the last two letters, bool
        """
        self.params = params
        self.identifier = identifier
        if syllable_rhyme:
            self.rhymer = RhymerSyl(identifier)
            self.path_modifier = '_syl'
        else:
            self.rhymer = RhymerEnd(identifier)
            self.path_modifier = ''
        self.training_file = f"data/{identifier}.txt"
        self.lyrics_model = LyricsNN(self.params['depth'], identifier)
        self.markov_model = create_markov_model(self.training_file)
        self.original_bars = split_file(self.training_file)
Example #10
def main():
    # if len(sys.argv) != 5:
    #     logging.info('please input args: car_path, road_path, cross_path, answerPath')
    #     exit(1)

    # car_path = sys.argv[1]
    # road_path = sys.argv[2]
    # cross_path = sys.argv[3]
    # answer_path = sys.argv[4]
    #car_path = '/Users/ch_cmpter/Desktop/car_test.txt'
    car_path = '../config_5/car.txt'
    road_path = '../config_5/road.txt'
    cross_path = '../config_5/cross.txt'
    answer_path = '../config_5/answer.txt'
    # logging.info("car_path is %s" % (car_path))
    # logging.info("road_path is %s" % (road_path))
    # logging.info("cross_path is %s" % (cross_path))
    # logging.info("answer_path is %s" % (answer_path))

    car_paths = split_file(car_path, 8000)
    base_answer_dir = split(answer_path)[0]
    answer_paths = [
        join(base_answer_dir, 'answer_{}.txt'.format(str(ix)))
        for ix, _ in enumerate(car_paths)
    ]

    pool = multiprocessing.Pool()
    results = []
    for car_p, ans_p in zip(car_paths, answer_paths):
        result = pool.apply_async(driver,
                                  args=(road_path, car_p, cross_path,
                                        ans_p))
        results.append(result)

    pool.close()
    pool.join()
    for r in results:
        print(r.get())
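Example #10 passes a path and a chunk size of 8000 and then treats the return value as a list of part-file paths that it hands to worker processes, so in that project split_file apparently writes chunks of the input to separate files and returns their paths. A hedged sketch of that interface:

def split_file(path, lines_per_part):
    # Hypothetical splitter, inferred from Example #10: write every
    # lines_per_part lines to its own file and return the part paths.
    part_paths = []
    with open(path) as f:
        lines = f.readlines()
    for i in range(0, len(lines), lines_per_part):
        part_path = '{}.part{}'.format(path, i // lines_per_part)
        with open(part_path, 'w') as out:
            out.writelines(lines[i:i + lines_per_part])
        part_paths.append(part_path)
    return part_paths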
Example #11
    def generating_phase(self):
        """
        Generation phase: consecutively creates and filters Markov sequences and vectors and
        converts these vectors into lyrics.
        :return: generated lyrics, str
        """
        markov_bars = self.generate_lyrics()
        if os.path.exists(self.rhymer.rhyme_filename):
            rhyme_list = split_file(self.rhymer.rhyme_filename)
        else:
            print("Rhyme list was not created, please train the model first.")
            return
        vectors = self.create_vectors(rhyme_list)
        lyrics = self.vectors_into_lyrics(vectors, markov_bars, rhyme_list)
        lyrics_str = ''
        # Write the generated bars to a file while collecting them into a single string.
        with open(
                f"generated_lyrics/{self.identifier}{self.path_modifier}_generated.txt",
                "w",
                encoding='utf-8') as f:
            for bar in lyrics:
                f.write(bar)
                f.write("\n")
                lyrics_str += bar + '\n'
        return lyrics_str
Example #12
def read_bytes_as_lines(line_byte_data: bytes) -> Iterator[Line]:
    byte_file = BytesIO(line_byte_data)
    line_bytes = split_file(byte_file, 14)
    counted_line_bytes = enumerate(line_bytes)
    casted_counted_line_bytes = (get_line(i, current_byte) for i, current_byte in counted_line_bytes)
    yield from casted_counted_line_bytes