Пример #1
0
 def lines(self, human=False, abbr=False):
     """Return the corpus line count, read from its first file.

     Only the first entry of ``self.corpus_files`` is consulted. A corpus
     without any file reports the plain integer ``0`` regardless of
     ``human``.

     Args:
         human: When true, return the count formatted by
             ``utils.format_number`` instead of the raw value.
         abbr: Forwarded to ``utils.format_number`` when ``human`` is true.
     """
     files = self.corpus_files
     if len(files) == 0:
         return 0

     line_count = files[0].file.lines
     if human:
         return utils.format_number(line_count, abbr=abbr)
     return line_count
Пример #2
0
def train_console(id):
    """Render the training console page for a single engine.

    Looks the engine up by primary key, loads its ``config.yaml`` on a
    best-effort basis, groups the informational corpus entries by training
    phase and hands everything to ``train_console.html.jinja2``.

    Args:
        id: Primary key of the engine to show. The name shadows the builtin
            but is kept because callers may pass it by keyword.

    Returns:
        The rendered template response.

    Raises:
        werkzeug.exceptions.NotFound: If no engine with this ``id`` exists.
    """
    import logging

    from flask import abort

    engine = Engine.query.filter_by(id=id).first()
    if engine is None:
        # Previously this fell through to an AttributeError on engine.path.
        abort(404)

    config_file_path = os.path.join(
        os.path.realpath(
            os.path.join(app.config['PRELOADED_ENGINES_FOLDER'], engine.path)),
        'config.yaml')
    config = None

    # The configuration is optional for rendering: on any failure the page is
    # still shown with config=None, but the problem is logged instead of being
    # swallowed silently (and KeyboardInterrupt/SystemExit are not caught).
    # NOTE(review): yaml.FullLoader accepts more than plain data types;
    # consider yaml.safe_load if config.yaml only holds plain mappings/lists.
    try:
        with open(config_file_path, 'r') as config_file:
            config = yaml.load(config_file, Loader=yaml.FullLoader)
    except Exception:
        logging.getLogger(__name__).warning(
            "Could not load engine config %s", config_file_path,
            exc_info=True)

    launched = datetime.datetime.timestamp(engine.launched)
    finished = datetime.datetime.timestamp(
        engine.finished) if engine.finished else None

    corpora_raw = Corpus_Engine.query.filter_by(engine_id=engine.id,
                                                is_info=True).all()

    # phase -> [(corpus, formatted selected size), ...]
    corpora = {}
    for corpus_entry in corpora_raw:
        corpora.setdefault(corpus_entry.phase, []).append(
            (corpus_entry.corpus,
             utils.format_number(corpus_entry.selected_size, abbr=True)))

    return render_template("train_console.html.jinja2",
                           page_name="train",
                           engine=engine,
                           config=config,
                           launched=launched,
                           finished=finished,
                           elapsed=engine.runtime,
                           corpora=corpora,
                           elapsed_format=utils.seconds_to_timestring(
                               engine.runtime) if engine.runtime else None)
Пример #3
0
 def chars(self, human=False, abbr=False):
     """Return the character count summed over every file of the corpus.

     Args:
         human: When true, return the total formatted by
             ``utils.format_number`` instead of the raw number.
         abbr: Forwarded to ``utils.format_number`` when ``human`` is true.
     """
     total = sum(entry.file.chars for entry in self.corpus_files)
     if human:
         return utils.format_number(total, abbr=abbr)
     return total
Пример #4
0
 def words(self, human=False, abbr=False):
     """Return the word count summed over every file of the corpus.

     Args:
         human: When true, return the total formatted by
             ``utils.format_number`` instead of the raw number.
         abbr: Forwarded to ``utils.format_number`` when ``human`` is true.
     """
     total = sum(entry.file.words for entry in self.corpus_files)
     if human:
         return utils.format_number(total, abbr=abbr)
     return total
Пример #5
0
def _library_file_row(corpus, file_entry):
    """Build the table row (a list) describing one file of ``corpus``."""
    corpus_file = file_entry.file
    uploader = corpus_file.uploader

    uploaded_date = datetime.fromtimestamp(
        datetime.timestamp(corpus_file.uploaded)).strftime("%d/%m/%Y")

    # The trailing dict carries per-corpus metadata and action URLs.
    return [
        corpus_file.id, corpus_file.name, corpus_file.language.name,
        utils.format_number(corpus_file.lines),
        utils.format_number(corpus_file.words),
        corpus.topic.name if corpus.topic else "", uploaded_date, {
            "corpus_owner":
            (uploader.id == user_utils.get_uid()) if uploader else False,
            "corpus_uploader":
            uploader.username if uploader else "MutNMT",
            "corpus_id":
            corpus.id,
            "corpus_name":
            corpus.name,
            "corpus_description":
            corpus.description,
            "corpus_source":
            corpus.source.name,
            "corpus_target":
            corpus.target.name if corpus.target else "",
            "corpus_public":
            corpus.public,
            "corpus_size":
            corpus.corpus_files[0].file.lines,
            "corpus_preview":
            url_for('library.corpora_preview', id=corpus.id),
            "corpus_share":
            url_for('library.library_share_toggle',
                    type='library_corpora',
                    id=corpus.id),
            "corpus_delete":
            url_for('library.library_delete',
                    id=corpus.id,
                    type='library_corpora'),
            "corpus_grab":
            url_for('library.library_grab',
                    id=corpus.id,
                    type='library_corpora'),
            "corpus_ungrab":
            url_for('library.library_ungrab',
                    id=corpus.id,
                    type='library_corpora'),
            "corpus_export":
            url_for('library.library_export',
                    id=corpus.id,
                    type="library_corpora"),
            "file_preview":
            url_for('data.data_preview', file_id=corpus_file.id)
        }
    ]


def library_corpora_feed():
    """Serve the corpora library table as a DataTables-style JSON feed.

    Reads the form fields ``public``, ``draw``, ``search[value]``, ``start``,
    ``length``, ``order[0][column]`` and ``order[0][dir]`` from the current
    request. Corpora are sorted and paginated at corpus level, then each
    corpus on the page is expanded into one output row per file.

    Returns:
        A JSON response with the keys ``draw``, ``recordsTotal``,
        ``recordsFiltered`` and ``data``.
    """
    form = request.form
    public = form.get('public') == "true"

    if public:
        library_objects = user_utils.get_user_corpora(public=True).all()
    else:
        library_objects = user_utils.get_user_corpora().all()

    user_library = [lc.corpus for lc in library_objects]

    # We are not using the datatables helper since this is a specific case
    # and we need more control to group corpora.
    # Numeric fields are parsed with type=int so that a missing or malformed
    # value becomes None (or 0 for draw) instead of raising.
    draw = form.get('draw', default=0, type=int)
    search = form.get('search[value]')
    start = form.get('start', type=int)
    length = form.get('length', type=int)
    order = form.get('order[0][column]', type=int)
    direction = form.get('order[0][dir]')

    # Keep every corpus next to its sortable summary row so it does not have
    # to be queried again by id later on.
    corpus_rows = [([
        corpus.id, corpus.name,
        corpus.source.name + (corpus.target.name if corpus.target else ""),
        corpus.lines(),
        corpus.words(),
        corpus.chars(),
        corpus.uploaded()
    ], corpus) for corpus in user_library]

    records_total = len(corpus_rows)
    records_filtered = 0

    # NOTE(review): as before, column 0 (the id) never triggers a sort
    # because 0 is treated as "no ordering" - confirm this is intended.
    # Columns outside the summary row are ignored instead of raising.
    if order and corpus_rows and 0 < order < len(corpus_rows[0][0]):
        corpus_rows.sort(key=lambda pair: pair[0][order],
                         reverse=(direction == 'desc'))

    if start is not None and length is not None:
        # DataTables sends length=-1 to request every remaining row.
        stop = start + length if length >= 0 else None
        corpus_rows = corpus_rows[start:stop]

    # NOTE(review): the search runs after pagination, so it only filters the
    # current page and records_filtered counts matching corpora on that page.
    needle = search.lower() if search else None

    corpus_data = []
    for summary, corpus in corpus_rows:
        # Sorted in place, as before: "corpus_size" in the row metadata reads
        # the first entry of this same (sorted) list.
        file_entries = corpus.corpus_files
        file_entries.sort(key=lambda f: f.role)

        file_data = [
            _library_file_row(corpus, file_entry)
            for file_entry in file_entries
        ]

        if needle is not None:
            if not any(needle in str(col).lower()
                       for col in summary + file_data):
                continue
            records_filtered += 1

        corpus_data.extend(file_data)

    # NOTE(review): draw is returned incremented rather than echoed back;
    # kept unchanged - confirm against what the front end expects.
    return jsonify({
        "draw": draw + 1,
        "recordsTotal": records_total,
        "recordsFiltered": records_filtered if search else records_total,
        "data": corpus_data
    })