Exemplo n.º 1
0
def models_page(lang):
    """Render the language-specific ``models.html`` page.

    Stores the requested language and its string table on ``g`` and passes
    the alternative language to the template.

    :param lang: language code; must be a key of ``language_dicts``.
    :return: the result of ``render_template``.
    """
    g.lang = lang
    # Whatever remains once the current language is removed; this is only
    # meaningful while exactly two languages are configured.
    remaining = set(language_dicts.keys()) - {lang}
    other_lang = list(remaining)[0]
    g.strings = language_dicts[lang]
    template_name = '%s/models.html' % lang
    return render_template(template_name, other_lang=other_lang, languages=languages, url=url)
Exemplo n.º 2
0
def raw_finder(lang, model, userquery):
    """Render the word page for one query word in one model.

    The query is validated, sent to the model server as operation ``'1'``
    and the returned neighbors, vector and frequencies are passed to
    ``wordpage.html``. ``singularplot`` is called unless a PNG for this
    model/query pair is already present on disk.

    :param lang: language code; must be a key of ``language_dicts``.
    :param model: model identifier; surrounding whitespace is ignored.
    :param userquery: raw user query; surrounding whitespace is ignored.
    :return: a rendered ``wordpage.html``, or a 303 redirect to the language
        root when ``model`` is not in ``our_models``.
    """
    g.lang = lang
    other_lang = list(set(language_dicts.keys()) - {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]

    model = model.strip()
    if model not in our_models:
        return redirect(url + lang + '/', code=303)

    cleaned_query = userquery.strip()
    # Only letters/digits plus the separators '_', '-' and '::' are accepted.
    if not cleaned_query.replace('_', '').replace('-', '').replace('::', '').isalnum():
        error_value = 'Incorrect query!'
        return render_template("wordpage.html", error=error_value, tags=tags, other_lang=other_lang,
                               languages=languages, url=url)

    query = process_query(cleaned_query)
    if tags:
        # With tagging enabled the processed query must look like word_TAG.
        if len(query.split('_')) < 2:
            error_value = "Incorrect tag!"
            return render_template('wordpage.html', error=error_value, other_lang=other_lang,
                                   languages=languages, url=url)
        pos_tag = query.split('_')[-1]
    else:
        pos_tag = 'ALL'

    images = {query.split('_')[0]: None}
    image = None
    if model_props[model]['tags'] == 'False':
        # Untagged model: query the bare word and do not filter by POS.
        query = query.split('_')[0]
        pos_tag = 'ALL'

    message = {'operation': '1', 'query': query, 'pos': pos_tag, 'model': model}
    result = json.loads(serverquery(message).decode('utf-8'))
    if query + " is unknown to the model" in result or "No results" in result:
        # The first key of the response is shown as the error text.
        return render_template('wordpage.html', error=list(result)[0], other_lang=other_lang,
                               languages=languages, url=url, word=query, models=our_models, model=model)

    inferred = set()
    if 'inferred' in result:
        inferred.add(model)
    vector = result['vector']
    for word in result['neighbors']:
        images[word[0].split('_')[0]] = None

    # The plot file name is the MD5 of the (ASCII-escaped) query.
    name = query.encode('ascii', 'backslashreplace')
    fname = hashlib.md5(name).hexdigest()
    plotfile = root + 'data/images/singleplots/' + model + '_' + fname + '.png'
    if not os.access(plotfile, os.F_OK):
        singularplot(query, model, vector, fname)

    if dbpedia:
        # Image lookup is best-effort: any ordinary failure leaves the page
        # without pictures. Only Exception is caught so that SystemExit and
        # KeyboardInterrupt still propagate.
        try:
            images = get_images(images)
            image = images[query.split('_')[0]]
        except Exception:
            pass

    return render_template('wordpage.html', list_value=result['neighbors'], word=query, model=model,
                           pos=pos_tag,
                           vector=vector, image=image, wordimages=images, vectorvis=fname, tags=tags,
                           other_lang=other_lang, languages=languages, url=url, search=defaultsearchengine,
                           models=our_models, inferred=inferred, frequencies=result['frequencies'])
Exemplo n.º 3
0
def home(lang):
    """Render the home page and answer nearest-neighbor queries posted to it.

    On GET the empty page is rendered. On POST the ``list_query`` form field
    is validated, sent to the model server as operation ``'1'`` and the
    neighbors are rendered.

    :param lang: language code; must be a key of ``language_dicts``.
    :return: the result of ``render_template`` for ``home.html``.
    """
    # pass all required variables to template
    # repeated within each @wvectors.route function
    g.lang = lang
    other_lang = list(set(language_dicts.keys()) - {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]

    if request.method != 'POST':
        return render_template(
            'home.html', tags=tags, other_lang=other_lang, languages=languages, url=url)

    # None (not a magic string) marks a missing field, so that every word,
    # including the literal "dummy", remains a valid query.
    list_data = request.form.get('list_query')
    if list_data is None or not list_data.replace('_', '').replace('-', '').replace('::', '').replace(
            ' ', '').isalnum():
        error_value = "Incorrect query!"
        return render_template("home.html", error=error_value, tags=tags, other_lang=other_lang,
                               languages=languages, url=url)

    query = process_query(list_data)
    if query == "Incorrect tag!":
        error_value = "Incorrect tag!"
        return render_template('home.html', error=error_value, other_lang=other_lang,
                               languages=languages, url=url)

    model_value = request.form.getlist('model')
    model = model_value[0] if model_value else defaultmodel

    images = {query.split('_')[0]: None}
    if model_props[model]['tags'] == 'False':
        # Untagged model: query the bare word.
        query = query.split('_')[0]
    message = {'operation': '1', 'query': query, 'pos': 'ALL', 'model': model}
    result = json.loads(serverquery(message).decode('utf-8'))

    unknown_message = query + " is unknown to the model"
    if unknown_message in result:
        return render_template('home.html', error=unknown_message,
                               other_lang=other_lang, languages=languages,
                               url=url, word=query)

    for word in result['neighbors']:
        images[word[0].split('_')[0]] = None
    if dbpedia:
        # Image lookup is best-effort; only ordinary exceptions are ignored
        # so that SystemExit/KeyboardInterrupt still propagate.
        try:
            images = get_images(images)
        except Exception:
            pass

    inferred = set()
    if 'inferred' in result:
        inferred.add(model)
    return render_template('home.html', list_value=result['neighbors'], word=query,
                           wordimages=images, models=our_models, model=model, tags=tags,
                           other_lang=other_lang, languages=languages, url=url,
                           inferred=inferred, frequencies=result['frequencies'])
Exemplo n.º 4
0
Arquivo: nlp.py Projeto: rusnlp/rusnlp
def topical_page(lang):
    """Render ``/topical.html`` for the requested language.

    :param lang: language code; must be a key of ``language_dicts``.
    :return: the result of ``render_template``.
    """
    # pass all required variables to template
    # repeated within each @nlpsearch.route function
    g.lang = lang
    alternatives = set(language_dicts.keys()) - {lang}
    other_lang = list(alternatives)[0]  # works only for two languages
    g.strings = language_dicts[lang]

    template_context = {
        "url": url,
        "other_lang": other_lang,
        "languages": languages,
    }
    return render_template("/topical.html", **template_context)
Exemplo n.º 5
0
Arquivo: nlp.py Projeto: rusnlp/rusnlp
def about_page(lang):
    """Render ``/about.html`` together with statistics from the query server.

    The ``"neighbors"`` entry of the server's JSON reply is passed to the
    template as ``stats``.

    :param lang: language code; must be a key of ``language_dicts``.
    :return: the result of ``render_template``.
    """
    # pass all required variables to template
    # repeated within each @nlpsearch.route function
    g.lang = lang
    alternatives = set(language_dicts.keys()) - {lang}
    other_lang = list(alternatives)[0]  # works only for two languages
    g.strings = language_dicts[lang]

    # Request code 4 with a placeholder query and the number 10.
    message = [4, {"dummy": "dummy"}, 10]
    reply = json.loads(serverquery(message))
    stats = reply["neighbors"]

    template_context = {
        "url": url,
        "other_lang": other_lang,
        "languages": languages,
        "stats": stats,
    }
    return render_template("/about.html", **template_context)
Exemplo n.º 6
0
def pairwise_page(lang):
    """Render the pairwise comparison page for two models.

    On GET the form is rendered with default selections. On POST exactly two
    models must be chosen; they are sent to the model server as operation
    ``"5"`` and the reply is rendered under the first model's key.

    :param lang: language code; must be a key of ``language_dicts``.
    :return: the result of ``render_template`` for ``pairwise.html``.
    """
    g.lang = lang
    other_lang = list(set(language_dicts.keys()) - {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]

    if request.method != 'POST':
        model_names = list(our_models.keys())
        return render_template('pairwise.html', models=our_models, tags=tags, other_lang=other_lang,
                               languages=languages, url=url, usermodels=[defaultmodel],
                               tags2show=exposed_tags, userpos=['NOUN'],
                               checked_model1=model_names[-8],
                               checked_model2=model_names[-7])

    pos = request.form.getlist('pos')[0]
    models = request.form.getlist("models")
    if len(models) != 2:
        model_names = list(our_models.keys())
        return render_template("pairwise.html", error='Select 2 years!', models=our_models,
                               tags=tags, url=url, usermodels=[defaultmodel], userpos=pos,
                               tags2show=exposed_tags, other_lang=other_lang,
                               languages=languages, checked_model1=model_names[-8],
                               checked_model2=model_names[-7])

    model1, model2 = models
    message = {"operation": "5", "model1": model1, "model2": model2,
               "n": 100, "pos": pos}
    result = json.loads(serverquery(message).decode('utf-8'))

    # Results are stored under the first model only.
    frequencies = {model1: result['frequencies']}
    models_row = {model1: result['neighbors']}
    inferred = {model1} if 'inferred' in result else set()

    return render_template('pairwise.html', list_value=models_row, pos=pos, userpos=pos,
                           models=our_models, tags=tags, other_lang=other_lang, url=url,
                           tags2show=exposed_tags, languages=languages,
                           inferred=inferred, frequencies=frequencies, visible_neighbors=10,
                           checked_model1=model1, checked_model2=model2)
Exemplo n.º 7
0
def home(lang):
    """Render the home page and answer nearest-associate queries posted to it.

    On GET the empty page is rendered. On POST the ``list_query`` form field
    is validated and sent to the model server as the ``;``-separated message
    ``1;<query>;ALL;<model>``. The part of the reply before ``&&&`` is parsed
    as whitespace-separated ``word#score`` items.

    NOTE(review): the reply is handled as a byte string that is decoded item
    by item, which looks like Python 2 code - confirm before porting.

    :param lang: language code; must be a key of ``language_dicts``.
    :return: the result of ``render_template`` for ``home.html``.
    """
    # pass all required variables to template
    # repeated within each @wvectors.route function
    g.lang = lang
    other_lang = list(set(language_dicts.keys()) - {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]

    if request.method != 'POST':
        return render_template('home.html',
                               tags=tags,
                               other_lang=other_lang,
                               languages=languages,
                               url=url)

    # None (not a magic string) marks a missing field, so that every word,
    # including the literal "dummy", remains a valid query.
    list_data = request.form.get('list_query')
    if list_data is None or not list_data.replace('_', '').replace(
            '-', '').replace('::', '').isalnum():
        error_value = u"Incorrect query!"
        return render_template("home.html",
                               error=error_value,
                               tags=tags,
                               other_lang=other_lang,
                               languages=languages,
                               url=url)

    query = process_query(list_data)
    if query == "Incorrect tag!":
        return render_template('home.html',
                               error=query,
                               other_lang=other_lang,
                               languages=languages,
                               url=url)

    model_value = request.form.getlist('model')
    model = model_value[0] if model_value else defaultmodel

    images = {query.split('_')[0]: None}
    message = "1;" + query + ";" + 'ALL' + ";" + model
    result = serverquery(message)
    if "unknown to the" in result or "No result" in result:
        # The server's own message is shown as the error text.
        return render_template('home.html',
                               error=result.decode('utf-8'),
                               other_lang=other_lang,
                               languages=languages,
                               url=url)

    associates_list = []
    associates = result.split('&&&')[0]
    for word in associates.split():
        w = word.split("#")
        associates_list.append((w[0].decode('utf-8'), float(w[-1])))
        images[w[0].split('_')[0].decode('utf-8')] = None

    # Image lookup is best-effort; only ordinary exceptions are ignored so
    # that SystemExit/KeyboardInterrupt still propagate.
    try:
        images = get_images(images)
    except Exception:
        pass

    return render_template('home.html',
                           list_value=associates_list,
                           word=query,
                           wordimages=images,
                           model=model,
                           tags=tags,
                           other_lang=other_lang,
                           languages=languages,
                           url=url)
Exemplo n.º 8
0
def visual_page(lang):
    """Handle the visualization page: plot a set of query words per model.

    On GET the empty form is rendered. On POST the whitespace-separated
    words in ``list_query`` are filtered and passed through
    ``process_query``. For every selected model a plot file name is derived
    from an MD5 hash of the words; when no such file exists under
    ``data/images/tsneplots`` the word vectors are requested through
    ``serverquery`` and handed to ``embed``.

    NOTE(review): this block uses the ``print >> stream`` statement form,
    so it is presumably Python 2 code - confirm before porting.

    :param lang: language code; must be a key of ``language_dicts``.
    :return: the result of ``render_template`` for ``visual.html`` (or for
        ``home.html`` when a requested model is not in ``our_models``).
    """
    g.lang = lang
    s = set()
    s.add(lang)
    other_lang = list(set(language_dicts.keys()) -
                      s)[0]  # works only for two languages
    g.strings = language_dicts[lang]

    if request.method == 'POST':
        # 'dummy' is a sentinel meaning "the form had no list_query field".
        list_data = 'dummy'
        try:
            list_data = request.form['list_query']
        except:
            pass
        if list_data != 'dummy':

            model_value = request.form.getlist('model')
            if len(model_value) < 1:
                model_value = [defaultmodel]

            # Keep words longer than one character that are alphanumeric
            # once '_', '-' and '::' are removed; at most the first 30 of
            # them, de-duplicated through the set.
            querywords = set([
                process_query(w) for w in list_data.split()
                if len(w) > 1 and w.replace('_', '').replace('-', '').replace(
                    '::', '').isalnum()
            ][:30])
            if len(querywords) < 7:
                error_value = "Too few words!"
                return render_template("visual.html",
                                       error=error_value,
                                       models=our_models,
                                       other_lang=other_lang,
                                       languages=languages,
                                       url=url,
                                       usermodels=model_value)

            # NOTE(review): unlike the other failure paths, this render
            # passes no ``error`` value to the template.
            if "Incorrect tag!" in querywords:
                return render_template('visual.html',
                                       word=list_data,
                                       models=our_models,
                                       other_lang=other_lang,
                                       languages=languages,
                                       url=url,
                                       usermodels=model_value)

            unknown = {}  # model -> set of words the server reported as unknown
            models_row = {}  # model -> plot file name, or "Too few words!"
            for model in model_value:
                if not model.strip() in our_models:
                    return render_template('home.html',
                                           other_lang=other_lang,
                                           languages=languages,
                                           url=url,
                                           usermodels=model_value)
                unknown[model] = set()
                words2vis = querywords
                # The plot file name is built from the model name and the
                # MD5 of the '_'-joined query words.
                # NOTE(review): words2vis is a set, so the join order follows
                # set iteration order.
                m = hashlib.md5()
                name = '_'.join(words2vis).encode('ascii', 'backslashreplace')
                m.update(name)
                fname = m.hexdigest()
                plotfile = "%s_%s.png" % (model, fname)
                models_row[model] = plotfile
                labels = []
                if not os.path.exists(root + 'data/images/tsneplots'):
                    os.makedirs(root + 'data/images/tsneplots')
                # Vectors are only requested when the plot file is missing.
                if not os.access(root + 'data/images/tsneplots/' + plotfile,
                                 os.F_OK):
                    print >> sys.stderr, 'No previous image found'
                    vectors = []
                    for w in words2vis:
                        # Message format: "4;<word>;<model>".
                        message = "4;" + w + ";" + model
                        result = serverquery(message)
                        if 'is unknown' in result:
                            unknown[model].add(w)
                            continue
                        # The reply is split on commas; the pieces are
                        # converted to float64 just before embedding.
                        vector = np.array(result.split(','))
                        vectors.append(vector)
                        labels.append(w)
                    if len(vectors) > 1:
                        matrix2vis = np.vstack(([v for v in vectors]))
                        embed(labels, matrix2vis.astype('float64'), model)
                        # The file name is recomputed from the words that
                        # were actually found (labels), which may be fewer
                        # than the words queried.
                        m = hashlib.md5()
                        name = '_'.join(labels).encode('ascii',
                                                       'backslashreplace')
                        m.update(name)
                        fname = m.hexdigest()
                        plotfile = "%s_%s.png" % (model, fname)
                        models_row[model] = plotfile
                    else:
                        models_row[model] = "Too few words!"

            # NOTE(review): this render passes neither ``other_lang`` nor
            # ``languages``, unlike the other renders in this function.
            return render_template('visual.html',
                                   visual=models_row,
                                   words=querywords,
                                   number=len(model_value),
                                   models=our_models,
                                   unknown=unknown,
                                   url=url,
                                   usermodels=model_value)
        else:
            error_value = "Incorrect query!"
            return render_template("visual.html",
                                   error=error_value,
                                   models=our_models,
                                   other_lang=other_lang,
                                   languages=languages,
                                   url=url,
                                   usermodels=[defaultmodel])
    return render_template('visual.html',
                           models=our_models,
                           other_lang=other_lang,
                           languages=languages,
                           url=url,
                           usermodels=[defaultmodel])
Exemplo n.º 9
0
def _finder_is_clean(text):
    """Return True if *text* is alphanumeric once '_', '-', '::' and spaces are removed."""
    return text.replace('_', '').replace('-', '').replace('::', '').replace(' ', '').isalnum()


def _finder_pos(field_name):
    """Return ``(pos, userpos)`` for the POS selector posted as *field_name*.

    Without tagging the POS is always ``'ALL'``. Otherwise the first posted
    value is used, falling back to ``defaulttag``; ``userpos`` lists the
    chosen tag unless it is ``'ALL'``.
    """
    userpos = []
    if not tags:
        return 'ALL', userpos
    selected = request.form.getlist(field_name)
    pos = selected[0] if selected else defaulttag
    if pos != 'ALL':
        userpos.append(pos)
    return pos, userpos


def _finder_query_models(model_names, positive_list, negative_list, pos):
    """Send operation ``'3'`` to the server for every model in *model_names*.

    Returns ``None`` as soon as a model that is not in ``our_models`` is
    met; otherwise ``(models_row, images, frequencies)``.
    """
    models_row = {}
    images = {}
    frequencies = {}
    for model in model_names:
        if model.strip() not in our_models:
            return None
        if model_props[model]['tags'] == 'False':
            # Untagged model: query bare words and do not filter by POS.
            message = {'operation': '3', 'query': [[w.split('_')[0] for w in positive_list],
                                                   [w.split('_')[0] for w in negative_list]], 'pos': 'ALL',
                       'model': model}
        else:
            message = {'operation': '3', 'query': [positive_list, negative_list], 'pos': pos, 'model': model}
        result = json.loads(serverquery(message).decode('utf-8'))
        frequencies[model] = result['frequencies']
        if 'No results' in result:
            models_row[model] = ["No similar words with this tag."]
            continue
        if "Unknown to the model" in result:
            models_row[model] = [result["Unknown to the model"] + 'is unknown to the model']
            continue
        for word in result['neighbors']:
            images[word[0].split('_')[0]] = None
        models_row[model] = result['neighbors']
        if dbpedia:
            # Image lookup is best-effort; only ordinary exceptions are
            # ignored so that SystemExit/KeyboardInterrupt still propagate.
            try:
                images = get_images(images)
            except Exception:
                pass
    return models_row, images, frequencies


def finder(lang):
    """Handle the calculator page: analogical inference and word arithmetic.

    On GET the empty form is rendered. On POST one of two modes is chosen:

    * analogical inference, when ``positive``, ``positive2`` and
      ``negative`` are all non-empty;
    * calculator, when ``positive1`` is non-empty.

    Any other POST yields an "Incorrect query!" error.

    :param lang: language code; must be a key of ``language_dicts``.
    :return: the result of ``render_template`` for ``calculator.html`` (or
        for ``home.html`` when a requested model is not in ``our_models``).
    """
    g.lang = lang
    other_lang = list(set(language_dicts.keys()) - {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]

    if request.method != 'POST':
        return render_template("calculator.html", models=our_models, tags=tags, other_lang=other_lang,
                               tags2show=exposed_tags,
                               languages=languages, url=url, usermodels=[defaultmodel])

    # Missing fields are treated as empty strings.
    form = request.form
    positive_data = form.get('positive', '')
    positive2_data = form.get('positive2', '')
    negative_data = form.get('negative', '')
    positive1_data = form.get('positive1', '')
    negative1_data = form.get('negative1', '')

    # Analogical inference
    if negative_data != '' and positive_data != '' and positive2_data != '':
        negative_list = []
        stripped_negative = negative_data.strip()
        if len(stripped_negative) > 1 and _finder_is_clean(stripped_negative):
            negative_list = [process_query(negative_data)]

        positive_list = [process_query(w) for w in (positive_data, positive2_data)
                         if len(w) > 1 and _finder_is_clean(w)]

        calcmodel_value = request.form.getlist('calcmodel')
        if len(calcmodel_value) < 1:
            calcmodel_value = [defaultmodel]

        if len(positive_list) < 2 or len(negative_list) == 0:
            error_value = "Incorrect query!"
            return render_template("calculator.html", error=error_value, models=our_models, other_lang=other_lang,
                                   languages=languages, url=url, usermodels=calcmodel_value, tags2show=exposed_tags)
        if "Incorrect tag!" in negative_list or "Incorrect tag!" in positive_list:
            error_value = "Incorrect tag!"
            return render_template('calculator.html', error=error_value, models=our_models, tags2show=exposed_tags,
                                   other_lang=other_lang, languages=languages, url=url, usermodels=calcmodel_value)

        pos, userpos = _finder_pos('pos')
        outcome = _finder_query_models(calcmodel_value, positive_list, negative_list, pos)
        if outcome is None:
            return render_template('home.html', other_lang=other_lang, languages=languages,
                                   models=our_models, url=url, usermodels=calcmodel_value)
        models_row, images, frequencies = outcome
        return render_template('calculator.html', analogy_value=models_row, pos=pos, plist=positive_list,
                               userpos=userpos, nlist=negative_list, wordimages=images, models=our_models,
                               tags=tags, tags2show=exposed_tags, other_lang=other_lang, languages=languages,
                               url=url, usermodels=calcmodel_value, frequencies=frequencies)

    # Calculator
    if positive1_data != '':
        # At most ten valid words are kept on each side.
        negative_list = [process_query(w) for w in negative1_data.split()
                         if len(w) > 1 and _finder_is_clean(w)][:10]
        positive_list = [process_query(w) for w in positive1_data.split()
                         if len(w) > 1 and _finder_is_clean(w)][:10]

        calcmodel_value = request.form.getlist('calcmodel')
        if len(calcmodel_value) < 1:
            calcmodel_value = [defaultmodel]

        if len(positive_list) == 0:
            error_value = "Incorrect query!"
            return render_template("calculator.html", calc_error=error_value, other_lang=other_lang,
                                   tags2show=exposed_tags,
                                   languages=languages, models=our_models, url=url, usermodels=calcmodel_value)
        if "Incorrect tag!" in negative_list or "Incorrect tag!" in positive_list:
            error_value = "Incorrect tag!"
            return render_template('calculator.html', calc_error=error_value, other_lang=other_lang,
                                   tags2show=exposed_tags,
                                   languages=languages, models=our_models, url=url, usermodels=calcmodel_value)

        pos, userpos = _finder_pos('calcpos')
        outcome = _finder_query_models(calcmodel_value, positive_list, negative_list, pos)
        if outcome is None:
            return render_template('home.html', other_lang=other_lang, languages=languages,
                                   models=our_models, url=url, usermodels=calcmodel_value)
        models_row, images, frequencies = outcome
        return render_template('calculator.html', calc_value=models_row, pos=pos, plist2=positive_list,
                               tags2show=exposed_tags,
                               nlist2=negative_list, wordimages=images, models=our_models, tags=tags,
                               userpos=userpos,
                               other_lang=other_lang, languages=languages, url=url,
                               usermodels=calcmodel_value, frequencies=frequencies)

    error_value = "Incorrect query!"
    return render_template("calculator.html", error=error_value, models=our_models, tags=tags,
                           tags2show=exposed_tags,
                           other_lang=other_lang, languages=languages, url=url, usermodels=[defaultmodel])
Exemplo n.º 10
0
def visual_page(lang):
    """Handle the visualization page: plot groups of query words per model.

    On GET the empty form is rendered. On POST every ``list_query`` field
    (at most ten) is one comma-separated group of words. For each selected
    model a plot file name is derived from an MD5 hash of the groups; when no
    such file exists under ``data/images/tsneplots`` the word vectors are
    requested through ``serverquery`` and handed to ``embed``.

    :param lang: language code; must be a key of ``language_dicts``.
    :return: the result of ``render_template`` for ``visual.html`` (or for
        ``home.html`` when a requested model is not in ``our_models``).
    """
    g.lang = lang
    other_lang = list(set(language_dicts.keys()) - {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]

    if request.method != 'POST':
        return render_template('visual.html', models=our_models, other_lang=other_lang, languages=languages,
                               url=url, usermodels=[defaultmodel])

    list_data = request.form.getlist('list_query')
    if not list_data:
        error_value = "Incorrect query!"
        return render_template("visual.html", error=error_value, models=our_models, other_lang=other_lang,
                               languages=languages, url=url, usermodels=[defaultmodel])

    model_value = request.form.getlist('model')
    if len(model_value) < 1:
        model_value = [defaultmodel]

    # One set of processed words per input field; at most 30 valid words are
    # taken from each field.
    groups = []
    for inputform in list_data[:10]:
        group = set([process_query(w) for w in inputform.split(',') if len(w) > 1
                     and w.replace('_', '').replace('-', '').replace('::', '').replace(' ', '').isalnum()][:30])
        groups.append(group)

    querywords = [word for group in groups for word in group]
    if len(set(querywords)) != len(querywords):
        error_value = "Words must be unique!"
        return render_template("visual.html", error=error_value, models=our_models, other_lang=other_lang,
                               languages=languages, url=url, usermodels=model_value)
    if len(querywords) < 7:
        error_value = "Too few words!"
        return render_template("visual.html", error=error_value, models=our_models, other_lang=other_lang,
                               languages=languages, url=url, usermodels=model_value)

    if "Incorrect tag!" in querywords:
        error_value = "Incorrect tag!"
        return render_template('visual.html', error=error_value, models=our_models, other_lang=other_lang,
                               languages=languages, url=url, usermodels=model_value)

    # Words are unique across groups at this point, so every word maps to
    # exactly one group index.
    word2class = {word: index for index, group in enumerate(groups) for word in group}

    unknown = {}
    models_row = {}
    links_row = {}
    frequencies = {}
    plot_dir = root + 'data/images/tsneplots'
    for model in model_value:
        if model.strip() not in our_models:
            return render_template('home.html', other_lang=other_lang, languages=languages, url=url,
                                   usermodels=model_value)
        frequencies[model] = {}
        unknown[model] = set()

        # The plot file name is built from the model name and the MD5 of
        # the joined groups.
        # NOTE(review): each group is a set, so the join order follows set
        # iteration order.
        name = ':::'.join(['__'.join(group) for group in groups])
        fname = hashlib.md5(name.encode('ascii', 'backslashreplace')).hexdigest()
        plotfile = "%s_%s.png" % (model, fname)
        identifier = plotfile[:-4]
        models_row[model] = plotfile

        if not os.path.exists(plot_dir):
            os.makedirs(plot_dir)

        if os.access(plot_dir + '/' + plotfile, os.F_OK):
            # A plot already exists: only the stored link is read back.
            if tensorflow_integration:
                # The context manager closes the file after reading.
                with open(plot_dir + '/' + identifier + '.url', 'r') as url_file:
                    links_row[model] = url_file.read()
            else:
                links_row[model] = None
            continue

        print('No previous image found', plot_dir + '/' + plotfile, file=sys.stderr)
        vectors = []
        labels = []
        for w in querywords:
            if model_props[model]['tags'] == 'False':
                message = {'operation': '4', 'query': w.split('_')[0], 'model': model}
            else:
                message = {'operation': '4', 'query': w, 'model': model}
            result = json.loads(serverquery(message).decode('utf-8'))
            frequencies[model].update(result['frequencies'])
            # Make the frequency of the bare word available under the tagged
            # word as well.
            if w.split('_')[0] in frequencies[model] and w not in frequencies[model]:
                frequencies[model][w] = frequencies[model][w.split('_')[0]]
            if w + " is unknown to the model" in result:
                unknown[model].add(w)
                continue
            vectors.append(np.array(result['vector']))
            labels.append(w)

        if len(vectors) <= 5:
            models_row[model] = "Too few words!"
            continue

        # Classes are computed per model and only for the words that were
        # found, so they stay aligned with ``labels`` when some words are
        # unknown and do not leak from one model to the next.
        if len(list_data) == 1 and model_props[model]['tags'] == 'True':
            # Single group on a tagged model: use the POS tag as the class.
            model_classes = [word.split('_')[-1] for word in labels]
        else:
            model_classes = [word2class[word] for word in labels]

        print('Embedding...', file=sys.stderr)
        matrix2vis = np.vstack(vectors)
        embed(labels, matrix2vis.astype('float64'), model_classes, model, fname)
        models_row[model] = plotfile
        if tensorflow_integration:
            links_row[model] = word2vec2tensor(identifier, vectors, labels, model_classes)
        else:
            links_row[model] = None

    # other_lang is passed here as in every other render of this handler.
    return render_template('visual.html', languages=languages, visual=models_row, words=groups,
                           number=len(model_value), models=our_models, unknown=unknown, url=url,
                           usermodels=model_value, l2c=links_row, qwords=querywords, frequencies=frequencies,
                           other_lang=other_lang)
Exemplo n.º 11
0
def similar_page(lang):
    """Serve the nearest-associates page and answer its query form.

    Any non-POST request gets the empty ``associates.html`` form.  A POST
    request reads the ``list_query`` field, normalises it with
    ``process_query`` and asks the model server (operation ``'1'``) for the
    neighbours of the word in every model picked in the ``model`` form field
    (``defaultmodel`` when none is picked).

    :param lang: interface language code; used as a key of ``language_dicts``
        and stored on ``flask.g`` for the templates.
    :return: the value returned by ``render_template``.
    """
    g.lang = lang
    # The templates link to the "other" interface language.
    other_lang = list(set(language_dicts.keys()) - {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]
    if request.method == 'POST':
        # 'dummy' marks a missing form field; it is rejected as an incorrect query below.
        list_data = request.form.get('list_query', 'dummy')
        # Nearest associates queries
        if list_data != 'dummy' and list_data.replace('_', '').replace('-', '').replace('::', ''). \
                replace(' ', '').isalnum():
            list_data = list_data.strip()
            query = process_query(list_data)

            model_value = request.form.getlist('model')
            if len(model_value) < 1:
                model_value = [defaultmodel]

            if query == "Incorrect tag!":
                error_value = "Incorrect tag!"
                return render_template('associates.html', error=error_value, word=list_data, models=our_models,
                                       tags2show=exposed_tags,
                                       other_lang=other_lang, languages=languages, url=url, usermodels=model_value)
            userpos = []
            if tags:
                pos_value = request.form.getlist('pos')
                if len(pos_value) < 1:
                    pos = query.split('_')[-1]
                else:
                    pos = pos_value[0]
                if pos != 'ALL':
                    userpos.append(pos)
                # 'Q' means "use the tag that came with the query itself".
                if pos == 'Q':
                    pos = query.split('_')[-1]
            else:
                pos = 'ALL'

            # The query without its tag suffix (the part before the first '_').
            lemma = query.split('_')[0]
            images = {lemma: None}
            models_row = {}
            inferred = set()
            frequencies = {}
            for model in model_value:
                if model.strip() not in our_models:
                    return render_template('home.html', other_lang=other_lang, languages=languages, url=url,
                                           usermodels=model_value)
                if model_props[model]['tags'] == 'False':
                    # Untagged models are queried with the bare word.
                    message = {'operation': '1', 'query': lemma, 'pos': 'ALL', 'model': model}
                else:
                    message = {'operation': '1', 'query': query, 'pos': pos, 'model': model}
                result = json.loads(serverquery(message).decode('utf-8'))
                frequencies[model] = result['frequencies']
                # Make the frequency available under the full (tagged) query as well.
                if lemma in frequencies[model] and query not in frequencies[model]:
                    frequencies[model][query] = frequencies[model][lemma]
                if query + " is unknown to the model" in result:
                    models_row[model] = "Unknown!"
                    continue
                elif 'No results' in result:
                    models_row[model] = 'No results!'
                    continue
                else:
                    for word in result['neighbors']:
                        images[word[0].split('_')[0]] = None
                    models_row[model] = result['neighbors']
                    if dbpedia:
                        # Pictures are optional decoration: a failed lookup must not
                        # break the page, so errors are deliberately ignored here.
                        try:
                            images = get_images(images)
                        except Exception:
                            pass
                    if 'inferred' in result:
                        inferred.add(model)
            return render_template('associates.html', list_value=models_row, word=query, pos=pos,
                                   number=len(model_value), wordimages=images, models=our_models, tags=tags,
                                   other_lang=other_lang, languages=languages, tags2show=exposed_tags,
                                   url=url, usermodels=model_value, userpos=userpos,
                                   inferred=inferred, frequencies=frequencies)
        else:
            error_value = "Incorrect query!"
            # Pass the language context here too, like every other render of this page.
            return render_template("associates.html", error=error_value, models=our_models, tags=tags, url=url,
                                   other_lang=other_lang, languages=languages,
                                   usermodels=[defaultmodel], tags2show=exposed_tags)
    return render_template('associates.html', models=our_models, tags=tags, other_lang=other_lang,
                           languages=languages, url=url, usermodels=[defaultmodel], tags2show=exposed_tags)
Exemplo n.º 12
0
def misc_page(lang):
    """Serve the pairwise word-similarity page and answer its query form.

    A POST request carries comma-separated word pairs in the ``query`` field.
    Every pair whose two words pass validation is sent to the model server
    (operation ``'2'``) for the model chosen in the ``simmodel`` field.  The
    new result is appended to the history that travels with the form in the
    ``sim_history`` field; only the last ten history entries are kept.

    :param lang: interface language code; used as a key of ``language_dicts``
        and stored on ``flask.g`` for the templates.
    :return: the value returned by ``render_template``.
    """
    g.lang = lang
    # The templates link to the "other" interface language.
    other_lang = list(set(language_dicts.keys()) - {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]

    if request.method == 'POST':
        # 'dummy' marks a missing form field; such a POST just re-renders the empty form.
        input_data = request.form.get('query', 'dummy')
        # Similarity queries
        if input_data != 'dummy':
            if ' ' in input_data.strip():
                input_data = input_data.strip()
                if input_data.endswith(','):
                    input_data = input_data[:-1]
                cleared_data = []
                # The history is client-supplied JSON: a missing, malformed or
                # non-list value simply starts a fresh history instead of failing.
                raw_history = request.form.get('sim_history', '')
                try:
                    sim_history = json.loads(raw_history) if raw_history.strip() else []
                except ValueError:
                    sim_history = []
                if not isinstance(sim_history, list):
                    sim_history = []
                model_value = request.form.getlist('simmodel')
                if len(model_value) < 1:
                    model = defaultmodel
                else:
                    model = model_value[0]
                if model.strip() not in our_models:
                    return render_template('home.html', other_lang=other_lang, languages=languages, url=url,
                                           usermodels=model_value)
                for query in input_data.split(','):
                    # A pair needs two space-separated words; skip anything else.
                    if ' ' not in query.strip():
                        continue
                    query = query.split()
                    words = []
                    for w in query[:2]:
                        if w.replace('_', '').replace('-', '').replace('::', '').isalnum():
                            w = process_query(w)
                            if "Incorrect tag!" in w:
                                error_value = "Incorrect tag!"
                                return render_template('similar.html', error_sim=error_value, models=our_models,
                                                       tags=tags,
                                                       other_lang=other_lang, languages=languages, url=url,
                                                       usermodels=model_value, tags2show=exposed_tags)
                            if model_props[model]['tags'] == 'False':
                                # Untagged models are queried with the bare word.
                                words.append(w.split('_')[0].strip())
                            else:
                                words.append(w.strip())
                    if len(words) == 2:
                        cleared_data.append((words[0].strip(), words[1].strip()))
                if len(cleared_data) == 0:
                    error_value = "Incorrect query!"
                    return render_template("similar.html", error_sim=error_value, models=our_models, tags=tags,
                                           other_lang=other_lang,
                                           languages=languages, url=url, usermodels=model_value,
                                           tags2show=exposed_tags)
                message = {'operation': '2', 'query': cleared_data, 'model': model}
                result = json.loads(serverquery(message).decode('utf-8'))
                cleared_data = [' '.join(el) for el in cleared_data]
                if "Unknown to the model" in result:
                    return render_template("similar.html", error_sim=result["Unknown to the model"],
                                           other_lang=other_lang,
                                           languages=languages, models=our_models, tags2show=exposed_tags,
                                           tags=tags, query=cleared_data, url=url, usermodels=model_value)
                sim_history.append(result['similarities'])
                if len(sim_history) > 10:
                    sim_history = sim_history[-10:]
                str_sim_history = (json.dumps(sim_history, ensure_ascii=False))
                return render_template('similar.html', value=result['similarities'], model=model, query=cleared_data,
                                       models=our_models, tags=tags, other_lang=other_lang, tags2show=exposed_tags,
                                       languages=languages, url=url, usermodels=model_value,
                                       sim_hist=sim_history, str_sim_history=str_sim_history,
                                       frequencies=result['frequencies'])
            else:
                error_value = "Incorrect query!"
                return render_template("similar.html", error_sim=error_value, models=our_models, tags=tags,
                                       tags2show=exposed_tags,
                                       other_lang=other_lang, languages=languages, url=url, usermodels=[defaultmodel])
    return render_template('similar.html', models=our_models, tags=tags, other_lang=other_lang,
                           languages=languages, url=url, usermodels=[defaultmodel], tags2show=exposed_tags)
Exemplo n.º 13
0
import os
import socket  # for sockets
import sys
from collections import OrderedDict
import csv
import numpy as np
from flask import g
from flask import render_template, Blueprint, redirect, Response
from flask import request
from plotting import embed
from plotting import singularplot
from sparql import getdbpediaimage
# import strings data from respective module
from strings_reader import language_dicts

# Slash-separated, upper-cased codes of all interface languages;
# handed to the templates as ``languages``.
languages = '/'.join(language_dicts).upper()

# Service settings are read from ``webvectors.cfg``, resolved against the
# current working directory.
config = configparser.RawConfigParser()
config.read('webvectors.cfg')

# Entries of the 'Files and directories' section, read in this order.
root, modelsfile, cachefile, temp = (
    config.get('Files and directories', option)
    for option in ('root', 'models', 'image_cache', 'temp')
)
url = config.get('Other', 'url')

# Feature switches and the configured interface languages.
lemmatize = config.getboolean('Tags', 'lemmatize')
dbpedia = config.getboolean('Other', 'dbpedia_images')
languages_list = config.get('Languages', 'interface_languages').split(',')

if lemmatize:
Exemplo n.º 14
0
def binary(lang):
    """Serve the two-model comparison page for a single word.

    A GET request renders the empty ``binary.html`` form with the last two
    keys of ``our_models`` preselected.  Any other request is treated as a
    form submission and comes in one of two shapes:

    * a ``word`` field plus exactly two ``models`` values (first click); the
      model server is queried with operation ``'7'`` and
      ``with_examples=True``;
    * a ``confirm`` field holding ``"word model1 model2"`` (the user agreed to
      wait longer); the same operation is sent with ``with_examples="slow"``.

    When the server answers with label ``"1"``, a trajectory query (operation
    ``'6'``) is sent as well, ``tsne_semantic_shifts`` is called if the
    server returned a non-empty ``word_list``, and the plot file name is
    passed to the template.

    :param lang: interface language code; used as a key of ``language_dicts``
        and stored on ``flask.g`` for the templates.
    :return: the value returned by ``render_template``.
    """
    g.lang = lang
    # The templates link to the "other" interface language.
    other_lang = list(set(language_dicts.keys()) - {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]

    if request.method == "GET":
        return render_template("binary.html",
                               other_lang=other_lang,
                               languages=languages,
                               model1=list(our_models.keys())[-2],
                               model2=list(our_models.keys())[-1],
                               models=our_models,
                               url=url)

    if request.form.getlist("word"):  # First time click
        word = request.form.getlist("word")[0]
        word = process_query(word)
        models = request.form.getlist("models")
        if len(models) != 2:
            error_value = 'Select 2 years!'
            # NOTE(review): this preselects keys [-3]/[-2] while the GET form
            # uses [-2]/[-1]; kept as found - confirm whether it is intended.
            return render_template('binary.html',
                                   error=error_value,
                                   models=our_models,
                                   url=url,
                                   other_lang=other_lang,
                                   model1=list(our_models.keys())[-3],
                                   model2=list(our_models.keys())[-2],
                                   languages=languages)

        model1, model2 = models
        if model1 == model2:
            error_value = "Identical models"
            return render_template('binary.html',
                                   error=error_value,
                                   model1=model1,
                                   model2=model2,
                                   other_lang=other_lang,
                                   languages=languages,
                                   models=our_models,
                                   url=url)

        if word == "Incorrect tag!":
            error_value = "Incorrect tag!"
            return render_template('binary.html',
                                   error=error_value,
                                   model1=model1,
                                   model2=model2,
                                   other_lang=other_lang,
                                   languages=languages,
                                   models=our_models,
                                   url=url)
        message = {
            'operation': '7',
            'word': word,
            'model1': model1,
            "model2": model2,
            'with_examples': True
        }

        result = json.loads(serverquery(message).decode('utf-8'))
        if word + " is unknown to the model" in result:
            error_value = "Unknown word"
            # Same language/model context as the other error pages of this view.
            return render_template("binary.html",
                                   error=error_value,
                                   word=word,
                                   model1=model1,
                                   model2=model2,
                                   other_lang=other_lang,
                                   languages=languages,
                                   models=our_models,
                                   tags=tags,
                                   url=url,
                                   usermodels=[defaultmodel],
                                   tags2show=exposed_tags)
    else:  # User presses the 'Confirm' button to wait longer for the results
        # The field must hold exactly "word model1 model2"; a missing or
        # malformed value is reported instead of raising.
        confirm_parts = (request.form.get("confirm") or "").split()
        if len(confirm_parts) != 3:
            error_value = "Incorrect query!"
            return render_template('binary.html',
                                   error=error_value,
                                   models=our_models,
                                   url=url,
                                   other_lang=other_lang,
                                   model1=list(our_models.keys())[-2],
                                   model2=list(our_models.keys())[-1],
                                   languages=languages)
        word, model1, model2 = confirm_parts
        message = {
            'operation': '7',
            'word': word,
            'model1': model1,
            "model2": model2,
            'with_examples': "slow"
        }
        result = json.loads(serverquery(message).decode('utf-8'))

    label = result["label"]
    proba = float(result["proba"])
    examples = result["examples"]
    frequencies = result["frequencies"]
    # The template is told whether the examples arrived as a dict (1) or not (0).
    examples_type = 1 if isinstance(examples, dict) else 0

    # Template variables shared by both outcomes; 'fname' is added only when
    # the trajectory branch below runs.
    context = {
        'model1': model1,
        'model2': model2,
        'frequencies': frequencies,
        'other_lang': other_lang,
        'languages': languages,
        'models': our_models,
        'url': url,
        'label': label,
        'proba': "{:.2f}".format(proba),
        'word': word,
        'examples': examples,
        'examples_type': examples_type,
    }

    if label == "1":
        ok_models = [model1, model2]

        # File name for the plot: MD5 over the models and the word.  The join
        # below iterates over the elements of each model name; the exact
        # string is kept so the resulting name does not change.
        m = hashlib.md5()
        hashword = ":".join(
            [",".join([str(i) for i in j]) for j in ok_models] + [word])
        hashword = hashword.encode('ascii', 'backslashreplace')
        m.update(hashword)

        # NOTE(review): unlike the tsne_shift directory below, this path is
        # not prefixed with ``root`` - confirm that this is intended.
        os.makedirs("data/images/heatmaps", exist_ok=True)

        fname = m.hexdigest()

        trajectory_message = {
            'operation': '6',
            'query': word,
            'pos': "ALL",
            'model': ok_models
        }
        trajectory_result = json.loads(
            serverquery(trajectory_message).decode('utf-8'))

        os.makedirs(root + 'data/images/tsne_shift', exist_ok=True)
        if trajectory_result['word_list']:
            tsne_semantic_shifts(trajectory_result, fname)
        context['fname'] = fname

    return render_template("binary.html", **context)
Exemplo n.º 15
0
def associates_page(lang):
    """Serve the nearest-associates page for several models at once.

    Any non-POST request gets the empty ``associates.html`` form.  A POST
    request reads the ``list_query`` field, normalises it with
    ``process_query`` and then, for the models picked in the ``model`` field
    (``defaultmodel`` when none is picked):

    * calls ``get_model_changes`` for every pair of consecutive models and
      collects a label and a probability per pair;
    * calls ``get_model_neighbors`` for every model and collects neighbours
      and frequencies;
    * sends a trajectory query (operation ``'6'``) for the models that
      returned neighbours and calls ``tsne_semantic_shifts`` if the server
      returned a non-empty ``word_list``.

    :param lang: interface language code; used as a key of ``language_dicts``
        and stored on ``flask.g`` for the templates.
    :return: the value returned by ``render_template``.
    """
    g.lang = lang
    # The templates link to the "other" interface language.
    other_lang = list(set(language_dicts.keys()) - {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]
    if request.method == 'POST':
        # 'dummy' marks a missing form field; it is rejected as an incorrect query below.
        list_data = request.form.get('list_query', 'dummy')

        # Nearest associates queries
        if list_data != 'dummy' and list_data.replace('_', '').replace('-', '').\
                replace('::', '').replace(' ', '').isalnum():
            list_data = list_data.strip()
            query = process_query(list_data)

            model_value = request.form.getlist('model')

            if len(model_value) < 1:
                model_value = [defaultmodel]

            if query == "Incorrect tag!":
                error_value = "Incorrect tag!"
                return render_template('associates.html',
                                       error=error_value,
                                       word=list_data,
                                       models=our_models,
                                       tags2show=exposed_tags,
                                       other_lang=other_lang,
                                       languages=languages,
                                       url=url,
                                       usermodels=model_value)
            userpos = []
            if tags:
                pos_value = request.form.getlist('pos')

                if len(pos_value) < 1:
                    pos = query.split('_')[-1]
                else:
                    pos = pos_value[0]
                if pos != 'ALL':
                    userpos.append(pos)
                # 'Q' means "use the tag that came with the query itself".
                if pos == 'Q':
                    pos = query.split('_')[-1]
            else:
                pos = 'ALL'

            # Reject unknown model names before anything is sent to the server.
            for model in model_value:
                if model.strip() not in our_models:
                    return render_template('associates.html',
                                           other_lang=other_lang,
                                           url=url,
                                           languages=languages,
                                           usermodels=model_value)

            images = {query.split('_')[0]: None}
            models_row = {}
            inferred = set()
            frequencies = {}
            labels, probas = list(), list()

            # Change detection between every pair of consecutive models.
            # Sorting uses the model names only, so two entries for the same
            # pair never fall through to comparing the result objects.
            change_results = sorted(
                [get_model_changes((model1, model2, query))
                 for (model1, model2) in zip(model_value, model_value[1:])],
                key=lambda item: (item[0], item[1]))
            for model1, model2, result in change_results:
                if query + " is unknown to the model" in result:
                    label = "0"
                    proba = 0.0
                else:
                    label = result["label"]
                    proba = float(result["proba"])
                labels.append(label)
                probas.append(proba)

            # Neighbours for every selected model, ordered by model name.
            neighbor_results = sorted(
                [get_model_neighbors((x, query, pos)) for x in model_value],
                key=lambda item: item[0])
            ok_models = list()
            for model, result, model_query in neighbor_results:
                frequencies[model] = result['frequencies']
                # Make the frequency available under the user's query as well
                # when the model was asked with a different form of it.
                if model_query != query:
                    frequencies[model][query] = frequencies[model][model_query]
                if model_query + " is unknown to the model" in result:
                    models_row[model] = "Unknown!"
                    continue
                elif 'No results' in result:
                    models_row[model] = 'No results!'
                    continue
                else:
                    for word in result['neighbors']:
                        images[word[0].split('_')[0]] = None
                    models_row[model] = result['neighbors']
                    ok_models.append(model)
                    if dbpedia:
                        # Pictures are optional; a timed-out lookup is ignored.
                        try:
                            images = get_images(images)
                        except TimeoutError:
                            pass
                    if 'inferred' in result:
                        inferred.add(model)

            # File name for the plot: MD5 over the models, the query and the
            # tag.  The join below iterates over the elements of each model
            # name; the exact string is kept so the name does not change.
            m = hashlib.md5()
            hashword = ":".join(
                [",".join([str(i) for i in j])
                 for j in model_value] + [query, pos])
            hashword = hashword.encode('ascii', 'backslashreplace')
            m.update(hashword)

            fname = m.hexdigest()

            if len(ok_models) == 0:
                error_value = "Unknown word"
                return render_template("associates.html",
                                       error=error_value,
                                       models=our_models,
                                       tags=tags,
                                       url=url,
                                       other_lang=other_lang,
                                       languages=languages,
                                       usermodels=model_value,
                                       tags2show=exposed_tags)

            trajectory_message = {
                'operation': '6',
                'query': query,
                'pos': pos,
                'model': ok_models
            }
            trajectory_result = json.loads(
                serverquery(trajectory_message).decode('utf-8'))

            if query + " is unknown to the model" in trajectory_result:
                error_value = "Unknown word"
                return render_template("associates.html",
                                       error=error_value,
                                       models=our_models,
                                       tags=tags,
                                       url=url,
                                       other_lang=other_lang,
                                       languages=languages,
                                       usermodels=[defaultmodel],
                                       tags2show=exposed_tags)

            os.makedirs(root + 'data/images/tsne_shift', exist_ok=True)
            if trajectory_result['word_list']:
                tsne_semantic_shifts(trajectory_result, fname)

            return render_template('associates.html',
                                   list_value=models_row,
                                   word=query,
                                   pos=pos,
                                   number=len(model_value),
                                   wordimages=images,
                                   models=our_models,
                                   tags=tags,
                                   other_lang=other_lang,
                                   languages=languages,
                                   tags2show=exposed_tags,
                                   url=url,
                                   usermodels=model_value,
                                   userpos=userpos,
                                   inferred=inferred,
                                   frequencies=frequencies,
                                   visible_neighbors=10,
                                   fname=fname,
                                   labels=labels,
                                   probas=probas)
        else:
            error_value = "Incorrect query!"
            return render_template("associates.html",
                                   error=error_value,
                                   models=our_models,
                                   tags=tags,
                                   url=url,
                                   other_lang=other_lang,
                                   languages=languages,
                                   usermodels=[defaultmodel],
                                   tags2show=exposed_tags)

    return render_template('associates.html',
                           models=our_models,
                           tags=tags,
                           other_lang=other_lang,
                           languages=languages,
                           url=url,
                           usermodels=['2013', '2014', '2015'],
                           tags2show=exposed_tags)
Exemplo n.º 16
0
def similar_page(lang):
    """Serve the combined similarity / nearest-associates page.

    Any non-POST request gets the empty ``similar.html`` form.  A POST request
    is handled as one of two query kinds:

    * ``query`` field present: comma-separated word pairs are validated and
      sent to the model server as ``"2;<pairs>;<model>"``; the answer is
      split into ``(word1, word2, similarity)`` tuples.
    * otherwise the ``list_query`` field is treated as a single word and
      ``"1;<word>;<pos>;<model>"`` is sent for every model picked in the
      ``model`` field.

    NOTE(review): the ``.decode('utf-8')`` calls on pieces of the server
    answer only work if ``serverquery`` returns byte strings (as a ``str``
    does in Python 2) - confirm the target interpreter before touching them.

    :param lang: interface language code; used as a key of ``language_dicts``
        and stored on ``flask.g`` for the templates.
    :return: the value returned by ``render_template``.
    """
    g.lang = lang
    # The templates link to the "other" interface language.
    other_lang = list(set(language_dicts.keys()) - {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]

    if request.method == 'POST':
        # 'dummy' marks a form field that was not submitted.
        input_data = request.form.get('query', 'dummy')
        list_data = request.form.get('list_query', 'dummy')
        # Similarity queries
        if input_data != 'dummy':
            if ' ' in input_data.strip():
                input_data = input_data.strip()
                if input_data.endswith(','):
                    input_data = input_data[:-1]
                cleared_data = []
                model_value = request.form.getlist('simmodel')
                if len(model_value) < 1:
                    model = defaultmodel
                else:
                    model = model_value[0]
                if model.strip() not in our_models:
                    return render_template('home.html',
                                           other_lang=other_lang,
                                           languages=languages,
                                           url=url,
                                           usermodels=model_value)
                for query in input_data.split(','):
                    # A pair needs two space-separated words; skip anything else.
                    if ' ' not in query.strip():
                        continue
                    query = query.split()
                    words = []
                    for w in query[:2]:
                        if w.replace('_', '').replace('-', '').replace('::', '').isalnum():
                            w = process_query(w)
                            if "Incorrect tag!" in w:
                                return render_template(
                                    'similar.html',
                                    value=["Incorrect tag!"],
                                    models=our_models,
                                    other_lang=other_lang,
                                    languages=languages,
                                    url=url,
                                    usermodels=model_value)
                            words.append(w.strip())
                    if len(words) == 2:
                        cleared_data.append(words[0].strip() + " " +
                                            words[1].strip())
                if len(cleared_data) == 0:
                    error_value = "Incorrect query!"
                    return render_template("similar.html",
                                           error_sim=error_value,
                                           other_lang=other_lang,
                                           languages=languages,
                                           url=url,
                                           usermodels=model_value)
                message = "2;" + ",".join(cleared_data) + ";" + model
                results = []
                result = serverquery(message)
                if 'does not know the word' in result:
                    return render_template("similar.html",
                                           error_sim=result.strip(),
                                           other_lang=other_lang,
                                           languages=languages,
                                           models=our_models,
                                           tags=tags,
                                           query=cleared_data,
                                           url=url,
                                           usermodels=model_value)
                # Each whitespace-separated item is '#'-delimited:
                # first word, second word, ..., similarity value last.
                for word in result.split():
                    w = word.split("#")
                    results.append((w[0].decode('utf-8'), w[1].decode('utf-8'),
                                    float(w[-1])))
                return render_template('similar.html',
                                       value=results,
                                       model=model,
                                       query=cleared_data,
                                       models=our_models,
                                       tags=tags,
                                       other_lang=other_lang,
                                       languages=languages,
                                       url=url,
                                       usermodels=model_value)
            else:
                error_value = "Incorrect query!"
                return render_template("similar.html",
                                       error_sim=error_value,
                                       models=our_models,
                                       tags=tags,
                                       other_lang=other_lang,
                                       languages=languages,
                                       url=url,
                                       usermodels=[defaultmodel])

        # Nearest associates queries
        if list_data != 'dummy' and list_data.replace('_', '').replace(
                '-', '').replace('::', '').isalnum():
            list_data = list_data.split()[0].strip()
            query = process_query(list_data)

            model_value = request.form.getlist('model')
            if len(model_value) < 1:
                model_value = [defaultmodel]

            if query == "Incorrect tag!":
                return render_template('similar.html',
                                       error=query,
                                       word=list_data,
                                       models=our_models,
                                       other_lang=other_lang,
                                       languages=languages,
                                       url=url,
                                       usermodels=model_value)
            if tags:
                pos_value = request.form.getlist('pos')
                # No explicit choice, or 'Q': use the tag that came with the query.
                if len(pos_value) < 1 or pos_value[0] == 'Q':
                    pos = query.split('_')[-1]
                else:
                    pos = pos_value[0]
            else:
                pos = 'All PoS'

            images = {query.split('_')[0]: None}
            models_row = {}
            for model in model_value:
                if model.strip() not in our_models:
                    return render_template('home.html',
                                           other_lang=other_lang,
                                           languages=languages,
                                           url=url,
                                           usermodels=model_value)
                if tags:
                    message = "1;" + query + ";" + pos + ";" + model
                else:
                    message = "1;" + query + ";" + 'ALL' + ";" + model
                result = serverquery(message)
                associates_list = []
                if "unknown to the" in result:
                    models_row[model] = "Unknown!"
                    continue
                elif "No results" in result:
                    associates_list.append(result)
                    models_row[model] = associates_list
                    continue
                else:
                    # Only the part before '&&&' lists the associates, as
                    # whitespace-separated 'word#similarity' items.
                    output = result.split('&&&')
                    associates = output[0]
                    for word in associates.split():
                        w = word.split("#")
                        associates_list.append(
                            (w[0].decode('utf-8'), float(w[1])))
                        images[w[0].split('_')[0].decode('utf-8')] = None
                    models_row[model] = associates_list
                    # Pictures are optional decoration: a failed lookup must
                    # not break the page, so errors are deliberately ignored.
                    try:
                        images = get_images(images)
                    except Exception:
                        pass
            return render_template('similar.html',
                                   list_value=models_row,
                                   word=query,
                                   pos=pos,
                                   number=len(model_value),
                                   wordimages=images,
                                   models=our_models,
                                   tags=tags,
                                   other_lang=other_lang,
                                   languages=languages,
                                   url=url,
                                   usermodels=model_value)
        else:
            error_value = "Incorrect query!"
            # Pass the language context here too, like every other render of this page.
            return render_template("similar.html",
                                   error=error_value,
                                   models=our_models,
                                   tags=tags,
                                   other_lang=other_lang,
                                   languages=languages,
                                   url=url,
                                   usermodels=[defaultmodel])
    return render_template('similar.html',
                           models=our_models,
                           tags=tags,
                           other_lang=other_lang,
                           languages=languages,
                           url=url,
                           usermodels=[defaultmodel])
Exemplo n.º 17
0
def _finder_clean_terms(words):
    """Return ``process_query(w)`` for every acceptable word in *words*.

    A word is acceptable when it is longer than one character and is
    alphanumeric once ``_``, ``-`` and ``::`` have been removed.
    """
    return [
        process_query(w) for w in words
        if len(w) > 1 and w.replace('_', '').replace('-', '').replace(
            '::', '').isalnum()
    ]


def _finder_pick_pos(untagged_label):
    """Return the PoS to use for the current request.

    When the module-level ``tags`` flag is off, *untagged_label* is returned.
    Otherwise the first ``calcpos`` form value is used, falling back to
    ``defaulttag`` when the field was not submitted.
    """
    if not tags:
        return untagged_label
    calcpos_value = request.form.getlist('calcpos')
    if len(calcpos_value) < 1:
        return defaulttag
    return calcpos_value[0]


def _finder_ask_models(models, positive_list, negative_list, server_pos):
    """Send one operation-3 message per model and collect the answers.

    Returns ``(models_row, images)`` where ``models_row`` maps each model to
    its result list, or ``None`` as soon as a model that is not listed in
    ``our_models`` is encountered.
    """
    models_row = {}
    images = {}
    for model in models:
        if model.strip() not in our_models:
            return None
        message = "3;" + ",".join(positive_list) + "&" + ','.join(
            negative_list) + ";" + server_pos + ";" + model
        result = serverquery(message)
        if len(result) == 0 or 'No results' in result:
            models_row[model] = ["No similar words with this tag."]
            continue
        if "does not know" in result:
            models_row[model] = [result]
            continue
        results = []
        for word in result.split():
            # Each token looks like "<word>#<score>".
            w = word.split("#")
            results.append((w[0].decode('utf-8'), float(w[1])))
            images[w[0].split('_')[0].decode('utf-8')] = None
        models_row[model] = results
        try:
            images = get_images(images)
        except Exception:
            # Image lookup is best effort: keep the placeholders on failure.
            pass
    return models_row, images


def finder(lang):
    """Serve the calculator page.

    On POST the form is interpreted in one of two ways:

    * analogy query, when ``positive``, ``positive2`` and ``negative`` are
      all filled in (only the first whitespace-separated token of each field
      is used);
    * calculator query, when ``positive1`` is filled in (at most ten terms
      per list are kept).

    Any other POST renders the page with ``calc_error``; any other request
    method renders the empty form.
    """
    g.lang = lang
    other_lang = list(set(language_dicts.keys()) -
                      set([lang]))[0]  # works only for two languages
    g.strings = language_dicts[lang]

    # Keyword arguments shared by every template rendered below.
    common = dict(models=our_models,
                  other_lang=other_lang,
                  languages=languages,
                  url=url)

    if request.method != 'POST':
        return render_template("calculator.html",
                               tags=tags,
                               usermodels=[defaultmodel],
                               **common)

    # Missing fields simply count as empty; no exception handling needed.
    form = request.form
    positive_data = form.get('positive', '')
    positive2_data = form.get('positive2', '')
    negative_data = form.get('negative', '')
    positive1_data = form.get('positive1', '')
    negative1_data = form.get('negative1', '')

    # Analogical inference. The strip() guards keep whitespace-only fields
    # from reaching split()[0], which would raise IndexError.
    if negative_data.strip() and positive_data.strip() and positive2_data.strip():
        negative_list = _finder_clean_terms([negative_data.split()[0]])
        positive_list = _finder_clean_terms(
            [positive_data.split()[0], positive2_data.split()[0]])

        calcmodel_value = request.form.getlist('calcmodel')
        if len(calcmodel_value) < 1:
            calcmodel_value = [defaultmodel]

        if len(positive_list) < 2 or len(negative_list) == 0:
            return render_template("calculator.html",
                                   error="Incorrect query!",
                                   usermodels=calcmodel_value,
                                   **common)
        if "Incorrect tag!" in negative_list or "Incorrect tag!" in positive_list:
            return render_template('calculator.html',
                                   calc_value=["Incorrect tag!"],
                                   usermodels=calcmodel_value,
                                   **common)

        pos = _finder_pick_pos('All PoS')
        # The template gets the 'All PoS' label, the server always gets 'ALL'.
        answer = _finder_ask_models(calcmodel_value, positive_list,
                                    negative_list, pos if tags else 'ALL')
        if answer is None:
            return render_template('home.html',
                                   usermodels=calcmodel_value,
                                   **common)
        models_row, images = answer
        return render_template('calculator.html',
                               analogy_value=models_row,
                               pos=pos,
                               plist=positive_list,
                               nlist=negative_list,
                               wordimages=images,
                               tags=tags,
                               usermodels=calcmodel_value,
                               **common)

    # Calculator
    if positive1_data != '':
        negative_list = _finder_clean_terms(negative1_data.split())[:10]
        positive_list = _finder_clean_terms(positive1_data.split())[:10]

        calcmodel_value = request.form.getlist('calcmodel')
        if len(calcmodel_value) < 1:
            calcmodel_value = [defaultmodel]

        if len(positive_list) == 0:
            return render_template("calculator.html",
                                   error="Incorrect query!",
                                   usermodels=calcmodel_value,
                                   **common)
        if "Incorrect tag!" in negative_list or "Incorrect tag!" in positive_list:
            return render_template('calculator.html',
                                   calc_value=["Incorrect tag!"],
                                   usermodels=calcmodel_value,
                                   **common)

        pos = _finder_pick_pos('ALL')
        answer = _finder_ask_models(calcmodel_value, positive_list,
                                    negative_list, pos)
        if answer is None:
            return render_template('home.html',
                                   usermodels=calcmodel_value,
                                   **common)
        models_row, images = answer
        return render_template('calculator.html',
                               calc_value=models_row,
                               pos=pos,
                               plist2=positive_list,
                               nlist2=negative_list,
                               wordimages=images,
                               tags=tags,
                               usermodels=calcmodel_value,
                               **common)

    return render_template("calculator.html",
                           calc_error="Incorrect query!",
                           tags=tags,
                           usermodels=[defaultmodel],
                           **common)
Exemplo n.º 18
0
Arquivo: nlp.py Projeto: rusnlp/rusnlp
def paper(lang, fname, topn):
    """Render the page of a single paper together with its neighbours.

    Args:
        lang: key into ``language_dicts`` selecting the interface language.
        fname: paper identifier; rejected when empty or containing a dot.
        topn: number of results requested from the server; on POST it is
            overridden by the ``topn`` form field when that parses as an int.

    Returns:
        The rendered ``rusnlp_paper.html`` template, either with an error
        message or with the neighbours, topics and author/affiliation maps.
    """
    # pass all required variables to template
    # repeated within each @nlpsearch.route function
    g.lang = lang
    other_lang = list(set(language_dicts.keys()) -
                      {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]

    # Test emptiness first so that a None identifier is rejected as well
    # instead of raising TypeError on the substring test.
    if not fname or "." in fname:
        print("Error!", file=sys.stderr)
        return render_template(
            "rusnlp_paper.html",
            error="С вашим запросом что-то не так!",
            url=url,
            other_lang=other_lang,
            languages=languages,
        )

    paper_id = fname.strip()
    if request.method == "POST":
        try:
            topn = int(request.form["topn"])
        except ValueError:
            # Keep the topn passed in when the form value is not a number.
            pass

    results = json.loads(serverquery([1, paper_id, topn]))
    # Read "meta" leniently: the check below also looks at the top-level
    # keys, so a reply lacking "meta" must not fail before that check runs.
    metadata = results.get("meta", {})

    if "not found" in metadata or "unknown to the model" in results:
        return render_template(
            "rusnlp_paper.html",
            error="Статья с таким идентификатором не найдена в модели",
            search=True,
            url=url,
            topn=topn,
            other_lang=other_lang,
            languages=languages,
            metadata=metadata)

    neighbors = results["neighbors"]

    # Resolve every author id of the paper and of its neighbours (field 2).
    author_ids = set(metadata["author"])
    for res in neighbors:
        author_ids.update(res[2])
    author_request = {"field": "author", "ids": list(author_ids)}
    author_map = json.loads(
        serverquery([3, author_request, topn]))["neighbors"]

    # Same for the affiliation ids (field 6).
    affiliation_ids = set(metadata["affiliation"])
    for res in neighbors:
        affiliation_ids.update(res[6])
    aff_request = {"field": "affiliation", "ids": list(affiliation_ids)}
    aff_map = json.loads(serverquery([3, aff_request, topn]))["neighbors"]

    topics = results["topics"]
    return render_template("rusnlp_paper.html",
                           aff_map=aff_map,
                           result=neighbors,
                           metadata=metadata,
                           search=True,
                           url=url,
                           author_map=author_map,
                           topn=topn,
                           topics=topics,
                           other_lang=other_lang,
                           languages=languages)
Exemplo n.º 19
0
Arquivo: nlp.py Projeto: rusnlp/rusnlp
def homepage(lang, conference, year, author, affiliation, keywords, topn):
    """Serve the search page and, when a query is present, its results.

    Accepts all the values listed in ``defaults`` of the ``@nlpsearch.route``
    decorators. On POST every filter is read from the submitted form; on
    other methods the filters come from the function arguments, with
    ``keywords`` split on ``+`` and ``year`` used as both bounds.

    Returns:
        The rendered ``rusnlp.html`` template: the bare search form when no
        filter is given, an error message for inconsistent/empty/fruitless
        queries, or the result list with its lookup maps.
    """
    # pass all required variables to template
    # repeated within each @nlpsearch.route function
    g.lang = lang
    other_lang = list(set(language_dicts.keys()) -
                      {lang})[0]  # works only for two languages
    g.strings = language_dicts[lang]

    def page(**extra):
        # Every response shares these arguments. ``topn`` is looked up when
        # page() is called, so a value overridden by the form is honoured.
        return render_template("rusnlp.html",
                               search=True,
                               url=url,
                               other_lang=other_lang,
                               languages=languages,
                               years=year_dict,
                               topn=topn,
                               **extra)

    is_post = request.method == "POST"
    if not (conference or year or author or affiliation or keywords
            or is_post):
        return page()

    descriptions = {}

    if is_post:
        try:
            topn = int(request.form["topn"])
        except ValueError:
            pass

        keywords = request.form["keywords"].strip().lower().split()
        keywords = extend_keywords_with_abbs(keywords)
        author = request.form["author_query"].strip()
        affiliation = request.form["affiliation_query"].strip()
        title = request.form["query"].strip()
        conference = request.form.getlist("conf_query")
        year_min = request.form["year_query_min"]
        year_max = request.form["year_query_max"]
        try:
            # Empty bounds are kept as-is (falsy) and mean "no bound".
            if year_min:
                year_min = int(year_min)
            if year_max:
                year_max = int(year_max)
        except ValueError:
            # A non-numeric year is reported like any other bad date range
            # instead of escaping as an unhandled exception.
            return page(error="Проверьте даты!")
    else:
        title = ""
        if keywords:
            keywords = keywords.strip().lower().split("+")
            keywords = extend_keywords_with_abbs(keywords)
        if conference:
            conference = [conference]
        year_min = year
        year_max = year

    if conference:
        conf_request = {"field": "conference", "ids": conference}
        descriptions["conferences"] = json.loads(
            serverquery([5, conf_request, topn]))["neighbors"]

    year = (year_min, year_max)
    if year[0] and year[1] and year[0] > year[1]:
        return page(error="Проверьте даты!")

    # ``not conference`` also covers None, which len() would reject.
    if not conference:
        conference = ["Dialogue", "AIST", "AINL"]

    query = {
        "f_author": author,
        "f_year": year,
        "f_conf": conference,
        "f_title": title,
        "f_affiliation": affiliation,
        "keywords": keywords,
    }
    # Nothing but the default filters: there is no actual query to run.
    if (query["f_author"] == "" and query["f_affiliation"] == ""
            and query["f_title"] == "" and len(query["f_conf"]) == 3
            and query["keywords"] == [] and query["f_year"]
            == (year_dict['default_min'], year_dict['default_max'])):
        return page(error="Введите какой-нибудь запрос!")

    results = json.loads(serverquery([2, query, topn]))
    neighbors = results["neighbors"]
    if len(neighbors) == 0:
        return page(conf_query=conference,
                    year_query=year,
                    author_query=author,
                    error="Поиск не дал результатов.",
                    affiliation_query=affiliation,
                    query=title,
                    keywords=" ".join(keywords))

    # Map the author ids found in the results (field 2) to display values.
    author_ids = {a for res in neighbors for a in res[2]}
    author_request = {"field": "author", "ids": list(author_ids)}
    author_map = json.loads(
        serverquery([3, author_request, topn]))["neighbors"]
    author_key = author.strip()
    if author_key.isdigit():
        # Look up with the same stripped value that was tested; keep the raw
        # id when the map has no entry for it.
        author = author_map.get(author_key, author)

    # Same for the affiliation ids (field 6).
    affiliation_ids = {a for res in neighbors for a in res[6]}
    aff_request = {"field": "affiliation", "ids": list(affiliation_ids)}
    aff_map = json.loads(serverquery([3, aff_request, topn]))["neighbors"]
    affiliation_key = affiliation.strip()
    if affiliation_key.isdigit():
        affiliation = aff_map.get(affiliation_key, affiliation)

    return page(result=neighbors,
                conf_query=conference,
                author_query=author,
                year_query=year,
                query=title,
                affiliation_query=affiliation,
                descriptions=descriptions,
                topics=results["topics"],
                aff_map=aff_map,
                keywords=" ".join(keywords),
                author_map=author_map)
Exemplo n.º 20
0
def raw_finder(lang, model, userquery):
    """Serve the page of a single word for one model.

    The query is accepted only when it is alphanumeric after removing ``_``,
    ``-`` and ``::``. The server is asked with an operation-1 message; its
    reply is split on ``&&&`` into the associates (first part) and the
    vector (remaining parts). A plot of the vector is generated when the
    corresponding file does not exist yet, and an image is looked up when
    the module-level ``dbpedia`` flag is set.
    """
    g.lang = lang
    other_lang = list(set(language_dicts.keys()) -
                      set([lang]))[0]  # works only for two languages
    g.strings = language_dicts[lang]

    model = model.strip()
    if model not in our_models:
        return render_template('home.html',
                               other_lang=other_lang,
                               languages=languages,
                               url=url)

    cleaned = userquery.strip()
    if not cleaned.replace('_', '').replace('-', '').replace('::',
                                                             '').isalnum():
        error_value = u'Incorrect query: %s' % userquery
        return render_template("wordpage.html",
                               error=error_value,
                               tags=tags,
                               other_lang=other_lang,
                               languages=languages,
                               url=url)

    query = process_query(cleaned)
    if tags:
        if len(query.split('_')) < 2:
            # Without a tag part the processed query is shown as the error.
            return render_template('wordpage.html',
                                   error=query,
                                   other_lang=other_lang,
                                   languages=languages,
                                   url=url)
        pos_tag = query.split('_')[-1]
    else:
        pos_tag = 'ALL'

    message = "1;" + query + ";" + pos_tag + ";" + model
    result = serverquery(message)
    if "unknown to the" in result or "No results" in result:
        return render_template('wordpage.html',
                               error=result.decode('utf-8'),
                               other_lang=other_lang,
                               languages=languages,
                               url=url)

    output = result.split('&&&')
    associates = output[0]
    if len(associates) > 1:
        vector = ','.join(output[1:])
    else:
        vector = ''

    associates_list = []
    for word in associates.split():
        # Each token looks like "<word>#<score>".
        w = word.split("#")
        associates_list.append((w[0].decode('utf-8'), float(w[1])))

    # The plot file name is derived from the md5 of the escaped query.
    m = hashlib.md5()
    m.update(query.encode('ascii', 'backslashreplace'))
    fname = m.hexdigest()
    plotfile = root + 'data/images/singleplots/' + model + '_' + fname + '.png'
    if not os.access(plotfile, os.F_OK):
        vector2 = [float(a) for a in output[1].split(',')]
        singularplot(query, model, vector2)

    if dbpedia:
        imagecache = {}
        # The with-block closes the cache file even if parsing fails.
        with codecs.open(root + cachefile, 'r', 'utf-8') as imagedata:
            for line in imagedata:
                res = line.strip().split('\t')
                if len(res) != 2:
                    continue
                (word, image) = res
                image = image.strip()
                if image == 'None':
                    image = None
                imagecache[word.strip()] = image
        image = getdbpediaimage(
            query.split('_')[0].encode('utf-8'), imagecache)
    else:
        image = None

    return render_template('wordpage.html',
                           list_value=associates_list,
                           word=query,
                           model=model,
                           pos=pos_tag,
                           vector=vector,
                           image=image,
                           vectorvis=fname,
                           tags=tags,
                           other_lang=other_lang,
                           languages=languages,
                           url=url)