Пример #1
0
def get_occurrences(url: hug.types.text,
                    key_word: hug.types.text,
                    case_sensitive: hug.types.smart_boolean = True):
    """
    Count whole-word occurrences of "key_word" in the visible text of the
    page at "url".

    Args:
        url: Address of the page to download.
        key_word: Word to look for. The page text is split on every non-word
            character, so a key word containing spaces or punctuation can
            never match.
        case_sensitive: When false, both the page text and the key word are
            lower-cased before comparing (True by default). Declared as a
            smart boolean so that query-string values such as "false" or "0"
            are parsed as booleans instead of being treated as truthy strings.

    Returns:
        A JSON-formatted string of the form '{"Occurrences": "<count>"}'.
    """
    # Imported locally, as in the original; bs4 may not be imported at
    # module level.
    from bs4 import BeautifulSoup

    # A timeout keeps a stalled remote server from blocking this handler
    # forever.
    content = requests.get(url, timeout=10).content

    # Prefer UTF-8 as before; if the page is not valid UTF-8, pass the raw
    # bytes on and let BeautifulSoup detect the encoding instead of failing.
    try:
        markup = content.decode('utf-8')
    except UnicodeDecodeError:
        markup = content

    # remove Javascript, CSS and HTML
    soup = BeautifulSoup(markup, "lxml")
    for tag in soup(["script", "style"]):
        tag.extract()
    text = soup.get_text()

    # convert text and keyword to lowercase if case_sensitive = False
    if not case_sensitive:
        text = text.lower()
        key_word = key_word.lower()

    # \W already covers ",", ";" and ":"; split into bare words.
    words = re.split(r"\W", text)

    return '{{"{0}": "{1}"}}'.format("Occurrences", words.count(key_word))
Пример #2
0
def suggestion(text: hug.types.text):
    """
    For a given text, return possible terms from a suggest list in elastic search index

    The text is sent as a completion-suggester prefix against the "suggest"
    field of the "penn-events" index. From every returned option, the entries
    of its `_source.suggest` value that contain the text (case-insensitive)
    are collected.

    Returns:
        The matching terms as a list, without duplicates, in order of first
        appearance.

    example query: http://localhost:8888/api/suggestion?text=department
    """
    suggest_body = {
        "suggest": {
            "field-suggest": {
                "prefix": text,
                "completion": {
                    "field": "suggest"
                }
            }
        }
    }
    responses = es.search(index='penn-events', body=suggest_body)

    # return all possible full term from suggest list
    needle = text.lower()
    # A dict keeps insertion order, which gives an order-preserving
    # de-duplication without routing a plain Python list through pandas.
    suggest_terms = {}
    for option in responses['suggest']['field-suggest'][0]['options']:
        for term in option['_source']['suggest']:
            if needle in term.lower():
                suggest_terms[term] = None
    return list(suggest_terms)
def _add_one_user(db: directives.PeeweeSession, username: hug.types.text, password: hug.types.text = None,
                  role: hug.types.one_of(UserRoles.user_roles()) = UserRoles.USER,
                  coupons: hug.types.number = 10):
    """Create a single user inside one database transaction.

    Args:
        db: Session whose ``atomic()`` context wraps the insert.
        username: Login name; stored lower-cased.
        password: Password to set. When omitted or empty, a random
            12-character password is generated.
        role: Role assigned to the new user (``UserRoles.USER`` by default).
        coupons: Initial coupon count (10 by default).

    Returns:
        A dict with the stored user name under "name" and the clear-text
        password (given or generated) under "password".
    """
    with db.atomic():
        name = username.lower()
        salt = get_random_string(2)
        secret_password = password or get_random_string(12)
        hashed_password = hash_pw(name, salt, secret_password)
        # Model.create() already inserts the row, so a following save()
        # would only issue a redundant UPDATE.
        user = User.create(user_name=name, role=role, salt=salt,
                           password=hashed_password, coupons=coupons)
        return {"name": user.user_name, "password": secret_password}
Пример #4
0
def get_sim_score(seq1: hug.types.text,
                  seq2: hug.types.text,
                  method: hug.types.text = 'levenshtein',
                  response=None):
    """ Compare Similarity between sequences

    Args:
        seq1, seq2: The two sequences for which the similarity score is
            calculated.
        method: Name of the metric, matched case-insensitively. Available
            methods are levenshtein (default), jaccard, jaro-winkler, hamming
            and sequencer-matcher (sequence-matcher is accepted as an alias,
            since that is the spelling used in the error message).
        response: Response object whose status is set to HTTP 400 when the
            method is not supported. May be None, in which case only the
            error payload is returned.

    Returns:
        {'sim_score': <score rounded to 4 decimals>} on success, with the
        scaled score between 0.0 and 1.0 where
        0.0 - Sequences are not similar at all
        1.0 - Sequences are completely similar (case-insensitive)

        {'error': <message>} when the method is not supported.

    """
    logger.info("Method: %s", method)
    logger.info('Seq1: %s Seq2: %s', seq1, seq2)

    text = [seq1, seq2]
    method = method.lower()

    simscore = SimilarityMetric(text)

    # Public method name -> name of the SimilarityMetric method computing it.
    scorers = {
        'levenshtein': 'levenshtein',
        'jaccard': 'jaccard',
        'jaro-winkler': 'jaro_winkler',
        'hamming': 'hamming',
        'sequencer-matcher': 'sequencer_matcher',
        'sequence-matcher': 'sequencer_matcher',
    }

    scorer_name = scorers.get(method)
    if scorer_name is None:
        # response defaults to None; only set the status when one was given.
        if response is not None:
            response.status = falcon.HTTP_400
        return {
            'error':
            'Unsupported method. Supported method types are Levenshtein, Jaccard, Jaro-Winkler, Hamming, Sequence-Matcher'
        }

    similarity = getattr(simscore, scorer_name)()
    return {'sim_score': round(similarity, 4)}
def change_user_pw(db: directives.PeeweeSession, username: hug.types.text, password: hug.types.text, for_real: hug.types.smart_boolean = False):
    """Set a new password for an existing user.

    Without ``for_real`` this is a dry run: it prints what would happen and
    exits the process with status 1.

    Args:
        db: Session whose ``atomic()`` context wraps the update.
        username: Name of the user; lower-cased before lookup and hashing.
        password: New clear-text password.
        for_real: Must be true for the change to be applied.

    Raises:
        Whatever ``User.get`` raises when no user with that name exists.
    """
    if not for_real:
        print(
            f"this would change {username}'s pw to {password}. Run with --for_real if you're sure.")
        sys.exit(1)
    with db.atomic():
        # The hash is derived from the lower-cased name, so the lookup must
        # use the same lower-cased name; otherwise mixed-case input either
        # misses the account or stores a hash built from a different name.
        name = username.lower()
        user = User.get(User.user_name == name)
        salt = get_random_string(2)
        user.salt = salt
        user.password = hash_pw(name, salt, password)
        user.save()
        print(f"{user.user_name}'s pw successfully changed.")
Пример #6
0
def put_user(db: PeeweeSession, newUserName: hug.types.text, newUserPassword: hug.types.text,
             newUserPasswordConfirm: hug.types.text):
    """Register a new user with the default role and 10 coupons.

    Args:
        db: Session whose ``atomic()`` context wraps the insert.
        newUserName: Requested login name; stored lower-cased.
        newUserPassword: Clear-text password for the new account.
        newUserPasswordConfirm: Must equal ``newUserPassword``.

    Returns:
        {"username": <stored user name>}

    Raises:
        hug.HTTPBadRequest: The two passwords differ.
        hug.HTTPConflict: The insert violated a database integrity
            constraint (reported as "User already exists.").
    """
    if newUserPassword != newUserPasswordConfirm:
        raise hug.HTTPBadRequest
    with db.atomic():
        name = newUserName.lower()
        salt = get_random_string(2)
        hashed_password = hash_pw(name, salt, newUserPassword)
        # Only the insert can raise IntegrityError, so keep the try minimal.
        # Model.create() already inserts the row; no extra save() is needed.
        try:
            user = User.create(user_name=name, role=UserRoles.USER, salt=salt, password=hashed_password, coupons=10)
        except IntegrityError as err:
            raise hug.HTTPConflict('User already exists.') from err
        return {
            "username": user.user_name
        }
Пример #7
0
def search(conn: directive.connection,
           tables: directive.tables,
           locale: directive.locale,
           query: hug.types.text,
           limit: hug.types.in_range(1, 100) = 20,
           page: hug.types.in_range(1, 10) = 1):
    """ Search a route by name. `query` contains the string to search for.
        _limit_ is the maximum number of results to return. _page_ is the
        batch number of results to return, i.e. the request returns results
        `[(page - 1) * limit, page * limit[`.

        The lookup runs in stages:

        1. case-insensitive exact match of `query` against the `ref` column,
        2. if that found nothing and `query` consists of more than three
           digits, a lookup by `id`; a hit here is returned immediately,
        3. a similarity search on the names that fills the result up to
           `page * limit` entries.

        Returns the `RouteList` with the leading `(page - 1) * limit`
        results dropped when `page` is greater than 1.
    """
    # Number of rows needed to cover all pages up to and including `page`.
    maxresults = page * limit

    res = RouteList(query=query, page=page)

    r = tables.routes.data
    base = sa.select(RouteItem.make_selectables(r))

    # First try: exact (case-insensitive) match of ref.
    # One row more than maxresults is requested.
    sql = base.where(
        sa.func.lower(r.c.ref) == query.lower()).limit(maxresults + 1)
    res.set_items(conn.execute(sql), locale)

    # If that did not work and the search term is a number, maybe a relation
    # number? (Only tried for all-digit queries longer than three characters.)
    if len(res) == 0 and len(query) > 3 and query.isdigit():
        sql = base.where(r.c.id == int(query))
        res.set_items(conn.execute(sql), locale)
        if len(res) > 0:
            return res

    # Second try: fuzzy matching of text
    # Skipped only when the exact match already returned its extra row,
    # i.e. more than maxresults items.
    if len(res) <= maxresults:
        remain = maxresults - len(res)
        # Preselect matches by doing a word match on name and intnames.
        # NOTE(review): '<->>>' and '<->' look like PostgreSQL pg_trgm
        # distance operators (smaller value = better match) - confirm.
        primary_sim = r.c.name + sa.func.jsonb_path_query_array(
            r.c.intnames, '$.*', type_=sa.Text)
        primary_sim = primary_sim.op('<->>>', return_type=sa.Float)(query)
        primary_sim = primary_sim.label('sim')

        # Rerank by full match against main name
        second_sim = r.c.name.op('<->', return_type=sa.Float)(query)
        second_sim = second_sim.label('secsim')

        # Inner query: fetch ten times the number of rows still needed
        # (capped at 1100), ordered by the preselection score.
        inner = base.add_columns(primary_sim, second_sim)\
                    .order_by(primary_sim)\
                    .limit(min(1100, remain * 10))\
                    .alias('inner')

        # Final score is the sum of both scores; order the preselected
        # candidates by it and keep only as many rows as still needed.
        rematch_sim = (inner.c.sim + inner.c.secsim).label('finsim')

        sql = sa.select(inner.c)\
                .add_columns(rematch_sim)\
                .order_by(rematch_sim)\
                .limit(remain)

        # The first row returned sets the reference score. Stop adding
        # results as soon as a row's score exceeds it by more than 0.3.
        minsim = None
        for o in conn.execute(sql):
            if minsim is None:
                minsim = o['finsim']
            elif o['finsim'] - 0.3 > minsim:
                break
            res.add_item(o, locale)

    # Results of earlier pages were collected as well; cut them off.
    if page > 1:
        res.drop_leading_results((page - 1) * limit)

    return res
Пример #8
0
# Trailing sort direction of a single ORDER BY term, e.g. "name DESC".
_ORDER_DIRECTION_RE = re.compile(r'\s+(asc|desc)\s*$', re.IGNORECASE)


def _split_order_term(term):
    """Split one ORDER BY term into (lower-cased column, ascending flag)."""
    term = term.strip()
    match = _ORDER_DIRECTION_RE.search(term)
    if match is None:
        # No explicit direction: SQL sorts ascending by default.
        return term.lower(), True
    column = term[:match.start()].strip().lower()
    return column, match.group(1).lower() != 'desc'


def synthesize_data(query: hug.types.text, method: hug.types.text):
    """Run `query` through the synthesis layer and return the result.

    The query's own ORDER BY clauses are replaced by ``random()`` (or an
    ``order by random()`` is inserted when there is none), the rewritten
    query is executed via ``kfpd.read_sql`` and the original ordering is
    re-applied to the returned data frame.

    Args:
        query: SQL text; must pass ``query_ok``.
        method: Optional name of a synthesis method. When it matches an entry
            of ``kfpd.synthesis_methods`` (case-insensitive), the
            corresponding ``<Name>Plugin`` from module globals is installed
            as ``kfpd.plugin``.

    Returns:
        On success a dict with 'message' == 'success', the original 'query',
        the 'executed_query', the rendered HTML table in 'response' and the
        data as 'csv'. On failure a dict with 'message' == 'error', the
        'query' and an error text in 'response'.
    """
    if not query_ok(query):
        return {
            'message': 'error',
            'query': '{0}'.format(query),
            'response': 'Invalid query provided.'}

    parsed = sqlparse.parse(query)[0]

    order_found = False
    order_clauses = []
    limit_found = False
    if parsed.get_type() == 'SELECT':
        for t in parsed.tokens:
            if t.is_whitespace:
                continue
            # Depending on the sqlparse version "ORDER BY" arrives as the two
            # keywords ORDER and BY or as one keyword; accept both.
            if t.is_keyword and ' '.join(t.normalized.split()) in ('ORDER', 'ORDER BY'):
                order_found = True
                continue
            if order_found:
                if t.is_keyword and t.normalized != 'BY':
                    break
                elif isinstance(t, (sqlparse.sql.Identifier, sqlparse.sql.IdentifierList)):
                    order_clauses.append(str(t))
        limit_found = any(
            t.is_keyword and t.normalized == 'LIMIT' for t in parsed.tokens)

    # replace order by clauses with random()
    # as order by doesn't do anything once synthesis occurs
    fixed_query = query
    if order_clauses:
        i = query.rfind(order_clauses[0])
        fixed_query = fixed_query[:i] + "random()" + fixed_query[i + len(order_clauses[0]):]

        for o in order_clauses[1:]:
            i = fixed_query.rfind(o)
            fixed_query = fixed_query[:i] + fixed_query[i + len(o):]

        # some cleanup
        fixed_query = re.sub(r'random\(\),', 'random()', fixed_query, flags=re.M)
        fixed_query = re.sub(r'^\s+,', '', fixed_query, flags=re.M)
    # if no order by statement present, add it
    elif not order_found:
        if limit_found:
            i = fixed_query.lower().rfind('limit')
            fixed_query = fixed_query[:i] + "\norder by random()\n" + fixed_query[i:]
        else:
            fixed_query += '\norder by random()'
    # else: an ORDER BY exists but no identifier was collected for it (e.g.
    # it orders by a function call). There is nothing to substitute and a
    # second ORDER BY would be invalid SQL, so the query is run unchanged.

    try:
        if method is not None:
            for m in kfpd.synthesis_methods:
                if method.lower() == m.lower():
                    kfpd.plugin = globals()[m + 'Plugin']()
        df = kfpd.read_sql(fixed_query, db_conn)

        # if any order by clauses were present, re-apply them
        if order_clauses:
            sort_by = []
            asc_flags = []
            orig_columns = df.columns
            # Column matching is done case-insensitively.
            df.columns = df.columns.str.lower()

            for clause in order_clauses:
                for term in clause.split(','):
                    # Only a trailing ASC/DESC counts as a direction, so
                    # column names such as "description" stay intact.
                    column, ascending = _split_order_term(term)
                    sort_by.append(column)
                    asc_flags.append(ascending)

            df.sort_values(sort_by, ascending=asc_flags, inplace=True)
            df.columns = orig_columns

        # NOTE(review): Styler.hide_index() and Styler.render() appear to be
        # gone from recent pandas releases - confirm the pinned version
        # before upgrading.
        df_html = (
            df.style
            .hide_index()
            .set_table_attributes("class='table table-hover'")
            .set_uuid('_')
            .render()
        )
        # pandas generated html has a lot of stuff we don't want returned
        # chuck it!
        df_html = re.sub(r' id="T__row\d+_col\d+"', '', df_html)
        df_html = re.sub(r' class="data row\d+ col\d+" ', '', df_html)

        return {
          'message': 'success',
          'query': '{0}'.format(query),
          'executed_query': fixed_query,
          'response': df_html,
          'csv': df.to_csv(index=False)}
    except Exception as e:
        # Boundary of the web service: report any failure to the client
        # instead of letting it surface as a server error.
        print('web-service.synthesize_data() caught exception', str(e))
        return {
            'message': 'error',
            'query': '{0}'.format(query),
            'response': str(e)}
Пример #9
0
def ip_bulk_by_category(category: hug.types.text):
    """Retrieve all IP addresses that are in feeds of the given feed category.

    The category name is lower-cased before it is passed to
    ``FeedsAlchemy.db_ip_bulk_by_category``; that call's result is returned
    unchanged.
    """
    return FeedsAlchemy.db_ip_bulk_by_category(category.lower())
Пример #10
0
def maintainers_by_category(category: hug.types.text):
    """Retrieve all maintainers of the given category.

    The category name is lower-cased before it is passed to
    ``FeedsAlchemy.db_maintainers_by_category``; that call's result is
    returned unchanged.
    """
    return FeedsAlchemy.db_maintainers_by_category(category.lower())
Пример #11
0
def maintainer_info(maintainer: hug.types.text):
    """Retrieve all available information about a maintainer, looked up by name.

    The name is lower-cased before it is passed to
    ``FeedsAlchemy.db_maintainer_info``; that call's result is returned
    unchanged.
    """
    return FeedsAlchemy.db_maintainer_info(maintainer.lower())
Пример #12
0
def feed_info(feed_name: hug.types.text):
    """Retrieve all available information about a feed, looked up by name.

    The name is lower-cased before it is passed to
    ``FeedsAlchemy.db_feed_info``; that call's result is returned unchanged.
    """
    return FeedsAlchemy.db_feed_info(feed_name.lower())