Пример #1
0
def requestid(request):
    """Return the request id of a newly constructed Scorpion ``Status``.

    A ``Status`` is created with no arguments, its ``reqid`` attribute is
    read, and the status is closed again before returning.

    Args:
        request: the incoming request object; it is not inspected here.

    Returns:
        ``{'requestid': <id>}`` on success, or ``{'error': ...}`` when
        ``has_scorpion()`` is false.
    """
    if not has_scorpion():
        return {'error': "Scorpion not installed"}

    from scorpion.util import Status
    status = Status()
    try:
        # Use a distinct local name so the function's own name is not shadowed.
        reqid = status.reqid
    finally:
        # Release the Status handle even if reading the id fails.
        status.close()
    print("requestid %s" % (reqid,))
    return {'requestid': reqid}
Пример #2
0
def api_status(request):
    """Report the latest status and the rules found so far for a request id.

    Args:
        request: request object whose ``GET`` mapping carries ``requestid``.

    Returns:
        A dict with:
          * ``status``  - whatever ``Status.latest_status()`` returns,
          * ``results`` - the rules from ``Status.get_rules()`` flattened
            across all labels into one list,
          * ``hash``    - ``hash(str(results))``.
        ``{'error': ...}`` is returned instead when Scorpion is unavailable
        or ``requestid`` is missing / not an integer.
    """
    if not has_scorpion():
        return {'error': "Scorpion not installed"}

    from scorpion.util import Status
    try:
        rid = int(request.GET.get('requestid'))
    except (TypeError, ValueError):
        # TypeError: parameter absent (None); ValueError: not numeric.
        return {'error': "missing or invalid requestid"}

    status = Status(rid)
    try:
        ret = status.latest_status()
        label_rules = status.get_rules()
    finally:
        # Always release the Status handle, even if a lookup fails.
        status.close()

    # get_rules() yields (label, rules) pairs; the labels are dropped here.
    partial_rules = [rule for _label, rules in label_rules for rule in rules]
    rules_hash = hash(str(partial_rules))

    return {'status': ret, 'results': partial_rules, 'hash': rules_hash}
Пример #3
0
def scorpion_run(db, requestdata, requestid):
    """Run Scorpion for one request and return the encoded results.

    Args:
        db: database handle; passed to ``create_sql_obj``, ``SharedObj`` and
            ``db_type``.
        requestdata: dict-like request payload. Keys read here:
            * ``query``: must contain ``db``, ``table``, ``x`` (with
              ``alias`` and ``col``) and ``ys`` (items with ``expr`` and
              ``alias``).
            * ``badselection`` / ``goodselection``: map an aggregate's
              shortname to a sequence of points; only each point's ``x``
              is used. (The original note described the shape as
              ``{ alias: { x:, y:, xalias:, yalias:, } }``.)
            * ``errtypes``: shortname -> error type.
            * ``erreqs``: shortname -> values, read only when the error
              type equals 1.
            * ``ignore_cols``: column names to ignore.
        requestid: id used to construct the ``Status`` for this run.

    Returns:
        A dict with ``results`` and ``top_k_results`` on success. If any
        step raises, the traceback is printed and the dict carries
        ``error`` (``str`` of the exception) instead.
    """
    from db import db_type
    context = {}

    try:
        qjson = requestdata.get('query', {})
        dbname = qjson['db']
        tablename = qjson['table']
        parsed, params = create_sql_obj(db, qjson)
        print("parsed SQL")
        print(parsed)
    except Exception as e:
        traceback.print_exc()
        context["error"] = str(e)
        return context

    # Defined before the try so the cleanup below can tell whether it exists.
    obj = None
    # Upper bound on how many bad/good keys are handed to the search.
    sample_size = 20

    try:
        badsel = requestdata.get('badselection', {})
        goodsel = requestdata.get('goodselection', {})
        errtypes = requestdata.get('errtypes', {})
        erreqs = requestdata.get('erreqs', {})
        ignore_attrs = requestdata.get('ignore_cols', [])
        x = qjson['x']
        ys = qjson['ys']

        obj = SharedObj(db, dbname=dbname, parsed=parsed, params=params)
        obj.dbname = dbname
        # NOTE(review): this sets ``obj.C`` (upper case) while the
        # parallel_debug call below passes ``obj.c`` - confirm whether the
        # 0.2 is meant to reach that call.
        obj.C = 0.2
        obj.ignore_attrs = [str(attr) for attr in ignore_attrs]

        # fix aliases in select
        for nonagg in obj.parsed.select.nonaggs:
            nonagg.alias = x['alias']
        for agg in obj.parsed.select.aggs:
            matches = [y for y in ys if y['expr'] == agg.expr]
            if not matches:
                # Fail with a message that names the problem rather than a
                # bare "list index out of range".
                raise ValueError(
                    "no entry in query 'ys' matches aggregate expression %r"
                    % (agg.expr,))
            agg.alias = matches[0]['alias']

        xtype = db_type(db, tablename, x['col'])

        errors = []
        # NOTE(review): aliases are fixed on ``select.aggs`` above but the
        # errors are built from ``select.aggregates`` - presumably the same
        # collection; confirm.
        for agg in obj.parsed.select.aggregates:
            alias = agg.shortname
            if alias not in badsel:
                continue

            badpts = badsel.get(alias, [])
            badkeys = extract_agg_vals([pt['x'] for pt in badpts], xtype)
            goodpts = goodsel.get(alias, [])
            goodkeys = extract_agg_vals([pt['x'] for pt in goodpts], xtype)
            errtype = errtypes[alias]
            print("errtype %s" % (errtype,))
            erreq = []
            if errtype == 1:
                erreq = erreqs[alias]
                print("erreq %s" % (erreq,))

            # XXX: for better end-to-end performance
            #      sample the badkeys and goodkeys so there are ~20
            if len(badkeys) > sample_size:
                newidxs = nprand.choice(
                    range(len(badkeys)), sample_size, False)
                badkeys = [badkeys[i] for i in newidxs]
                if erreq:
                    # Same indexes as badkeys so the two stay aligned.
                    erreq = [erreq[i] for i in newidxs]
            if len(goodkeys) > sample_size:
                newidxs = nprand.choice(
                    range(len(goodkeys)), sample_size, False)
                goodkeys = [goodkeys[i] for i in newidxs]

            err = AggErr(agg, badkeys, 20, errtype, {'erreq': erreq})
            obj.goodkeys[alias] = goodkeys
            errors.append(err)

        obj.errors = errors

        obj.status = Status(requestid)
        print("status requid =  %s" % (requestid,))

        start = time.time()
        print(obj.rules_schema)
        parallel_debug(obj,
                       parallel=True,
                       nstds=0,
                       errperc=0.001,
                       epsilon=0.008,
                       msethreshold=0.15,
                       tau=[0.001, 0.05],
                       c=obj.c,
                       complexity_multiplier=4.5,
                       l=0.7,
                       c_range=[0.1, 1.],
                       max_wait=20,
                       use_cache=False,
                       granularity=20,
                       ignore_attrs=obj.ignore_attrs,
                       DEBUG=False)
        cost = time.time() - start
        print("end to end took %.4f" % cost)

        obj.update_status('serializing results')
        context['results'] = encode_best_rules(obj)
        context['top_k_results'] = encode_top_k(obj)

        obj.update_status('done!')

    except Exception as e:
        traceback.print_exc()
        context['error'] = str(e)
    finally:
        # obj may never have been created, or may have failed before its
        # status was attached; only close what actually exists.
        status = getattr(obj, 'status', None)
        if status is not None:
            try:
                status.close()
            except Exception:
                # Closing is best-effort: report it but keep the result.
                traceback.print_exc()

    return context
Пример #4
0
def _fake_scorpion_results():
    """Build the canned ``results`` payload returned in fake mode."""
    return [{
        'score': 0.2,
        'c_range': [0, 1],
        'count': 100,
        'clauses': [
            {'col': 'sensor', 'type': 'str', 'vals': ['18']},
        ],
        'alt_rules': [
            [{'col': 'humidity', 'type': 'num', 'vals': [0, 1.4]}],
        ],
    }, {
        'score': 0.2,
        'c_range': [0, 1],
        'count': 100,
        'clauses': [
            {'col': 'voltage', 'type': 'num', 'vals': [0, 2.15]},
            {'col': 'sensor', 'type': 'str', 'vals': ['18']},
        ],
        'alt_rules': [
            [{'col': 'humidity', 'type': 'num', 'vals': [0, 1.4]},
             {'col': 'humidity', 'type': 'num', 'vals': [0, 1.4]}],
            [{'col': 'humidity', 'type': 'num', 'vals': [0, 1.4]},
             {'col': 'humidity', 'type': 'num', 'vals': [0, 1.4]}],
        ],
    }]


def _fake_scorpion_top_k():
    """Build the canned ``top_k_results`` payload returned in fake mode."""
    return [{
        'c': 0,
        'score': 0.2,
        'c_range': [0, 0],
        'count': 100,
        'clauses': [
            {'col': 'sensor', 'type': 'str', 'vals': ['18']},
        ],
    }, {
        'c': 0,
        'score': 0.2,
        'c_range': [0, 0],
        'count': 100,
        'clauses': [
            {'col': 'voltage', 'type': 'num', 'vals': [0, 2.15]},
            {'col': 'sensor', 'type': 'str', 'vals': ['18']},
        ],
    }, {
        'c': 0.5,
        'score': 0.2,
        'c_range': [0.5, .5],
        'count': 100,
        'clauses': [
            {'col': 'sensor', 'type': 'str', 'vals': ['18', '15']},
        ],
    }, {
        'c': 0.5,
        'score': 0.2,
        'c_range': [0.5, .5],
        'count': 100,
        'clauses': [
            {'col': 'voltage', 'type': 'num', 'vals': [-5, 2.5]},
            {'col': 'sensor', 'type': 'str', 'vals': ['18', '15']},
        ],
    }, {
        'c': 1.0,
        'score': 0.2,
        'c_range': [1.0, 1.0],
        'count': 100,
        'clauses': [
            {'col': 'sensor', 'type': 'str', 'vals': ['18', '30', '35']},
        ],
    }, {
        'c': 1.0,
        'score': 0.2,
        'c_range': [1.0, 1.0],
        'count': 100,
        'clauses': [
            {'col': 'humidity', 'type': 'num', 'vals': [-100, 40]},
            {'col': 'sensor', 'type': 'str', 'vals': ['18', '19']},
        ],
    }]


def scorpion(request):
    """Handle a Scorpion request, either for real or with canned data.

    GET parameters read from ``request``:
        * ``json``      - JSON-encoded request payload (its ``query`` entry
          must contain ``db`` and ``table`` for a real run).
        * ``username``  - used as the database user and database name.
        * ``fake``      - when absent, empty, or ``'false'`` a real run is
          performed; any other value returns the canned payload.
        * ``requestid`` - forwarded to ``scorpion_run`` / ``Status``.

    Returns:
        * Real run: whatever ``scorpionutil.scorpion_run`` returns, or ``{}``
          if anything raised (the traceback is printed).
        * Fake run: ``{'results': ..., 'top_k_results': ...}`` after storing
          the results via ``Status.update_rules`` and sleeping one second.
        * ``{'status': 'error: could not load scorpion'}`` when
          ``has_scorpion()`` is false.
    """
    if not has_scorpion():
        sys.stderr.write(
            "Could not load scorpionutil.  "
            "Maybe scorpion has not been installed?\n")
        return {'status': "error: could not load scorpion"}

    try:
        data = json.loads(str(request.GET.get('json')))
        username = request.GET.get('username')
        fake = request.GET.get('fake', False)
        requestid = request.GET.get('requestid')
        if not fake or fake == 'false':

            import core.db.backend.pg as pg
            qjson = data.get('query', {})
            # The request's ``db`` is a repo name: qualify the table with it
            # and connect to the database named after the user instead.
            repo = qjson['db']
            qjson['db'] = username
            qjson['table'] = "%s.%s" % (repo, qjson['table'])
            # NOTE(review): username comes straight from the request and is
            # interpolated into the connection URL unescaped - confirm it is
            # validated upstream.
            dburl = "postgresql://%s@%s:%s/%s" % (username, pg.host, pg.port,
                                                  username)
            engine = create_engine(dburl)
            db = None

            try:
                # Connecting and importing happen inside the try so the
                # engine is disposed even when either of them fails.
                db = engine.connect()
                import scorpionutil
                return scorpionutil.scorpion_run(db, data, requestid)
            except Exception:
                traceback.print_exc()
                return {}
            finally:
                # Cleanup is best-effort: report failures, never mask the
                # result being returned.
                if db is not None:
                    try:
                        db.close()
                    except Exception:
                        traceback.print_exc()
                try:
                    engine.dispose()
                except Exception:
                    traceback.print_exc()

    except Exception:
        traceback.print_exc()
        return {}

    # Fake mode: publish canned rules for this request id and return them.
    results = _fake_scorpion_results()
    top_k = _fake_scorpion_top_k()

    from scorpion.util import Status
    status = Status(requestid)
    try:
        status.update_rules('label', results)
    finally:
        status.close()

    time.sleep(1)

    return {'results': results, 'top_k_results': top_k}
Пример #5
0
from scorpion.db import *
from scorpion.util import Status

# Smoke test for Status: it must report "initialized" right after
# construction, echo back every update, and report "no status yet" once
# cleanup() has run.
db = connect("status")

status = Status(db)
assert "initialized" == status.latest_status()
for i in range(10):
    msg = 'testing %d' % i
    status.update_status(msg)
    assert msg == status.latest_status()
status.cleanup()
assert "no status yet" == status.latest_status()