示例#1
0
    def frame(self, data_type=None):
        """Build ``self.df`` / ``self.pdict`` from the loaded raw data.

        data_type -- optional pandas reader suffix (e.g. 'csv', 'json');
        when given, ``pandas.read_<data_type>`` parses ``self.data``,
        otherwise the data is wrapped in a plain DataFrame.
        Sets ``self.pdict`` to the rendered records (empty dict when the
        cube produced no rows) and returns None.
        """
        log_it("LOAD DATA ON DATAWAREHOUSE via {}: {}".format(
            data_type or 'dict', self.slug), "bin-mining")
        if data_type:
            # Dynamic dispatch to pandas.read_csv/read_json/... by name.
            self.df = getattr(pandas, "read_{}".format(data_type))(self.data)
        else:
            self.df = DataFrame(self.data)

        if self.df.empty:
            self.pdict = {}
            log_it('[warning]Empty cube: {}!!'.format(self.cube),
                   "bin-mining")
            return

        # Relabel columns with the keys captured at load time; when no
        # keys were recorded, record the DataFrame's own columns instead.
        try:
            self.df.columns = self.keys
        except AttributeError:
            self._keys(self.df.columns.tolist())

        # When the OML feature is enabled and this cube carries a script,
        # run the records through the OML runtime and rebuild the frame.
        if conf("oml").get("on") and self.cube.get("oml"):
            from oml import RunTime
            self.df.columns = self.keys
            df = RunTime(conf("oml").get("language", "lua"),
                         self.df.to_dict(orient='records'),
                         self.cube.get("oml"),
                         conf("oml").get("class", {"OML": "oml.base.OMLBase"}))
            self.df = DataFrame(df)
            self._keys(self.df.columns.tolist())

        # NOTE(review): head() is side-effect free and its result is
        # discarded -- confirm whether this line is needed at all.
        self.df.head()
        # Python 2 map() returns a list of per-record rendered dicts.
        self.pdict = map(fix_render, self.df.to_dict(orient='records'))
示例#2
0
文件: cube.py 项目: yenchih/mining
def process(_cube):
    """Run the load/frame/save pipeline for a single cube document.

    On any failure the cube is flagged ``run = False`` and written back
    to MongoDB so the scheduler no longer considers it running.
    """
    # Create the Mongo handle BEFORE the try block: the except clause
    # below uses it, and the original code raised NameError inside the
    # handler whenever the failure happened before this assignment.
    mongo = MongoPlugin(uri=conf("mongodb")["uri"],
                        db=conf("mongodb")["db"],
                        json_mongo=True).get_mongo()

    try:
        log_it("START: {}".format(_cube['slug']), "bin-mining")

        c = CubeProcess(_cube)
        if _cube.get('type') == 'relational':
            c.load()
            c.frame()
            c.save()
        elif _cube.get('type') == 'cube_join':
            c.environment(_cube.get('type'))
            cube_join = CubeJoin(_cube)
            c._data(cube_join.none())
            c._keys(cube_join.none().columns.values)
            c.frame()
            c.save()

    except Exception as e:  # 'as' form is valid on Python 2.6+ and 3.x
        log_it(e, "bin-mining")
        log_it(traceback.format_exc(), "bin-mining")
        _cube['run'] = False
        mongo['cube'].update({'slug': _cube['slug']}, _cube)
示例#3
0
def process(_cube):
    """Process one cube document end-to-end (load, frame, save).

    Failures flag the cube with ``run = False`` and persist it back to
    MongoDB instead of propagating.
    """
    # The Mongo handle must exist before the try block: the error
    # handler writes through it, and the original code hit NameError
    # there when the exception fired before this line.
    mongo = MongoPlugin(
        uri=conf("mongodb")["uri"],
        db=conf("mongodb")["db"],
        json_mongo=True).get_mongo()

    try:
        log_it("START: {}".format(_cube['slug']), "bin-mining")

        c = CubeProcess(_cube)
        if _cube.get('type') == 'relational':
            c.load()
            c.frame()
            c.save()
        elif _cube.get('type') == 'cube_join':
            c.environment(_cube.get('type'))
            cube_join = CubeJoin(_cube)
            c._data(cube_join.none())
            c._keys(cube_join.none().columns.values)
            c.frame()
            c.save()

    except Exception as e:  # 'as' syntax works on Python 2.6+ and 3.x
        log_it(e, "bin-mining")
        log_it(traceback.format_exc(), "bin-mining")
        _cube['run'] = False
        mongo['cube'].update({'slug': _cube['slug']}, _cube)
示例#4
0
def process(_cube):
    """Run the load/frame/save pipeline for one cube document.

    Dispatches on the cube type (relational, cube_join or url); on any
    failure the cube is flagged ``run = False`` and written back.
    """
    # Build the Mongo handle outside the try block: the except clause
    # needs it, and the original raised NameError in the handler when
    # the failure occurred before this assignment.
    mongo = MongoPlugin(uri=conf("mongodb")["uri"], db=conf("mongodb")["db"], json_mongo=True).get_mongo()

    try:
        log_it("START: {}".format(_cube["slug"]), "bin-mining")

        c = Cube(_cube)
        if _cube.get("type") == "relational":
            c.load()
            c.frame()
            c.save()
        elif _cube.get("type") == "cube_join":
            c.environment(_cube.get("type"))
            cube_join = CubeJoin(_cube)
            c._data(cube_join.none())
            c._keys(cube_join.none().columns.values)
            c.frame()
            c.save()
        elif _cube.get("type") == "url":
            # URL cubes fetch raw text over HTTP and parse it by url_type.
            c._data(requests.get(_cube.get("connection")).text)
            c.frame(data_type=_cube.get("url_type"))
            c.save()

    except Exception as e:  # 'as' syntax: valid on Python 2.6+ and 3.x
        log_it(e, "bin-mining")
        log_it(traceback.format_exc(), "bin-mining")
        _cube["run"] = False
        mongo["cube"].update({"slug": _cube["slug"]}, _cube)
示例#5
0
def element_cube(mongodb, slug=None):
    """Return the stored column list for *slug* from the Riak bucket."""
    client = riak.RiakClient(protocol=conf("riak")["protocol"],
                             http_port=conf("riak")["http_port"],
                             host=conf("riak")["host"])
    bucket = client.bucket(conf("riak")["bucket"])

    # A missing/empty key falls back to an empty JSON object literal.
    raw = bucket.get(u'{}-columns'.format(slug)).data or '{}'
    return {'columns': json.loads(raw)}
示例#6
0
文件: element.py 项目: pengjia/mining
def element_cube(mongodb, slug=None):
    """Fetch and decode the persisted column metadata for *slug*."""
    store = riak.RiakClient(
        protocol=conf("riak")["protocol"],
        http_port=conf("riak")["http_port"],
        host=conf("riak")["host"]).bucket(conf("riak")["bucket"])

    payload = store.get(u'{}-columns'.format(slug)).data
    if not payload:
        # Empty keys decode as an empty JSON object.
        payload = '{}'
    return {'columns': json.loads(payload)}
示例#7
0
def scheduler_app():
    """Run the cube/dashboard scheduler loop (never returns).

    First registers a job for every cube and dashboard flagged with
    ``scheduler_status: True``, then loops once per second: registering
    newly flagged documents, cancelling jobs whose flag was switched
    off, and running any pending scheduled jobs.
    """
    mongo = MongoPlugin(
        uri=conf("mongodb")["uri"],
        db=conf("mongodb")["db"],
        json_mongo=True).get_mongo()

    # Initial registration of directly scheduled cubes...
    for cube in mongo['cube'].find({'scheduler_status': True}):
        rules(cube)

    # ...and of cubes referenced by scheduled dashboards.
    # NOTE(review): unlike the loop below, this call omits the
    # dashboard slug argument -- confirm whether that is intentional.
    for dashboard in mongo['dashboard'].find({'scheduler_status': True}):
        elements = [e['id'] for e in dashboard['element']]
        for e in elements:
            element = mongo['element'].find_one({'slug': e})
            cube = mongo['cube'].find_one({'slug': element['cube']})
            rules(cube, dashboard['scheduler_type'],
                  dashboard['scheduler_interval'])

    while True:
        # Pick up cubes flagged for scheduling since the last pass.
        for cube in mongo['cube'].find({'scheduler_status': True}):
            if cube['slug'] not in register:
                rules(cube)

        # Same for dashboard-driven jobs.
        for dashboard in mongo['dashboard'].find({'scheduler_status': True}):
            elements = [e['id'] for e in dashboard['element']]
            for e in elements:
                element = mongo['element'].find_one({'slug': e})
                cube = mongo['cube'].find_one({'slug': element['cube']})
                if cube['slug'] not in register:
                    rules(cube, dashboard['scheduler_type'],
                          dashboard['scheduler_interval'],
                          dashboard['slug'])

        # Cancel jobs for cubes whose scheduling was switched off.
        for cube in mongo['cube'].find({'scheduler_status': False}):
            if cube['slug'] in register:
                schedule.cancel_job(onrun[cube['slug']])
                del onrun[cube['slug']]
                register.remove(cube['slug'])

        # Cancel dashboard-driven jobs the same way; best-effort since
        # the referenced element/cube may have been deleted meanwhile.
        for dashboard in mongo['dashboard'].find({'scheduler_status': False}):
            elements = [e['id'] for e in dashboard['element']]
            for e in elements:
                try:
                    element = mongo['element'].find_one({'slug': e})
                    cube = mongo['cube'].find_one({'slug': element['cube']})
                    jobn = u"{}-{}".format(cube['slug'], dashboard['slug'])
                    if jobn in register:
                        schedule.cancel_job(onrun[jobn])
                        del onrun[jobn]
                        register.remove(jobn)
                except:
                    # NOTE(review): bare except hides real errors --
                    # consider narrowing to (TypeError, KeyError).
                    pass

        schedule.run_pending()
        sleep(1)
示例#8
0
文件: cube.py 项目: yenchih/mining
    def __init__(self, _cube):
        """Keep the cube document (sans Mongo ``_id``) and open a DB handle."""
        log_it("START: {}".format(_cube['slug']), "bin-mining")

        plugin = MongoPlugin(uri=conf("mongodb")["uri"],
                             db=conf("mongodb")["db"],
                             json_mongo=True)
        self.mongo = plugin.get_mongo()

        # Drop Mongo's internal id so the document can be re-saved cleanly.
        _cube.pop('_id')
        self.cube = _cube
        self.slug = _cube['slug']
示例#9
0
    def __init__(self, _cube):
        """Store the cube document and connect to MongoDB."""
        log_it("START: {}".format(_cube['slug']), "bin-mining")

        mongo_conf = conf("mongodb")
        self.mongo = MongoPlugin(uri=mongo_conf["uri"],
                                 db=mongo_conf["db"],
                                 json_mongo=True).get_mongo()

        # Remove Mongo's internal id before keeping the document around.
        _cube.pop('_id')
        self.cube = _cube
        self.slug = self.cube['slug']
示例#10
0
def scheduler_app():
    """Scheduler main loop: register, reconcile and run cube jobs forever.

    Seeds jobs for every cube/dashboard with ``scheduler_status: True``,
    then polls MongoDB once per second to register newly flagged
    documents, cancel jobs whose flag was cleared, and fire pending jobs.
    """
    mongo = MongoPlugin(uri=conf("mongodb")["uri"],
                        db=conf("mongodb")["db"],
                        json_mongo=True).get_mongo()

    # Initial registration of directly scheduled cubes...
    for cube in mongo['cube'].find({'scheduler_status': True}):
        rules(cube)

    # ...and of cubes referenced by scheduled dashboards.
    # NOTE(review): this seed call omits the dashboard slug that the
    # in-loop call below passes -- confirm whether that is intentional.
    for dashboard in mongo['dashboard'].find({'scheduler_status': True}):
        elements = [e['id'] for e in dashboard['element']]
        for e in elements:
            element = mongo['element'].find_one({'slug': e})
            cube = mongo['cube'].find_one({'slug': element['cube']})
            rules(cube, dashboard['scheduler_type'],
                  dashboard['scheduler_interval'])

    while True:
        # Register cubes flagged since the last pass.
        for cube in mongo['cube'].find({'scheduler_status': True}):
            if cube['slug'] not in register:
                rules(cube)

        # Register dashboard-driven jobs (keyed by cube+dashboard slug).
        for dashboard in mongo['dashboard'].find({'scheduler_status': True}):
            elements = [e['id'] for e in dashboard['element']]
            for e in elements:
                element = mongo['element'].find_one({'slug': e})
                cube = mongo['cube'].find_one({'slug': element['cube']})
                if cube['slug'] not in register:
                    rules(cube, dashboard['scheduler_type'],
                          dashboard['scheduler_interval'], dashboard['slug'])

        # Cancel jobs for cubes whose scheduling was turned off.
        for cube in mongo['cube'].find({'scheduler_status': False}):
            if cube['slug'] in register:
                schedule.cancel_job(onrun[cube['slug']])
                del onrun[cube['slug']]
                register.remove(cube['slug'])

        # Cancel dashboard-driven jobs likewise; best-effort because the
        # referenced element/cube may already be gone.
        for dashboard in mongo['dashboard'].find({'scheduler_status': False}):
            elements = [e['id'] for e in dashboard['element']]
            for e in elements:
                try:
                    element = mongo['element'].find_one({'slug': e})
                    cube = mongo['cube'].find_one({'slug': element['cube']})
                    jobn = u"{}-{}".format(cube['slug'], dashboard['slug'])
                    if jobn in register:
                        schedule.cancel_job(onrun[jobn])
                        del onrun[jobn]
                        register.remove(jobn)
                except:
                    # NOTE(review): bare except swallows everything --
                    # consider narrowing to (TypeError, KeyError).
                    pass

        schedule.run_pending()
        sleep(1)
示例#11
0
    def __init__(self, cube):
        """Connect to Riak and immediately run the configured join strategy."""
        self.cube = cube
        self.data = DataFrame({})

        client = riak.RiakClient(protocol=conf("riak")["protocol"],
                                 http_port=conf("riak")["http_port"],
                                 host=conf("riak")["host"])
        self.MyBucket = client.bucket(conf("riak")["bucket"])
        self.MyBucket.enable_search()

        # Dispatch on the join type by method name; 'none' is the default.
        join_type = cube.get('cube_join_type', 'none')
        getattr(self, join_type)()
示例#12
0
文件: cube.py 项目: rmoorman/mining
def run(cube_slug=None):
    """Dispatch cube processing jobs onto a 20-worker thread pool.

    When *cube_slug* is given, only the matching cube is processed.
    Returns True once all queued work has completed.
    """
    mongo = MongoPlugin(
        uri=conf("mongodb")["uri"],
        db=conf("mongodb")["db"],
        json_mongo=True).get_mongo()

    workers = ThreadPool(20)
    for document in mongo["cube"].find():
        if cube_slug and cube_slug != document["slug"]:
            continue
        workers.add_task(process, document)

    workers.wait_completion()
    return True
示例#13
0
文件: cube.py 项目: rmoorman/mining
    def __init__(self, _cube):
        """Open Mongo and Riak handles and keep the cube document."""
        log_it("START: {}".format(_cube["slug"]), "bin-mining")

        self.mongo = MongoPlugin(
            uri=conf("mongodb")["uri"],
            db=conf("mongodb")["db"],
            json_mongo=True).get_mongo()

        riak_client = riak.RiakClient(
            protocol=conf("riak")["protocol"],
            http_port=conf("riak")["http_port"],
            host=conf("riak")["host"])

        self.MyBucket = riak_client.bucket(conf("riak")["bucket"])
        self.MyBucket.enable_search()

        # Drop the Mongo-internal id before storing the document.
        _cube.pop("_id")
        self.cube = _cube
        self.slug = self.cube["slug"]
示例#14
0
文件: cube.py 项目: yenchih/mining
    def load(self):
        """Execute the cube's SQL on its relational connection.

        Marks the cube as running, wraps the configured SQL in a
        SELECT-* sub-select, executes it and stores the rows in
        ``self.data`` and the column names in ``self.keys``.
        """
        # Flag the cube as running so the UI/scheduler sees progress.
        self.cube['run'] = 'run'
        self.mongo['cube'].update({'slug': self.slug}, self.cube)

        self.cube['start_process'] = datetime.now()

        # Strip trailing whitespace then a trailing ';' so the query can
        # be embedded as a sub-select.  The original indexed _sql[-1]
        # directly, which raised IndexError on an empty SQL string and
        # missed semicolons followed by whitespace.
        _sql = self.cube['sql'].rstrip()
        if _sql.endswith(';'):
            _sql = _sql[:-1]
        self.sql = u"""SELECT * FROM ({}) AS CUBE;""".format(_sql)

        self.connection = self.mongo['connection'].find_one(
            {'slug': self.cube['connection']})['connection']

        log_it("CONNECT IN RELATION DATA BASE: {}".format(self.slug),
               "bin-mining")
        # SQLite does not accept the pooling kwargs used for servers.
        if 'sqlite' in self.connection:
            e = create_engine(self.connection)
        else:
            e = create_engine(self.connection,
                              **conf('openmining')['sql_conn_params'])
        Session = sessionmaker(bind=e)
        session = Session()

        try:
            resoverall = session.execute(text(self.sql))
            self.data = resoverall.fetchall()
            self.keys = resoverall.keys()
        finally:
            # Always release the DB connection (the original leaked it).
            session.close()
示例#15
0
    def load(self):
        """Run the cube's SQL query and capture its rows and column names.

        Side effects: flags the cube as running in MongoDB, records the
        start timestamp, and fills ``self.data`` / ``self.keys``.
        """
        self.cube['run'] = 'run'
        self.mongo['cube'].update({'slug': self.slug}, self.cube)

        self.cube['start_process'] = datetime.now()

        # Normalise the SQL before embedding it as a sub-select: trim
        # trailing whitespace, then one trailing ';'.  The original
        # indexed _sql[-1], which raised IndexError on empty SQL.
        _sql = self.cube['sql'].rstrip()
        if _sql.endswith(';'):
            _sql = _sql[:-1]
        self.sql = u"""SELECT * FROM ({}) AS CUBE;""".format(_sql)

        self.connection = self.mongo['connection'].find_one({
            'slug': self.cube['connection']})['connection']

        log_it("CONNECT IN RELATION DATA BASE: {}".format(self.slug),
               "bin-mining")
        # SQLite rejects the server-oriented pooling parameters.
        if 'sqlite' in self.connection:
            e = create_engine(self.connection)
        else:
            e = create_engine(self.connection,
                              **conf('openmining')['sql_conn_params'])
        Session = sessionmaker(bind=e)
        session = Session()

        try:
            resoverall = session.execute(text(self.sql))
            self.data = resoverall.fetchall()
            self.keys = resoverall.keys()
        finally:
            # Release the connection even on failure (previously leaked).
            session.close()
示例#16
0
文件: cube.py 项目: yenchih/mining
def run(cube_slug=None):
    """Queue every cube (or only *cube_slug*) for processing on a pool."""
    mongo = MongoPlugin(uri=conf("mongodb")["uri"],
                        db=conf("mongodb")["db"],
                        json_mongo=True).get_mongo()

    pool = ThreadPool(20)

    for document in mongo['cube'].find():
        # Honour an explicit slug filter when one was given.
        if cube_slug and cube_slug != document['slug']:
            continue
        pool.add_task(process, document)

    pool.wait_completion()
    return True
示例#17
0
文件: cube.py 项目: rmoorman/mining
    def load(self):
        """Run the cube's SQL against its configured relational database.

        Flags the cube as running, wraps the SQL in a SELECT-* sub-select,
        executes it and stores rows in ``self.data`` / names in ``self.keys``.
        """
        self.cube["run"] = "run"
        self.mongo["cube"].update({"slug": self.slug}, self.cube)

        self.cube["start_process"] = datetime.now()

        # Trim trailing whitespace, then one trailing ';', so the query
        # can be embedded as a sub-select.  The original indexed
        # _sql[-1], which raised IndexError on an empty SQL string.
        _sql = self.cube["sql"].rstrip()
        if _sql.endswith(";"):
            _sql = _sql[:-1]
        self.sql = u"""SELECT * FROM ({}) AS CUBE;""".format(_sql)

        self.connection = self.mongo["connection"].find_one({"slug": self.cube["connection"]})["connection"]

        log_it("CONNECT IN RELATION DATA BASE: {}".format(self.slug), "bin-mining")
        e = create_engine(self.connection, **conf("openmining")["sql_conn_params"])
        Session = sessionmaker(bind=e)
        session = Session()

        try:
            resoverall = session.execute(text(self.sql))
            self.data = resoverall.fetchall()
            self.keys = resoverall.keys()
        finally:
            # Always close the session (the original leaked the connection).
            session.close()
示例#18
0
    def __init__(self, _cube):
        """Connect to MongoDB and Riak, then keep the cube document."""
        log_it("START: {}".format(_cube['slug']), "bin-mining")

        self.mongo = MongoPlugin(uri=conf("mongodb")["uri"],
                                 db=conf("mongodb")["db"],
                                 json_mongo=True).get_mongo()

        riak_client = riak.RiakClient(protocol=conf("riak")["protocol"],
                                      http_port=conf("riak")["http_port"],
                                      host=conf("riak")["host"])
        self.MyBucket = riak_client.bucket(conf("riak")["bucket"])
        self.MyBucket.enable_search()

        # Discard Mongo's internal id before storing the document.
        _cube.pop('_id')
        self.cube = _cube
        self.slug = self.cube['slug']
示例#19
0
# -*- coding: utf-8 -*-
from celery import Celery

from mining.utils import conf


# Celery application for the mining background tasks; broker and result
# backend come from the 'celery' config section and default to local AMQP.
celery_app = Celery(
    'mining.tasks',
    broker=conf("celery").get("broker", 'amqp://'),
    backend=conf("celery").get("backend", 'amqp://'),
    include=['mining.tasks'])

# Expire task results after an hour by default and run workers on the
# gevent pool so I/O-bound tasks do not block each other.
celery_app.conf.update(
    CELERY_TASK_RESULT_EXPIRES=conf("celery").get("result_expires", 3600),
    CELERYD_POOL="gevent"
)
示例#20
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from bottle.ext import auth

from mining.utils import conf, __from__


# Resolve the configured auth backend class from its dotted path;
# __from__ yields the plain ``object`` type when the path cannot load.
auth_engine = __from__(conf('auth')['engine'])
if auth_engine == object:
    # Misconfiguration is an error: exit non-zero so supervisors notice
    # (the original exited with status 0, hiding the failure).
    print('Set valid auth engine')
    exit(1)

auth_import = conf('auth')['engine'].split('.')[-1]

# Build the OAuth callback URL from the configured protocol/domain,
# appending the port only when it is not a default HTTP(S) port.
callback = u"{}://{}".format(
    conf('openmining')['protocol'],
    conf('openmining')['domain'])
if conf('openmining')['domain_port'] not in ['80', '443']:
    callback = "{}:{}".format(callback, conf('openmining')['domain_port'])

# Instantiate the engine per backend; Google needs key/secret, the
# generic fallback authenticates by username.
if auth_import == 'Google':
    engine = auth_engine(
        conf('auth')['key'], conf('auth')['secret'], callback)
elif auth_import == 'Facebook':
    #  Not working: required params are missing for this backend
    engine = auth_engine()
elif auth_import == 'Twitter':
    #  Not working: required params are missing for this backend
    engine = auth_engine()
else:
    engine = auth_engine(callback_url=callback, field="username")
示例#21
0
 def __init__(self):
     """Cache the 'datawarehouse' section of the app configuration."""
     self.conf = conf('datawarehouse')
示例#22
0
def login():
    """Template context for the login page: URL helper plus UI language."""
    context = {"get_url": app.wrap_app.get_url}
    context["lang"] = conf("openmining")["lang"]
    return context
示例#23
0
def data(mongodb, slug, ext='xls'):
    """Export an element's cube data as a downloadable file.

    Loads the cube rows from Riak into a DataFrame, applies field
    selection, 'filter__<field>__<op>' filters, grouping and ordering
    from the query string, writes an xls/csv file to disk and returns
    its raw bytes with download headers set.
    """
    MyClient = riak.RiakClient(protocol=conf("riak")["protocol"],
                               http_port=conf("riak")["http_port"],
                               host=conf("riak")["host"])

    MyBucket = MyClient.bucket(conf("riak")["bucket"])

    element = mongodb['element'].find_one({'slug': slug})

    # json.loads needs a string: fall back to '[]' (the original passed
    # a bare list, which raised TypeError whenever the key was empty).
    columns = json.loads(MyBucket.get(
        '{}-columns'.format(element.get('cube'))).data or '[]')

    fields = columns
    if request.GET.get('fields', None):
        fields = request.GET.get('fields').split(',')

    # Query-string keys shaped like 'filter__<field>__<operator>'.
    filters = [i[0] for i in request.GET.iteritems()
               if len(i[0].split('filter__')) > 1]

    df = DataFrame(MyBucket.get(element.get('cube')).data, columns=fields)
    if len(filters) >= 1:
        for f in filters:
            s = f.split('__')
            field = s[1]
            operator = s[2]
            value = request.GET.get(f)
            if operator in ['like', 'regex']:
                df = DataFrameSearchColumn(df, field, value, operator)
            else:
                df = df.query(df_generate(df, value, f))

    groupby = []
    if request.GET.get('groupby', None):
        groupby = request.GET.get('groupby', ).split(',')
    if len(groupby) >= 1:
        df = df.groupby(groupby)

    if request.GET.get('orderby', None):
        orderby = request.GET.get('orderby', [])
        # NOTE(review): GET values are strings, so comparing with the
        # int 1 is always unequal and ordering is always descending --
        # confirm whether '1' was intended here.
        orderby__order = True
        if request.GET.get('orderby__order', 0) != 1:
            orderby__order = False
        df = df.sort(orderby, ascending=orderby__order)

    # CLEAN MEMORY
    del filters, fields, columns
    gc.collect()

    file_name = '{}/assets/exports/openmining-{}.{}'.format(
        PROJECT_PATH, element.get('cube'), ext)
    if ext == 'csv':
        df.to_csv(file_name, sep=";")
        contenttype = 'text/csv'
    else:
        df.to_excel(file_name)
        contenttype = 'application/vnd.ms-excel'

    response.set_header('charset', 'utf-8')
    response.set_header('Content-disposition', 'attachment; '
                        'filename={}.{}'.format(element.get('cube'), ext))
    response.content_type = contenttype

    # Read the export back in binary mode inside a context manager so
    # the handle is always closed (the original leaked it on error and
    # used text mode, which corrupts xls output on Windows).
    with open(file_name, 'rb') as exported:
        return exported.read()
示例#24
0
# -*- coding: utf-8 -*-
from mining.utils import conf, __from__

# Resolve the configured engine class (dotted path from the
# 'datawarehouse' config section) at import time.
DW = __from__(conf('datawarehouse')['engine'])


class DataWarehouse(DW):
    """Concrete data-warehouse backend: an alias for the configured engine."""
    pass
示例#25
0
def login():
    """Template context for the login page."""
    ctx = dict(get_url=app.wrap_app.get_url)
    ctx['lang'] = conf('openmining')['lang']
    return ctx
示例#26
0
def login():
    """Build the login template context (URL resolver + UI language)."""
    language = conf('openmining')['lang']
    return {'get_url': app.wrap_app.get_url, 'lang': language}


@click.group()
def cmds():
    """Root command group for the OpenMining CLI."""
    pass


@cmds.command()
@click.option('--port', type=int, help=u'Set application server port!')
@click.option('--ip', type=str, help=u'Set application server ip!')
@click.option('--debug', default=False,
              help=u'Set application server debug!')
def runserver(port, ip, debug):
    """Start the OpenMining web server.

    When debug is None a plain gevent WSGIServer is started and
    serve_forever() blocks, so the bottle run() below is only reached
    otherwise.  NOTE(review): --debug defaults to False, so the None
    branch seems reachable only via an explicit default_map override --
    confirm the intended behaviour.
    """
    if debug is None:
        server = WSGIServer((ip, port), app, handler_class=WebSocketHandler)
        server.serve_forever()

    click.echo(u'OpenMining start server at: {}:{}'.format(ip, port))
    run(app=app, host=ip, port=port, debug=debug,
        reloader=True, server=GeventWebSocketServer)


if __name__ == "__main__":
    # Seed click's default_map from the openmining config, forcing
    # debug off unless overridden on the command line.
    default_map = {"runserver": conf('openmining')}
    default_map["runserver"]["debug"] = False
    cmds(default_map=default_map)
示例#27
0
from controllers.export import export_app

from mining.utils import conf
from mining.auth import auth
from mining.settings import TEMPLATE_PATH, STATIC_PATH


# Force UTF-8 as the default codec (Python 2 idiom; reload() re-exposes
# sys.setdefaultencoding, which site.py normally removes).
reload(sys)
sys.setdefaultencoding('utf-8')

parser = argparse.ArgumentParser(description=u'Open Mining!')
subparser = parser.add_subparsers()

# 'runserver' sub-command; host/port default to the openmining config.
arg_runserver = subparser.add_parser('runserver', help=u'Run application')
arg_runserver.add_argument('--port', help=u'Set application server port!',
                           type=int, default=conf('openmining')['port'])
arg_runserver.add_argument('--ip', help=u'Set application server IP!',
                           type=str, default=conf('openmining')['ip'])
arg_runserver.add_argument('--debug', '-v',
                           help=u'Set application server debug!',
                           action='count')

args = parser.parse_args()

# Make the project template directory visible to bottle's loader.
T.insert(0, TEMPLATE_PATH)

session_opts = {
    'session.type': 'file',
    'session.data_dir': '/tmp/openmining.data',
    'session.lock_dir': '/tmp/openmining.lock',
    'session.cookie_expires': 50000,
示例#28
0
# -*- coding: utf-8 -*-
from celery import Celery

from mining.utils import conf


# Celery application for mining background tasks; broker and result
# backend come from the 'celery' config section, defaulting to local AMQP.
celery_app = Celery(
    'mining.tasks',
    broker=conf("celery").get("broker", 'amqp://'),
    backend=conf("celery").get("backend", 'amqp://'),
    include=['mining.tasks'])

# Any extra Celery settings are passed straight through from the config.
celery_app.conf.update(**conf("celery").get("params", {}))
示例#29
0
from controllers.stream import stream_app
from controllers.export import export_app

from mining.utils import conf
from mining.auth import auth
from mining.settings import TEMPLATE_PATH, STATIC_PATH


# Force UTF-8 default encoding (Python 2 idiom; reload() restores
# sys.setdefaultencoding after site.py deletes it).
reload(sys)
sys.setdefaultencoding("utf-8")

parser = argparse.ArgumentParser(description=u"Open Mining!")
subparser = parser.add_subparsers()

# 'runserver' sub-command; server defaults come from the openmining config.
arg_runserver = subparser.add_parser("runserver", help=u"Run application")
arg_runserver.add_argument("--port", help=u"Set application server port!", type=int, default=conf("openmining")["port"])
arg_runserver.add_argument("--ip", help=u"Set application server IP!", type=str, default=conf("openmining")["ip"])
arg_runserver.add_argument("--debug", "-v", help=u"Set application server debug!", action="count")

args = parser.parse_args()

# Expose the project template directory to bottle's template loader.
T.insert(0, TEMPLATE_PATH)

# Beaker session settings: file-backed sessions under /tmp.
session_opts = {
    "session.type": "file",
    "session.data_dir": "/tmp/openmining.data",
    "session.lock_dir": "/tmp/openmining.lock",
    "session.cookie_expires": 50000,
    "session.auto": True,
}
示例#30
0
 def __init__(self):
     """Cache the 'datawarehouse' section of the app configuration."""
     self.conf = conf('datawarehouse')
示例#31
0
文件: stream.py 项目: yenchih/mining
import gc

from bottle import Bottle, abort, request
from bottle.ext.websocket import websocket
from bottle.ext.mongo import MongoPlugin

from pandas import DataFrame

from mining.utils import conf
from mining.utils._pandas import df_generate, DataFrameSearchColumn
from mining.db.datawarehouse import DataWarehouse


# Bottle sub-application serving the websocket data stream; the Mongo
# plugin injects a 'mongodb' handle into route callbacks.
stream_app = Bottle()
mongo = MongoPlugin(
    uri=conf("mongodb")["uri"],
    db=conf("mongodb")["db"],
    json_mongo=True)
stream_app.install(mongo)


@stream_app.route('/data/<slug>', apply=[websocket])
def data(ws, mongodb, slug):
    if not ws:
        abort(400, 'Expected WebSocket request.')

    DW = DataWarehouse()

    element = mongodb['element'].find_one({'slug': slug})

    element['page_limit'] = 50
示例#32
0
# -*- coding: utf-8 -*-
from celery import Celery

from mining.utils import conf

# Celery app for mining background tasks; broker and result backend are
# taken from the 'celery' config section, defaulting to local AMQP.
celery_app = Celery('mining.tasks',
                    broker=conf("celery").get("broker", 'amqp://'),
                    backend=conf("celery").get("backend", 'amqp://'),
                    include=['mining.tasks'])

# Pass any extra Celery settings straight through from the config.
celery_app.conf.update(**conf("celery").get("params", {}))
示例#33
0
文件: manage.py 项目: yenchih/mining
from mining.utils import conf
from mining.auth import auth
from mining.settings import TEMPLATE_PATH, STATIC_PATH

# Force UTF-8 default encoding (Python 2 idiom).
reload(sys)
sys.setdefaultencoding('utf-8')

parser = argparse.ArgumentParser(description=u'Open Mining!')
subparser = parser.add_subparsers()

# 'runserver' sub-command; defaults are pulled from the openmining config.
arg_runserver = subparser.add_parser('runserver', help=u'Run application')
arg_runserver.add_argument('--port',
                           help=u'Set application server port!',
                           type=int,
                           default=conf('openmining')['port'])
arg_runserver.add_argument('--ip',
                           help=u'Set application server IP!',
                           type=str,
                           default=conf('openmining')['ip'])
arg_runserver.add_argument('--debug',
                           '-v',
                           help=u'Set application server debug!',
                           action='count')

args = parser.parse_args()

# Expose the project template directory to bottle's template loader.
T.insert(0, TEMPLATE_PATH)

session_opts = {
    'session.type': 'file',
示例#34
0
 def __init__(self):
     """Cache the 'datawarehouse' config and start with search disabled."""
     self.conf = conf('datawarehouse')
     # presumably toggled later by a search-capable backend -- confirm.
     self.search = False
示例#35
0
def index():
    """Template context for the index page: URL helper, protocol, language."""
    context = {'get_url': app.wrap_app.get_url}
    context['protocol'] = conf('openmining')['protocol']
    context['lang'] = conf('openmining')['lang']
    return context
示例#36
0
@click.option(
    "--concurrency",
    type=int,
    default=4,
    help="""Number of child processes processing the queue. The
              default is the number of CPUs available on your system.""",
)
def celery(concurrency):
    """Start a Celery worker with the given concurrency."""
    # NOTE(review): the help text claims the default is the CPU count,
    # but the option default is fixed at 4 -- confirm which is intended.
    click.echo(u"OpenMining start tasks")
    # Argv-style invocation: equivalent to `celery worker --concurrency=N`.
    args = ["celery", "worker", "--concurrency={}".format(concurrency)]
    celery_app.start(args)


@cmds.command()
def scheduler():
    """Run the blocking scheduler loop for cube/dashboard jobs."""
    click.echo(u"OpenMining start scheduler")
    scheduler_app()


@cmds.command()
@click.option("--level", type=int, default=0, help="What level of data volume?")
def build_demo(level):
    """Load the demo dataset at the requested data-volume level."""
    click.echo(u"OpenMining load demo system")
    build(level)


if __name__ == "__main__":
    # Seed click's default_map from config and force debug off unless
    # explicitly overridden on the command line.
    default_map = {"runserver": conf("openmining")}
    default_map["runserver"]["debug"] = False
    cmds(default_map=default_map)
示例#37
0
from bottle import Bottle, request
from bottle.ext.mongo import MongoPlugin

from mining.utils import conf, parse_dumps
from .base import get, post, put, delete, base

from element import collection as collection_element
from cube import collection as collection_cube
from filter import collection as collection_filter
from group import collection as collection_permissions_group

# Mongo collection names for dashboards and their permission groups.
collection = 'dashboard'
collection_group = '{}_groups'.format(collection)

# Bottle sub-application for dashboard CRUD; the Mongo plugin injects a
# 'mongodb' handle into the route callbacks.
dashboard_app = Bottle()
mongo = MongoPlugin(uri=conf("mongodb")["uri"],
                    db=conf("mongodb")["db"],
                    json_mongo=True)
dashboard_app.install(mongo)


@dashboard_app.route('/', method='GET')
@dashboard_app.route('/<slug>', method='GET')
def dashboard_get(mongodb, slug=None):
    da = get(mongodb, collection, slug)
    if 'full' not in request.GET:
        return da
    response = json.loads(da)
    new_resp = []
    session = dict(request.environ.get('beaker.session'))
示例#38
0
def index():
    """Template context for the index page."""
    context = {"get_url": app.wrap_app.get_url}
    context["protocol"] = conf("openmining")["protocol"]
    context["lang"] = conf("openmining")["lang"]
    return context
示例#39
0
文件: stream.py 项目: rmoorman/mining
def data(ws, mongodb, slug):
    """Stream an element's cube data over a websocket.

    Sends a sequence of JSON messages typed as: last_update, columns,
    max_page, one 'data' message per row, categories, and finally
    close.  Field selection, filtering, grouping, ordering and (for
    grid elements) pagination are driven by the query string.
    """
    if not ws:
        abort(400, 'Expected WebSocket request.')

    MyClient = riak.RiakClient(protocol=conf("riak")["protocol"],
                               http_port=conf("riak")["http_port"],
                               host=conf("riak")["host"])

    MyBucket = MyClient.bucket(conf("riak")["bucket"])

    element = mongodb['element'].find_one({'slug': slug})

    # Default page size of 50 rows.
    # NOTE(review): GET values are strings, so `is False` never matches
    # a query-string value -- confirm how 'limit' is meant to be passed.
    element['page_limit'] = 50
    if request.GET.get('limit', True) is False:
        element['page_limit'] = 9999999999

    # NOTE(review): when the '-columns' key is missing this passes a
    # bare list to json.loads, which raises TypeError -- confirm the
    # fallback should be the string '[]'.
    coll = MyBucket.get('{}-columns'.format(element.get('cube'))).data or []
    columns = json.loads(coll)

    fields = columns
    if request.GET.get('fields', None):
        fields = request.GET.get('fields').split(',')

    cube_last_update = mongodb['cube'].find_one({'slug': element.get('cube')})
    ws.send(json.dumps({'type': 'last_update',
                        'data': str(cube_last_update.get('lastupdate', ''))}))

    ws.send(json.dumps({'type': 'columns', 'data': fields}))

    # Query-string keys shaped like 'filter__<field>__<operator>'.
    filters = [i[0] for i in request.GET.iteritems()
               if len(i[0].split('filter__')) > 1]

    if element['type'] == 'grid':
        # Grid elements are paginated via ?page=N.
        page = int(request.GET.get('page', 1))
        page_start = 0
        page_end = element['page_limit']
        if page >= 2:
            page_end = element['page_limit'] * page
            page_start = page_end - element['page_limit']
    else:
        # Non-grid elements stream the whole frame ([None:None] slice).
        page_start = None
        page_end = None

    df = DataFrame(MyBucket.get(element.get('cube')).data, columns=fields)
    if len(filters) >= 1:
        for f in filters:
            s = f.split('__')
            field = s[1]
            operator = s[2]
            value = request.GET.get(f)
            if operator == 'like':
                df = df[df[field].str.contains(value)]
            elif operator == 'regex':
                df = DataFrameSearchColumn(df, field, value, operator)
            else:
                df = df.query(df_generate(df, value, f))

    groupby = []
    if request.GET.get('groupby', None):
        groupby = request.GET.get('groupby', ).split(',')
    if len(groupby) >= 1:
        df = DataFrame(df.groupby(groupby).grouper.get_group_levels())

    # Order only when the requested column is one of the streamed
    # fields; 'orderby__order' uses '0' for descending per column.
    if request.GET.get('orderby',
                       element.get('orderby', None)) and request.GET.get(
            'orderby', element.get('orderby', None)) in fields:

        orderby = request.GET.get('orderby', element.get('orderby', ''))
        if type(orderby) == str:
            orderby = orderby.split(',')
        orderby__order = request.GET.get('orderby__order',
                                         element.get('orderby__order', ''))
        if type(orderby__order) == str:
            orderby__order = orderby__order.split(',')
        ind = 0
        for orde in orderby__order:
            if orde == '0':
                orderby__order[ind] = False
            else:
                orderby__order[ind] = True
            ind += 1
        df = df.sort(orderby, ascending=orderby__order)

    ws.send(json.dumps({'type': 'max_page', 'data': len(df)}))

    # CLEAN MEMORY
    del filters, fields, columns
    gc.collect()
    categories = []
    for i in df.to_dict(outtype='records')[page_start:page_end]:
        if element.get('categories', None):
            categories.append(i[element.get('categories')])
        ws.send(json.dumps({'type': 'data', 'data': i}))

    # CLEAN MEMORY
    del df
    gc.collect()

    ws.send(json.dumps({'type': 'categories', 'data': categories}))
    ws.send(json.dumps({'type': 'close'}))

    # CLEAN MEMORY
    del categories
    gc.collect()
示例#40
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from mining.utils import conf, __from__


# Resolve the configured engine class from its dotted path at import time.
DW = __from__(conf('datawarehouse')['engine'])


class DataWarehouse(DW):
    """Configured data-warehouse engine with its config section attached."""

    def __init__(self):
        # Cache the raw 'datawarehouse' config section for the engine.
        self.conf = conf('datawarehouse')
示例#41
0
import json
import gc

from bottle import Bottle, request, response
from bottle.ext.mongo import MongoPlugin

from pandas import DataFrame

from mining.settings import PROJECT_PATH
from mining.utils import conf
from mining.utils._pandas import df_generate, DataFrameSearchColumn
from mining.db.datawarehouse import DataWarehouse


# Bottle sub-application for file exports; the Mongo plugin injects a
# 'mongodb' handle into the route callbacks.
export_app = Bottle()
mongo = MongoPlugin(uri=conf("mongodb")["uri"], db=conf("mongodb")["db"], json_mongo=True)
export_app.install(mongo)


@export_app.route("/data/<slug>.<ext>")
def data(mongodb, slug, ext="xls"):
    DW = DataWarehouse()

    element = mongodb["element"].find_one({"slug": slug})

    element["page_limit"] = 50
    if request.GET.get("limit", True) is False:
        element["page_limit"] = 9999999999

    data = DW.get(element.get("cube"))
    columns = data.get("columns") or []
示例#42
0
# -*- coding: utf-8 -*-
from celery import Celery

from mining.utils import conf

# Celery app for mining background tasks; broker and result backend
# default to local AMQP when the config omits them.
celery_app = Celery('mining.tasks',
                    broker=conf("celery").get("broker", 'amqp://'),
                    backend=conf("celery").get("backend", 'amqp://'),
                    include=['mining.tasks'])

# Results expire after an hour by default; use the gevent worker pool.
celery_app.conf.update(CELERY_TASK_RESULT_EXPIRES=conf("celery").get(
    "result_expires", 3600),
                       CELERYD_POOL="gevent")