示例#1
0
def main():
    """Parse command-line options, then run one SQL query or start the shell.

    Registers the ``-e/--query`` option on dpark's shared option parser,
    parses the command line and calls ``load_history()``.  When a query was
    given with ``-e`` it is passed to ``execute()`` and the process exits
    with status 0; otherwise ``shell()`` is called.
    """
    from dpark import DparkContext, optParser
    #optParser.set_default('master', 'mesos')
    optParser.add_option('-e', '--query', type='string', default='',
            help='execute the SQL query then exit')

    # Positional arguments are not used by this entry point.
    options, _ = optParser.parse_args()

    load_history()

    if options.query:
        # One-shot mode: run the single statement and leave.
        execute(options.query)
        sys.exit(0)

    shell()
示例#2
0
# -*- coding: utf-8 -*-

import uuid
import inspect
from dpark import DparkContext, optParser
from sql.parser import parse
from collections import OrderedDict
from models import Model

# Register the bare "-s" and "-x" flags on dpark's shared option parser.
# Per the original note below they exist for py.test runs; presumably so
# that py.test's own flags are not rejected when the parser reads the
# command line -- TODO confirm.
optParser.add_option("-s")   # "option used for py.test"
optParser.add_option("-x")


class Table(object):

    dialect = "excel"
    columns = ()

    def __init__(self, name, paths=None, columns=None, query=None):
        self.name = name
        self.columns = OrderedDict(columns or self.__class__.columns)
        self.paths = paths or []
        self.query = query

    def index(self, field):
        return self.columns.keys().index(field)

    def rdd(self, dpark=None):
        if self.query:
            return self.query.rdd
示例#3
0
			f.write('\n'.join(comb_ad_context))
		dtest_sample_temp = xgb.DMatrix(temp_dir + '.libsvm')
		ypred.append(' '.join(dp.parallelize(bst.predict(dtest_sample_temp)).map(lambda x:str(x)).collect()))
	with open(mix_dir + '.txt', 'w') as f:
		f.write('\n'.join(ypred))
	check_call('rm -rf %s' % temp_dir + '.libsvm', shell=True)
	## plot histogram
	#plt.hist(ypred_sample_temp,10)
	#plot_path = '/home2/songsiyu/data/models_%s/%s' % (options.feature_domain, model_date_str) + '/plots'
	#if not os.path.exists(plot_path):
	#	check_call('mkdir %s' % plot_path, shell=True)
	#plt.savefig(plot_path + '/ad%d' % curr_ad)


if __name__ == '__main__':
    # Command-line options: --model_version and --feature_domain.
    optParser.add_option('--model_version', dest='model_version')
    optParser.add_option('--feature_domain', dest='feature_domain')
    options, _ = optParser.parse_args()
    dp = DparkContext()

    # Use the requested model version; without one, fall back to
    # yesterday's date formatted as YYYYMMDD.
    model_date_str = options.model_version or (
        datetime.today() - timedelta(days=1)).strftime('%Y%m%d')

    # Run the mixing step for the chosen feature domain.
    logger.info('mixing %s' % options.feature_domain)
    _mix(dp, options.feature_domain, model_date_str)

    logger.info('mix.py done!')
示例#4
0
文件: dquery.py 项目: eclipselu/dpark
                    if s.lower().startswith(c):
                        arg = s[len(c):].strip()
                        getattr(self,c)(arg)
                        self.sql = ''
                        continue

                if not self.sql.rstrip().endswith(';'):
                    continue

                self.run_sql()
            except Exception, e:
                import traceback; traceback.print_exc()
            self.sql = ''

if __name__ == '__main__':
    # Script entry point: run one SQL string (-e), the contents of a SQL
    # script file (-s), or fall through to Console.run() when neither
    # option is given.
    from dpark import optParser
    optParser.set_default('master', 'flet6')
    optParser.add_option('-e', '--query', type='string', default='',
            help='execute the SQL query then exit')
    optParser.add_option('-s', '--script', type='string', default='',
            help='execute the SQL script file then exit')
    options, args = optParser.parse_args()
    console = Console()
    if options.query:
        console.run_script(options.query)
    elif options.script:
        # Read the whole script file and hand its text to the console.
        with open(options.script) as f:
            console.run_script(f.read())
    else:
        console.run()
示例#5
0
文件: norm.py 项目: xunzhang/xz_utils
  f.seek(0)

  f2 = open(outfile, 'w')
  for line in f:
    l = line.strip('\n').split(sp)
    f2.write(l[0])
    f2.write(sp)
    l = l[1:]
    for i in xrange(len(minlst)):
      if maxlst[i] == minlst[i]:
        stf = maxlst[i]
        f2.write(str(stf))
	if i != len(minlst) - 1:
          f2.write(sp)
      else:
        stf = (float(l[i]) - minlst[i]) / (maxlst[i] - minlst[i])
        f2.write(str(stf))
	if i != len(minlst) - 1:
          f2.write(sp)
    f2.write('\n')
    
if __name__ == '__main__':
  # Take the --in / --out option values from the command line and pass
  # them to norm().
  from dpark import optParser
  optParser.add_option('--in', dest='inputf')
  optParser.add_option('--out', dest='outputf')
  options, args = optParser.parse_args()
  inf, outf = options.inputf, options.outputf
  norm(inf, outf)
示例#6
0
import os, sys
from subprocess import check_call, call
from datetime import timedelta, datetime
from dpark import DparkContext, optParser
import random
import math

from config import PLAN_SETTINGS, logger
import ctr_rdds
from common.util import GeneralMap, is_spider
from db_tools import DBTools
sys.path.append('%s/user_profile' % os.path.dirname(os.path.realpath(__file__)))

# Module-level setup: register the --date and --collection options on
# dpark's shared option parser, parse the command line (positional
# arguments are discarded), and create the DparkContext bound to ``dp``.
# NOTE(review): this runs at import time, so importing this module also
# parses the command line and creates the context.
optParser.add_option("--date", dest="date")
optParser.add_option("--collection", dest="collection")
options, _ = optParser.parse_args()
dp = DparkContext()


def feature_gen(current_date, collection):
    db_tools = DBTools()
    if collection == 'CPC':
        AD_TO_TAGS = dp.broadcast(db_tools.get_ad_to_tags())
    elif collection == 'market':
        AD_TO_TAGS = dp.broadcast(db_tools.get_market_ad_to_features())
    else:
        raise Exception
    AD_TO_ITEM = dp.broadcast(db_tools.get_ad_to_item())      ## extract from db
    AD_TO_ORDER = dp.broadcast(db_tools.get_ad_to_order())
    AD_TO_ACCOUNT = dp.broadcast(db_tools.get_ad_to_account())
    AD_TO_UNIT = dp.broadcast(db_tools.get_ad_to_units())
示例#7
0
    f.seek(0)

    f2 = open(outfile, 'w')
    for line in f:
        l = line.strip('\n').split(sp)
        f2.write(l[0])
        f2.write(sp)
        l = l[1:]
        for i in xrange(len(minlst)):
            if maxlst[i] == minlst[i]:
                stf = maxlst[i]
                f2.write(str(stf))
                if i != len(minlst) - 1:
                    f2.write(sp)
            else:
                stf = (float(l[i]) - minlst[i]) / (maxlst[i] - minlst[i])
                f2.write(str(stf))
                if i != len(minlst) - 1:
                    f2.write(sp)
        f2.write('\n')


if __name__ == '__main__':
    # Take the --in / --out option values from the command line and pass
    # them to norm().
    from dpark import optParser
    optParser.add_option('--in', dest='inputf')
    optParser.add_option('--out', dest='outputf')
    options, args = optParser.parse_args()
    inf, outf = options.inputf, options.outputf
    norm(inf, outf)