Exemplo n.º 1
0
    def run(self):
        """Search successive values of ``self.switch`` until fitness peaks.

        The tool is run once for ``self.initval`` and then repeatedly for the
        value returned by ``self.neighbour``.  Every (value, fitness) pair is
        appended to ``self.k_values``.  As soon as ``MetaUtils.localmax``
        accepts the fitness history (called with 3 as its second argument),
        the collected pairs are handed to ``MetaUtils.report`` and the
        process exits with status 0, so this method never returns normally.
        """
        value = self.initval
        MetaUtils.runtool(self.cmd + [self.switch, str(value)])
        fitness = MetaUtils.fitness("%s_%s/log" % (self.logd, value))
        self.k_values.append((value, fitness))

        while True:
            sys.stderr.write("\n==> current value: %s, fitness: %s\n"
                             % (str(value), str(fitness)))

            # Move to the next candidate and evaluate it.
            value = self.neighbour(value)
            sys.stderr.write("neighval: %s" % (str(value)))
            MetaUtils.runtool(self.cmd + [self.switch, str(value)])
            fitness = MetaUtils.fitness("%s_%s/log" % (self.logd, value))
            sys.stderr.write("new fitness: %s " % str(fitness))
            self.k_values.append((value, fitness))

            history = [fit for _, fit in self.k_values]
            if MetaUtils.localmax(history, 3):
                MetaUtils.report(self.k_values)
                sys.exit(0)
Exemplo n.º 2
0
    def neighbour(self, v):
        """Return the next value to try after ``v``.

        When ``self.switch`` is ``"-n"`` the value is multiplied by
        ``EX_STEP1`` or ``EX_STEP2`` and rounded up to an int; for any other
        switch ``LEN_STEP1`` or ``LEN_STEP2`` is added.  The choice between
        the first and second step is made by ``MetaUtils.move_by_step1`` from
        the fitness values recorded in ``self.k_values``.
        """
        multiplicative = self.switch == "-n"
        history = [fit for _, fit in self.k_values]
        use_step1 = MetaUtils.move_by_step1(history, STDDEV_CNT, STDDEV)

        if multiplicative:
            factor = EX_STEP1 if use_step1 else EX_STEP2
            return int(math.ceil(v * factor))

        increment = LEN_STEP1 if use_step1 else LEN_STEP2
        return v + increment
Exemplo n.º 3
0
    def __init__(self, cmd, logd, depth):
        """Run the depth search immediately and exit the process when done.

        Starting at ``depth``, each depth is evaluated with
        ``self.sinbad_seq(depth, WGTS)``, whose result is unpacked as
        (weight, fitness).  Each (depth, weight, fitness) triple is appended
        to ``self.k_values``.  When ``MetaUtils.localmax`` accepts the
        fitness history, the result of ``self.best`` is printed and the
        process exits with status 0, so construction never returns normally.

        Args:
            cmd: stored as ``self.cmd``.
            logd: stored as ``self.logd``.
            depth: first depth to evaluate.
        """
        self.cmd = cmd
        self.logd = logd
        self.k_values = []

        currd = depth
        fitw, currf = self.sinbad_seq(currd, WGTS)
        self.k_values.append((currd, fitw, currf))

        while True:
            msg = "\n==> current depth: %s, fitness: %s\n" % (currd, currf)
            sys.stderr.write(msg)
            neighd = self.neighbourd(self.k_values, currd)
            fitw, neighf = self.sinbad_seq(neighd, WGTS)
            self.k_values.append((neighd, fitw, neighf))

            # Single-argument print calls behave the same under the Python 2
            # print statement and the Python 3 print function.
            print("------")
            if MetaUtils.localmax([fit for _, _, fit in self.k_values]):
                # The first element is iterated as (depth, weight) pairs.  It
                # used to be bound to ``fitd`` while the loop read the
                # undefined name ``fitdw``, raising NameError on convergence.
                fitdw, maxf = self.best(self.k_values)
                print("Max ambiguities found: %s" % maxf)
                print("Best depth, weights: ")
                for d, w in fitdw:
                    print("%s : %s " % (d, w))

                sys.exit(0)

            currd, currf = neighd, neighf
Exemplo n.º 4
0
    def get_booker_cc(self):
        """function to get the booker country for an account

        Reads the campaign id / campaign name pairs from MySQL and merges
        them with the ``spmeta.account_1_cc_campaign`` Spark table, whose
        ``cc`` column is renamed to ``pos``.

        Returns:
            a panda dataframe with booker country
        """
        # get the campaign id, campaign name correspondence
        campaign_query = """
        SELECT id AS campaign_id,
               name AS campaign_name
          FROM mrktctl.account_1Bid_Campaign
        """
        msql = mu.getConnSql()
        try:
            msql.execute("USE mrktctl")
            campaigns = pd.read_sql_query(campaign_query, con=msql)
        finally:
            # release the connection even when the query fails
            msql.close()

        # get the pos, campaign id correspondence because in account_1, campaign could be
        # one pos or multiple pos
        cid = spark.table("spmeta.account_1_cc_campaign").withColumnRenamed(
            "cc", "pos")
        pd_cid = cid.toPandas()
        # NOTE(review): rows with pos "TR" are forced to campaign id 105; the
        # reason is not visible in this function - confirm it is still valid.
        pd_cid.loc[pd_cid.pos == "TR", "campaign_id"] = 105
        # merge() with no arguments is an inner join on the shared column
        # names (presumably just campaign_id - verify against the table).
        booker_cc_df = campaigns.merge(pd_cid)

        return booker_cc_df
Exemplo n.º 5
0
    def get_id_pos(self):
        """get campaign id, campaign correspondence from mysql database and join with the filtered campaign lists

        When ``self.pos`` equals ``['All']`` every campaign is returned;
        otherwise only the rows whose ``pos`` appears in ``self.pos`` are kept.

        returns: spark dataframe with columns ``campaign_id`` and ``pos``
        """
        # get the campaign id and campaign name correspondence from mysql
        qs = """
        SELECT id campaign_id,
               name pos
          FROM account_1Bid_Campaign"""
        msql = mu.getConnSql()
        try:
            msql.execute('USE mrktctl')
            account_1_campaign = pd.read_sql_query(qs, con=msql)
        finally:
            # release the connection even when the query fails
            msql.close()
        account_1_campaign = spark.createDataFrame(account_1_campaign)

        # filter for the campaign list
        if self.pos == ['All']:
            return account_1_campaign

        filtered_pos = spark.createDataFrame(
            pd.DataFrame(data=self.pos, columns=["pos"]))
        return account_1_campaign.join(filtered_pos, on="pos", how="inner")
Exemplo n.º 6
0
    def sinbad(self, d, wgt):
        """Search over the ``-w`` weight at a fixed ``-d`` depth.

        The tool is run for ``wgt`` and then for each weight returned by
        ``self.neighbourw`` until ``MetaUtils.localmax`` accepts the fitness
        history.  All (weight, fitness) pairs tried are then printed and the
        result of ``self.best_w`` is reported on stderr.

        Args:
            d: depth passed to the tool as ``-d``.
            wgt: first weight passed to the tool as ``-w``.

        Returns:
            The ``(fitw, maxf)`` pair produced by ``self.best_w``.
        """
        d_cmd = ["-d", str(d)]

        def run_with_weight(weight):
            # Run the tool for one weight and score the log it writes.
            MetaUtils.runtool(self.cmd + d_cmd + ["-w", str(weight)])
            logp = "%s_-d_%s_-w_%s/log" % (self.logd, d, weight)
            return MetaUtils.fitness(logp)

        w = wgt
        f = run_with_weight(w)
        w_values = [(w, f)]

        while True:
            sys.stderr.write("\n(d, w, f) => (%s, %s, %s)\n" %
                             (str(d), str(w), str(f)))
            neighw = self.neighbourw(w_values, w)
            neighf = run_with_weight(neighw)
            w_values.append((neighw, neighf))

            # Distinct names here avoid shadowing the current fitness ``f``
            # (Python 2 comprehensions leak their loop variable).
            if MetaUtils.localmax([fit for _, fit in w_values]):
                for weight, fit in w_values:
                    # Single-argument print works as both a Python 2
                    # statement and a Python 3 function call.
                    print("(%s,%s)" % (weight, fit))

                fitw, maxf = self.best_w(w_values)
                msg = "\n%s ambiguities found" % maxf
                msg += "\nd=%s, w=%s\n" % (d, fitw)
                sys.stderr.write(msg)

                return fitw, maxf

            w, f = neighw, neighf
Exemplo n.º 7
0
def main():
    """
    Main function for when this module is called as a program.

    Reads the options, input file name and target program from
    get_command_line_data(), works out the next-level output name for the
    input (either a single OBPG file or an ASCII file listing several) and
    prints it.

    Exits through sys.exit() with an error message when the target program
    is not in PROCESSABLE_PROGRAMS, the input file does not exist, the file
    is neither an OBPG file nor an ASCII file, or a list file contains no
    OBPG files.  Any other exception is passed to
    handle_unexpected_exception().

    Returns 0 when no exit was requested.
    """
    ret_status = 0
    clopts, inp_name, targ_prog = get_command_line_data()

    if targ_prog not in PROCESSABLE_PROGRAMS:
        err_msg = 'Error!  The target program, "{0}", is not known.'.\
                  format(targ_prog)
        sys.exit(err_msg)
    if not os.path.exists(inp_name):
        err_msg = "Error!  File {0} was not found.".format(inp_name)
        sys.exit(err_msg)

    try:
        file_typer = get_obpg_file_type.ObpgFileTyper(inp_name)
        ftype, sensor = file_typer.get_file_type()
        if ftype != 'unknown':
            # The file is an OBPG file
            stime, etime = file_typer.get_file_times()
            file_metadata = file_typer.attributes
            data_file = obpg_data_file.ObpgDataFile(
                inp_name, ftype, sensor, stime, etime, file_metadata)
            next_level_name = get_1_file_name(data_file, targ_prog, clopts)
        elif MetaUtils.is_ascii_file(inp_name):
            # Try treating the input file as a file list file.
            data_files_info = get_data_files_info(inp_name)
            if len(data_files_info) == 0:
                err_msg = "Error!  No OBPG files found in {0}".\
                          format(inp_name)
                sys.exit(err_msg)
            next_level_name = get_multifile_next_level_name(
                data_files_info, targ_prog, clopts)
        else:
            # The input file wasn't a file list file.
            err_msg = "File {0} is not an OBPG file.".format(inp_name)
            sys.exit(err_msg)
        print('Output Name: ' + next_level_name)
    except SystemExit:
        # Let exit requests raised here or in other parts of the program
        # continue unchanged.  Re-raising as-is (rather than wrapping the
        # exception in a new sys.exit() call) keeps integer exit codes
        # intact as well as error messages.
        raise
    except:
        # Deliberate catch-all: everything that is not an exit request is
        # reported through the shared handler.
        handle_unexpected_exception(sys.exc_info())
    return ret_status
Exemplo n.º 8
0
    def get_cancellations(self):
        """get cancellation data from mysql at desired aggregated dimension and filter for the selected point of
        sales.

        The query covers cancellations with
        ``start_date <= date_cancelled < end_date`` for ``self.partner_id``.
        The rows are re-aggregated in Spark over ``self.agg_on`` and, unless
        ``self.pos`` equals ``['All']``, restricted to the ``pos`` values
        listed in ``self.pos``.

        Returns: spark dataframe with cancelled commission, cancelled bookings and cancelled roomnights at desired
                 aggregated dimension
        """
        # NOTE(review): the statement is assembled with str.format from
        # instance attributes; confirm none of them can carry untrusted input.
        cancellation_query = """
            SELECT r.date_cancelled yyyy_mm_dd
                 , a.distribution pos
                 , CAST(coalesce(r.dest_id, r.hotel_id) AS INT) hotel_id
                 , CAST(sum(1) AS INT) cancellations
                 , sum(commission_amount_euro) cancelled_commission
                 , CAST(sum(roomnights) AS INT) cancelled_roomnights
             FROM {reservation_table} r force index (cancel)
             JOIN {affiliate_table} a on (a.affiliate_id = r.affiliate_id)
            WHERE r.date_cancelled >= '{start_date}'
              AND r.date_cancelled < '{end_date}'
              AND r.status not in ('fraudulent','test','unknown')
              AND r.partner_id = {account_4_partner_id}
            GROUP BY yyyy_mm_dd, pos, coalesce(r.dest_id, r.hotel_id)
            """.format(reservation_table=self.reservation_table,
                       affiliate_table=self.affiliate_table,
                       start_date=self.start_date,
                       end_date=self.end_date,
                       account_4_partner_id=self.partner_id)

        msql = mu.getConnSql()
        try:
            msql.execute('USE ppc_sp')
            cancellations = pd.read_sql_query(cancellation_query, con=msql)
        finally:
            # release the connection even when the query fails
            msql.close()
        cancellations = spark.createDataFrame(cancellations)

        cancellations_agg = cancellations.groupBy(*self.agg_on)\
                            .agg(f.sum("cancelled_commission").alias("cancelled_commission")
                                ,f.sum("cancelled_roomnights").alias("cancelled_roomnights")
                                ,f.sum("cancellations").alias("cancellations"))

        if self.pos == ['All']:
            return cancellations_agg

        # The join below needs a "pos" column, so self.agg_on presumably
        # includes "pos" whenever a filter is given - verify against callers.
        filtered_pos = spark.createDataFrame(
            pd.DataFrame(data=self.pos, columns=["pos"]))
        return cancellations_agg.join(filtered_pos, on="pos", how="inner")
Exemplo n.º 9
0
    def run(self):
        """Search successive ``-k`` lengths until fitness peaks.

        Starts at ``self.length`` and keeps moving to the length returned by
        ``self.neighbour``.  Each (length, fitness) pair is appended to
        ``self.k_values``.  When ``MetaUtils.localmax`` accepts the fitness
        history (called with ``NBEST``), the pairs are passed to
        ``MetaUtils.report`` and the process exits with status 0, so this
        method never returns normally.
        """
        length = self.length
        MetaUtils.runtool(self.cmd + ["-k", str(length)])
        fitness = MetaUtils.fitness("%s_%s/log" % (self.logd, length))
        self.k_values.append((length, fitness))

        while True:
            sys.stderr.write("\n==> current length: %s, fitness: %s\n"
                             % (str(length), str(fitness)))

            # Move to the next candidate length and evaluate it.
            length = self.neighbour(length)
            MetaUtils.runtool(self.cmd + ["-k", str(length)])
            fitness = MetaUtils.fitness("%s_%s/log" % (self.logd, length))
            sys.stderr.write("new fitness: %s " % str(fitness))
            self.k_values.append((length, fitness))

            history = [fit for _, fit in self.k_values]
            if MetaUtils.localmax(history, NBEST):
                MetaUtils.report(self.k_values)
                sys.exit(0)
Exemplo n.º 10
0
    def __init__(self, cmd, logd, depth):
        """Run the depth search immediately and exit the process when done.

        Starting at ``depth``, each depth is scored with ``self.sinbad`` and
        the (depth, fitness) pair is appended to ``self.k_values``; the next
        depth comes from ``self.neighbour``.  When ``MetaUtils.localmax``
        accepts the fitness history (called with ``NBEST``), the result of
        ``self.best`` is printed and the process exits with status 0, so
        construction never returns normally.

        Args:
            cmd: stored as ``self.cmd``.
            logd: stored as ``self.logd``.
            depth: first depth to evaluate.
        """
        self.cmd = cmd
        self.logd = logd
        self.k_values = []

        currd = depth
        currf = self.sinbad(currd)
        self.k_values.append((currd, currf))

        while True:
            msg = "\n==> current depth: %s, fitness: %s\n" % (currd, currf)
            sys.stderr.write(msg)
            neighd = self.neighbour(self.k_values, currd)
            neighf = self.sinbad(neighd)
            self.k_values.append((neighd, neighf))

            # Single-argument print calls behave the same under the Python 2
            # print statement and the Python 3 print function.
            print("------")
            if MetaUtils.localmax([fit for _, fit in self.k_values], NBEST):
                fitd, maxf = self.best(self.k_values)
                print("Max ambiguities found: %s" % maxf)
                # Two spaces reproduce the separator the original
                # ``print "Best depths: " , fitd`` statement inserted.
                print("Best depths:  %s" % (fitd,))
                sys.exit(0)

            currd, currf = neighd, neighf
Exemplo n.º 11
0
    def neighbourw(self, key_fits, w):
        """Return the next weight to try after ``w``.

        ``key_fits`` is a sequence of (key, fitness) pairs.  Its fitness
        values are passed to ``MetaUtils.move_by_step1``, which selects
        whether ``w`` is multiplied by ``WGT_STEP1`` or ``WGT_STEP2``.
        """
        history = [fit for _, fit in key_fits]
        use_step1 = MetaUtils.move_by_step1(history, NNEIGH, STDDEV)
        factor = WGT_STEP1 if use_step1 else WGT_STEP2
        return w * factor
Exemplo n.º 12
0
    def neighbourd(self, key_fits, d):
        """Return the next depth to try after ``d``.

        ``key_fits`` is a sequence of 3-tuples whose last element is the
        fitness.  The fitness values are passed to
        ``MetaUtils.move_by_step1``, which selects whether ``D_STEP1`` or
        ``D_STEP2`` is added to ``d``.
        """
        history = [fit for _, _, fit in key_fits]
        use_step1 = MetaUtils.move_by_step1(history, NNEIGH, STDDEV)
        increment = D_STEP1 if use_step1 else D_STEP2
        return d + increment
Exemplo n.º 13
0
 def sinbad(self, d):
     """Run the tool with ``-d d`` and return the fitness of its log.

     The log is read from ``<self.logd>_-d_<d>/log``.
     """
     depth_args = ["-d", str(d)]
     MetaUtils.runtool(self.cmd + depth_args)
     log_path = "%s_-d_%s/log" % (self.logd, d)
     return MetaUtils.fitness(log_path)
Exemplo n.º 14
0
    def neighbour(self, l):
        """Return the next length to try after ``l``.

        The fitness values recorded in ``self.k_values`` are passed to
        ``MetaUtils.move_by_step1``, which selects whether ``LEN_STEP1`` or
        ``LEN_STEP2`` is added to ``l``.
        """
        history = [fit for _, fit in self.k_values]
        use_step1 = MetaUtils.move_by_step1(history, NNEIGH, STDDEV)
        increment = LEN_STEP1 if use_step1 else LEN_STEP2
        return l + increment