예제 #1
0
    def start(self, hidden=False):
        """Run the MPAA update over the movie library.

        On the first run (FIRST_RUN is true) a selection dialog is shown and
        the chosen entry is stored in LANG_MPAA and in the "mpaaLang" setting;
        the "firstMpaaRun" setting is then written as False.

        hidden -- when true, the module-level HIDE_MPAA flag is forced to True.

        Returns the current value of HIDE_MPAA.
        """
        global LANG_MPAA, HIDE_MPAA

        if FIRST_RUN:
            choices = ["us", "de", "nl", "au"]
            picked = util.dialogSelect(l("Choose_your_MPAA_system"), choices)
            LANG_MPAA = choices[picked]
            util.setting("mpaaLang", LANG_MPAA)
            util.settingBool("firstMpaaRun", False)

        if hidden:
            HIDE_MPAA = True

        movies = util.getMoviesWith('imdbnumber', 'mpaa')
        count = len(movies)

        if count <= 0:
            # nothing to process: tell the user instead of starting the update
            util.dialogOk(l("Info"), l("The_video_library_is_empty_or_the_IMDb_id_doesn't_exist!"))
        else:
            self.startProcess(movies, count)

        return HIDE_MPAA
예제 #2
0
################
# IMDB Update  #
# by Jandalf   #
################

import httplib, socket, json, util

# Fixed numeric threshold.
# NOTE(review): its use is not visible in this excerpt - presumably the smallest
# rating change that counts as a difference; confirm against the comparison code.
RATING_DIFF = 0.001
# Value of the "enableDiff" setting as returned by util.settingBool, read once at import.
ENABLE_DIFF = util.settingBool("enableDiff")
# Value of the "separator" setting with leading/trailing whitespace stripped.
SEPARATOR = util.setting("separator").strip()

class imdbMovie(object):
    
    def __init__(self, imdbID, httphandler):
        """Remember the IMDb id, reset the result fields and fetch the data.

        imdbID      -- id stored on the instance and used by getData
        httphandler -- connection object handed straight to getData
        """
        self.__imdbID = imdbID
        self.__error = False
        # rating and votes start out as empty strings
        self.__rating = self.__votes = ""

        self.getData(httphandler)

    def getData(self, httphandler):
        try:
            httphandler.request("GET", "/?i=%s" % self.__imdbID)
            response = httphandler.getresponse()
        except (httplib.HTTPException, socket.timeout, socket.gaierror, socket.error):
            self.__error = True
        else:
            if response.status == 200:
                try:
                    data = json.loads(response.read().decode('utf8'))
예제 #3
0
################
# MPAA Update  #
# by semool    #
################

from util import l
from imdbmpaa import imdbMpaa
import util, httplib

# Module-level configuration, read once at import time through the util helpers.
# HIDE_MPAA: "hideMpaa" flag; Mpaa.start() forces it to True when called with hidden=True.
HIDE_MPAA       = util.settingBool("hideMpaa")
# LANG_MPAA: "mpaaLang" value; replaced by the user's dialog choice on the first run.
LANG_MPAA       = util.setting("mpaaLang")
# FORM_MPAA: "mpaaPrefix" value. NOTE(review): not used in the visible code - confirm its role.
FORM_MPAA       = util.setting("mpaaPrefix")
# CHANGED_PREFIX: "enableMpaaPrefix" flag. NOTE(review): not used in the visible code - confirm its role.
CHANGED_PREFIX  = util.settingBool("enableMpaaPrefix")
# FIRST_RUN: "firstMpaaRun" flag; when true, Mpaa.start() shows the selection dialog.
FIRST_RUN       = util.settingBool("firstMpaaRun")

class Mpaa:
    def start(self, hidden=False):
        if FIRST_RUN:
            global LANG_MPAA
            langs = ["us", "de", "nl", "au"]
            LANG_MPAA = langs[util.dialogSelect(l("Choose_your_MPAA_system"), langs)]
            util.setting("mpaaLang", LANG_MPAA)
            util.settingBool("firstMpaaRun", False)

        if hidden:
            global HIDE_MPAA
            HIDE_MPAA = True

        movies = util.getMoviesWith('imdbnumber', 'mpaa')
        total = len(movies)
예제 #4
0
def _run_integerizer(
        trace_label,
        control_spec,
        incidence_table,
        float_weights,
        total_hh_control_value,
        total_hh_control_col):
    """
    Build an Integerizer for the columns of incidence_table and run it once.

    The relaxed control totals handed to the Integerizer are the rounded dot product
    of the float weights with the incidence table, one value per incidence column.

    Returns
    -------
    integerizer : Integerizer
        the solver object (its 'weights' attribute is read by the caller)
    status : str
        value returned by integerizer.integerize()
    """

    # DataFrame.as_matrix() was removed in pandas 1.0; .values works on old and new versions
    relaxed_control_totals = \
        np.round(np.dot(np.asanyarray(float_weights), incidence_table.values))
    relaxed_control_totals = \
        pd.Series(relaxed_control_totals, index=incidence_table.columns.values)

    integerizer = Integerizer(
        incidence_table=incidence_table,
        control_importance_weights=control_spec.importance,
        float_weights=float_weights,
        relaxed_control_totals=relaxed_control_totals,
        total_hh_control_value=total_hh_control_value,
        total_hh_control_index=incidence_table.columns.get_loc(total_hh_control_col),
        control_is_hh_based=control_spec['seed_table'] == 'households',
        trace_label=trace_label
    )

    status = integerizer.integerize()

    return integerizer, status


def do_integerizing(
        trace_label,
        control_spec,
        control_totals,
        incidence_table,
        float_weights,
        total_hh_control_col):
    """
    Integerize the balanced float weights of a single zone.

    Rows with a float weight of zero are left out of the solve and get an integer
    weight of zero in the result.

    If the INTEGERIZE_WITH_BACKSTOPPED_CONTROLS setting is on and fewer control totals
    were given than there are control columns, a first attempt targets all control
    columns ("backstopped"). If that attempt is skipped, or its status is not in
    STATUS_SUCCESS, a second attempt targets only the explicitly specified controls.

    Parameters
    ----------
    trace_label : str
        trace label indicating geography zone being integerized (e.g. PUMA_600)
    control_spec : pandas.Dataframe
        full control spec with columns 'target', 'seed_table', 'importance', ...
    control_totals : pandas.Series
        control totals explicitly specified for this zone
    incidence_table : pandas.Dataframe
        table restricted here to the control_spec.target columns
        (presumably one row per household - confirm against caller)
    float_weights : pandas.Series
        balanced float weights to integerize
    total_hh_control_col : str
        name of total_hh column (preferentially constrain to match this control)

    Returns
    -------
    integerized_weights : pandas.Series
    status : str
        as defined in integerizer.STATUS_TEXT and STATUS_SUCCESS

    Raises
    ------
    RuntimeError
        if total_hh_control_col is not one of the incidence table columns
    """

    # incidence table should only have control columns
    incidence_table = incidence_table[control_spec.target]

    if total_hh_control_col not in incidence_table.columns:
        raise RuntimeError("total_hh_control column '%s' not found in incidence table"
                           % total_hh_control_col)

    zero_weight_rows = (float_weights == 0)
    if zero_weight_rows.any():
        logger.debug("omitting %s zero weight rows out of %s",
                     zero_weight_rows.sum(), len(incidence_table.index))
        incidence_table = incidence_table[~zero_weight_rows]
        float_weights = float_weights[~zero_weight_rows]

    total_hh_control_value = control_totals[total_hh_control_col]

    status = None
    if setting('INTEGERIZE_WITH_BACKSTOPPED_CONTROLS') \
            and len(control_totals) < len(incidence_table.columns):

        ##########################################
        # - backstopped control_totals
        # Use balanced float weights to establish target values for all control values
        # note: this more frequently results in infeasible solver results
        ##########################################

        # a single-record incidence table is not expected on this path: its integer
        # weights would just be one element equal to the total number of households
        assert len(incidence_table.index) > 1

        # solve for the integer weights against every control column
        integerizer, status = _run_integerizer(
            trace_label='backstopped_%s' % trace_label,
            control_spec=control_spec,
            incidence_table=incidence_table,
            float_weights=float_weights,
            total_hh_control_value=total_hh_control_value,
            total_hh_control_col=total_hh_control_col)

        logger.debug("Integerizer status for backstopped %s: %s", trace_label, status)

    # if we either tried backstopped controls and failed, or never tried at all
    if status not in STATUS_SUCCESS:

        ##########################################
        # - unbackstopped partial control_totals
        # Use balanced weights to establish control totals only for explicitly specified controls
        # note: this usually results in feasible solver results, except for some single hh zones
        ##########################################

        # keep only the controls that have an explicit total for this zone
        balanced_control_cols = control_totals.index
        incidence_table = incidence_table[balanced_control_cols]
        control_spec = control_spec[control_spec.target.isin(balanced_control_cols)]

        integerizer, status = _run_integerizer(
            trace_label=trace_label,
            control_spec=control_spec,
            incidence_table=incidence_table,
            float_weights=float_weights,
            total_hh_control_value=total_hh_control_value,
            total_hh_control_col=total_hh_control_col)

        logger.debug("Integerizer status for unbackstopped %s: %s", trace_label, status)

    if status not in STATUS_SUCCESS:
        logger.error("Integerizer failed for %s status %s. "
                     "Returning smart-rounded original weights", trace_label, status)
    elif status != 'OPTIMAL':
        # Logger.warn is a deprecated alias; Logger.warning is the supported spelling
        logger.warning("Integerizer status non-optimal for %s status %s.", trace_label, status)

    # start from zero for every original row (including the omitted zero-weight rows),
    # then overwrite the rows the integerizer actually solved for
    integerized_weights = pd.Series(0, index=zero_weight_rows.index)
    integerized_weights.update(integerizer.weights['integerized_weight'])
    return integerized_weights, status
# Experiment configuration (module-level constants), followed by one-time setup calls.
seed = 0  # handed to seed_torch() below
device = "cuda:0"
epochs = 1
# one more than the number of auxiliary columns
# NOTE(review): presumably the extra slot is the main target - confirm
n_labels = len(AUX_COLUMNS) + 1
max_len = 300  # NOTE(review): presumably the maximum input sequence length - confirm
batch_size = 16
base_lr = 2e-5
gammas = [0.75, 0.5, 0.25]  # NOTE(review): consumer not visible in this excerpt - confirm meaning
accumulation_steps = 8  # NOTE(review): presumably gradient-accumulation steps - confirm
# train_size = 1200000
valid_size = 100000
exp = "exp5"
# Setup helpers below are defined elsewhere; they run at import time in this order.
seed_torch(seed)
setup_logger(out_file=LOGGER_PATH)
mkdir(WORK_DIR)
setting(BERT_MODEL_PATH, WORK_DIR)


@contextmanager
def timer(name):
    """Context manager that logs how long the wrapped block took.

    On normal exit of the ``with`` body, an info message with ``name`` and
    the elapsed wall-clock time in whole seconds is sent to LOGGER.
    Nothing is logged if the body raises.
    """
    started = time.time()
    yield
    elapsed = time.time() - started
    LOGGER.info(f'[{name}] done in {elapsed:.0f} s')


def convert_to_bool(df, col_name):
    """Overwrite ``df[col_name]`` in place with True where the value is >= 0.5, else False.

    The frame is modified directly; nothing is returned.
    """
    at_or_above_half = df[col_name] >= 0.5
    df[col_name] = np.where(at_or_above_half, True, False)


def convert_dataframe_to_bool(df):
    bool_df = df.copy()
예제 #6
0
파일: lp.py 프로젝트: jfdman/populationsim
def use_cvxpy():
    """Return the 'USE_CVXPY' setting.

    False is passed to setting() as the second argument
    (presumably the fallback when the setting is absent - confirm).
    """
    cvxpy_flag = setting('USE_CVXPY', False)
    return cvxpy_flag
예제 #7
0
def use_simul_integerizer():
    """Return the 'USE_SIMUL_INTEGERIZER' setting.

    True is passed to setting() as the second argument, so the simultaneous
    integerizer is presumably used unless the setting says otherwise - confirm.
    """
    simul_flag = setting('USE_SIMUL_INTEGERIZER', True)
    return simul_flag
예제 #8
0
################
# MPAA Update  #
# by semool    #
################

from util import l
from imdbmpaa import imdbMpaa
import util, httplib

# Module-level configuration, read once at import time through the util helpers.
# HIDE_MPAA: "hideMpaa" flag; Mpaa.start() forces it to True when called with hidden=True.
HIDE_MPAA = util.settingBool("hideMpaa")
# LANG_MPAA: "mpaaLang" value; replaced by the user's dialog choice on the first run.
LANG_MPAA = util.setting("mpaaLang")
# FORM_MPAA: "mpaaPrefix" value. NOTE(review): not used in the visible code - confirm its role.
FORM_MPAA = util.setting("mpaaPrefix")
# CHANGED_PREFIX: "enableMpaaPrefix" flag. NOTE(review): not used in the visible code - confirm its role.
CHANGED_PREFIX = util.settingBool("enableMpaaPrefix")
# FIRST_RUN: "firstMpaaRun" flag; when true, Mpaa.start() shows the selection dialog.
FIRST_RUN = util.settingBool("firstMpaaRun")


class Mpaa:
    def start(self, hidden=False):
        if FIRST_RUN:
            global LANG_MPAA
            langs = ["us", "de", "nl", "au"]
            LANG_MPAA = langs[util.dialogSelect(l("Choose_your_MPAA_system"),
                                                langs)]
            util.setting("mpaaLang", LANG_MPAA)
            util.settingBool("firstMpaaRun", False)

        if hidden:
            global HIDE_MPAA
            HIDE_MPAA = True

        movies = util.getMoviesWith('imdbnumber', 'mpaa')