Example #1

def insert_parasites(raw_data, to_taxon_id):
    """Insert parasites."""
    log(f'Inserting {DATASET_ID} parasites')

    parasite_records = []
    for _, row in raw_data.iterrows():

        parasite_records.append(parasite_record(
            row, to_taxon_id['lice'], 'lice_total',
            ['Ecomorph Notes', 'lice ages']))

        parasite_records.append(parasite_record(
            row, to_taxon_id['feather_mites'], 'feather_mites_total'))

        parasite_records.append(parasite_record(
            row, to_taxon_id['ticks'], 'ticks_total'))

        parasite_records.append(parasite_record(
            row, to_taxon_id['flies'], 'flies_total'))

        parasite_records.append(
            parasite_record(row, to_taxon_id['fleas'], 'fleas_total'))

        parasite_records.append(
            parasite_record(row, to_taxon_id['others'], 'others'))

    parasites = pd.DataFrame(parasite_records)
    parasites['parasite_id'] = db.get_ids(parasites, 'parasites')

    parasites.loc[:, db.PARASITE_COLUMNS].to_sql(
        'parasites', db.connect(), if_exists='append', index=False)
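The six calls above all funnel through a single helper. For orientation, a minimal sketch of the record shape `parasite_record` presumably returns; the real helper lives elsewhere in the project, so the column names used here are assumptions:

# Hypothetical sketch only; the real parasite_record is defined elsewhere.
def parasite_record(row, taxon_id, total_column, note_columns=None):
    notes = '; '.join(str(row[c]) for c in (note_columns or [])
                      if pd.notna(row.get(c)))
    return {
        'sample_id': row.get('sample_id'),   # column name assumed
        'taxon_id': taxon_id,
        'count': row.get(total_column),
        'notes': notes,
    }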
Example #2
def transform_datetime(df: pd.DataFrame, config: Config):
    date_parts = ["year", "weekday", "month", "day", "hour"]

    if "date_columns" not in config:
        config["date_columns"] = {}

        for c in [c for c in df if c.startswith("datetime_")]:
            config["date_columns"][c] = []
            for part in date_parts:
                part_col = c + "_" + part
                # Year needs more than 8 bits; the other parts fit in uint8.
                dtype = np.uint16 if part == "year" else np.uint8
                df[part_col] = getattr(df[c].dt, part).astype(dtype).values

                # Drop parts that are constant across the column; only the
                # kept parts are recorded in config for the prediction pass.
                if not (df[part_col] != df[part_col].iloc[0]).any():
                    log(part_col + " is constant")
                    df.drop(part_col, axis=1, inplace=True)
                else:
                    config["date_columns"][c].append(part)

            df.drop(c, axis=1, inplace=True)
    else:
        for c, parts in config["date_columns"].items():
            for part in parts:
                part_col = c + "_" + part
                # Prediction pass: replay the parts recorded during training.
                df[part_col] = getattr(df[c].dt, part)
            df.drop(c, axis=1, inplace=True)
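A quick illustration of the expansion on toy data (assuming pandas/NumPy imported as elsewhere in these examples, and that Config behaves like a dict for these keys):

# Toy illustration; Config is assumed dict-like here.
df = pd.DataFrame({'datetime_0': pd.to_datetime(['2018-01-05 10:00',
                                                 '2018-03-07 12:00'])})
config = {}
transform_datetime(df, config)
# datetime_0 is replaced by datetime_0_weekday, _month, _day and _hour;
# datetime_0_year is constant (2018), so it is dropped and left out of
# config['date_columns']['datetime_0'].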
Example #3
def delete_dataset(dataset_id):
    """Clear dataset from the database."""
    log(f'Deleting old {dataset_id} records')

    cxn = connect()

    cxn.execute('DELETE FROM datasets WHERE dataset_id = ?', (dataset_id, ))

    sql = """DELETE FROM sites
              WHERE dataset_id NOT IN (SELECT dataset_id FROM datasets)"""
    cxn.execute(sql)

    sql = """DELETE FROM hosts
              WHERE site_id NOT IN (SELECT site_id FROM sites)"""
    cxn.execute(sql)

    sql = """DELETE FROM samples
              WHERE host_id NOT IN (SELECT host_id FROM hosts)"""
    cxn.execute(sql)

    sql = """DELETE FROM parasite_groups
              WHERE sample_id NOT IN (SELECT sample_id FROM samples)"""
    cxn.execute(sql)

    sql = """DELETE FROM parasites
              WHERE parasite_group_id
                NOT IN (SELECT parasite_group_id FROM parasite_groups)"""
    cxn.execute(sql)

    cxn.commit()
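For comparison: if create_db.sql declared its foreign keys with ON DELETE CASCADE (an assumption; the schema is not shown here), SQLite could run the whole cascade from the single parent delete:

# Sketch, assuming the schema declares REFERENCES ... ON DELETE CASCADE.
cxn = connect()
cxn.execute('PRAGMA foreign_keys = ON')  # SQLite leaves FK enforcement off by default
cxn.execute('DELETE FROM datasets WHERE dataset_id = ?', (dataset_id,))
cxn.commit()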
Example #4
File: model.py Project: vykhand/sdsj2018
def hyperopt_lightgbm(X: pd.DataFrame, y: pd.Series, params: Dict, config: Config):
    X_train, X_val, y_train, y_val = data_split(X, y, test_size=0.5)
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_val, label=y_val)

    space = {
        "learning_rate": hp.uniform("learning_rate", 0.01, 0.05),
        "max_depth": hp.choice("max_depth", [-1, 4,  6, 10, 16]),
        "num_leaves": hp.choice("num_leaves", np.linspace(10, 200, 50, dtype=int)),
        "feature_fraction": hp.quniform("feature_fraction", 0.5, 1.0, 0.1),
        "bagging_fraction": hp.quniform("bagging_fraction", 0.5, 1.0, 0.1),
        "bagging_freq": hp.choice("bagging_freq", np.linspace(0, 50, 10, dtype=int)),
        "reg_alpha": hp.uniform("reg_alpha", 0, 30),
        "reg_lambda": hp.uniform("reg_lambda", 0, 30),
        "min_child_weight": hp.uniform('min_child_weight', 0.5, 50),
    }

    def objective(hyperparams):
        model = lgb.train({**params, **hyperparams}, train_data, 300, valid_data,
                          early_stopping_rounds=100, verbose_eval=100)

        score = model.best_score["valid_0"][params["metric"]]
        if config.is_classification():
            score = -score  # hyperopt minimizes, so negate AUC-style metrics

        return {'loss': score, 'status': STATUS_OK}

    trials = Trials()
    best = hyperopt.fmin(fn=objective, space=space, trials=trials, algo=tpe.suggest, max_evals=50, verbose=1,
                         rstate=np.random.RandomState(1))

    hyperparams = space_eval(space, best)
    log("{:0.4f} {}".format(trials.best_trial['result']['loss'], hyperparams))
    return hyperparams
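A hedged usage sketch, borrowing its names from train_lightgbm further down (Example #16):

# Usage sketch; params/config mirror train_lightgbm below.
params = {'objective': 'binary', 'metric': 'auc', 'verbosity': -1, 'seed': 1}
hyperparams = hyperopt_lightgbm(X_sample, y_sample, params, config)
model = lgb.train({**params, **hyperparams}, lgb.Dataset(X, label=y), 300)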
Example #5

    @classmethod  # assumed: the cls parameter implies a classmethod
    def _get_raw(cls, resource, part, max_results=None, **kwargs):
        if max_results is not None and max_results < cls._max_results_per_request:
            max_results_per_request = max_results
        else:
            max_results_per_request = cls._max_results_per_request

        results = []
        request = resource.list(part=part,
                                maxResults=max_results_per_request,
                                **kwargs)

        while request and (max_results is None or len(results) < max_results):
            util.log('Requesting {} ...', request.uri)

            try:
                response = request.execute()
            except googleapiclient.http.HttpError as e:
                # The HttpError class is currently broken and does not decode the received data before parsing it.
                if isinstance(e.content, bytes):
                    e.content = e.content.decode()

                raise

            results.extend(map(_Item.wrap_json, response.get('items', [])))

            request = resource.list_next(request, response)

        return results[:max_results]
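A usage sketch against the YouTube Data API v3 client; playlistItems and part are the API's own names, while Wrapper, KEY and PLAYLIST_ID are placeholders for this illustration:

# Usage sketch; Wrapper stands in for the class that owns _get_raw.
youtube = googleapiclient.discovery.build('youtube', 'v3', developerKey=KEY)
items = Wrapper._get_raw(youtube.playlistItems(), 'snippet',
                         max_results=100, playlistId=PLAYLIST_ID)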
Example #6
def rename_id_columns(df: pd.DataFrame, config: Config):
    if "id_columns" not in config:
        config["id_columns"] = dict([(c, 'string_' + c) for c in df
                                     if c.startswith("id_")])
        log("Id columns: " + ", ".join(config["id_columns"]), config.verbose)
    if len(config["id_columns"]) > 0:
        df.rename(columns=config["id_columns"], inplace=True)
Example #7
def drop_constant_columns(df: pd.DataFrame, config: Config):
    if "constant_columns" not in config:
        config["constant_columns"] = [c for c in df if c.startswith("number_") and not (df[c] != df[c].iloc[0]).any()]
        log("Constant columns: " + ", ".join(config["constant_columns"]))

    if len(config["constant_columns"]) > 0:
        df.drop(config["constant_columns"], axis=1, inplace=True)
Example #8
def backup_database():
    """Backup the SQLite3 database."""
    log('Backing up SQLite3 database')
    now = datetime.now()
    backup = f'{DB_FILE[:-3]}_{now.strftime("%Y-%m-%d")}.db'
    cmd = f'cp {DB_FILE} {backup}'
    subprocess.check_call(cmd, shell=True)
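The shell call breaks on paths containing spaces; a sketch of a pure-Python equivalent using only the standard library:

# Sketch: the same backup without a shell.
import shutil
from datetime import datetime

def backup_database():
    """Backup the SQLite3 database."""
    log('Backing up SQLite3 database')
    stamp = datetime.now().strftime('%Y-%m-%d')
    shutil.copy2(DB_FILE, f'{DB_FILE[:-3]}_{stamp}.db')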
Example #10
def validate(preds: pd.DataFrame, target_csv: str, mode: str, verbose: int=0) -> np.float64:
    # NB: pandas does not allow combining on= with left_index=; join on the
    # shared line_id column instead.
    df = pd.merge(preds, pd.read_csv(target_csv), on="line_id")
    score = roc_auc_score(df.target.values, df.prediction.values) if mode == "classification" else \
        np.sqrt(mean_squared_error(df.target.values, df.prediction.values))
    log("Score: {:0.4f}".format(score), verbose)
    return score
Example #11
def preprocess_pipeline(df: pd.DataFrame, config: Config):

    drop_columns(df, config)

    date_cols = list(df.filter(like='datetime_'))
    str_cols = list(df.filter(like='string_'))
    num_cols = list(df.filter(like='number_'))
    id_cols = list(df.filter(like='id_'))

    for c in id_cols + num_cols:
        if str(df[c].dtype) == 'object':
            log(f'column {c} is object (expected numerical type); cast as category codes')
            df[c] = df[c].astype('category').cat.as_ordered().cat.codes

    df = add_is_na_cols(df, config)
    df = fillna(df, config)
    df = downcast(df, config)

    non_negative_target_detect(df, config)

    if len(date_cols) != 0:
        df = process_datetime(df, date_cols, config)

    if len(str_cols) != 0:
        df = process_strings(df, str_cols, config)
        df = mean_encode_kf(df, str_cols, 5, config)

    return df
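Usage sketch: the pipeline is meant to run twice, with config recording every decision on the training pass and replaying it on the prediction pass (Config() is a hypothetical constructor here):

# Sketch; Config() stands in for however the project builds its config.
config = Config()
train_df = preprocess_pipeline(train_df, config)  # records choices in config
test_df = preprocess_pipeline(test_df, config)    # replays those choices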
Example #12
    def fetchData(self, force=False):
        self.income.fetchData(force)
        self.balance.fetchData(force)
        self.cashflow.fetchData(force)
        self.keyRatio.fetchData(force)
        self.quote.fetchData(force)
        log('Finished fetching all data!')
Example #13
def ingest(args):
    """Ingest datasets into the SQLite3 database."""
    for dataset, module in INGESTS:
        if dataset in args.datasets:
            log(SEPARATOR)
            module.ingest()
    log(SEPARATOR)
Example #14
def set_choice(chosen_id):
	log("Updating tracer choice to " + str(chosen_id))
	trace = get_trace()
	if trace is None:
		return

	trace.choice = chosen_id
	db.session.add(trace)
	db.session.commit()
	log("tracer updated")
Example #15
File: db.py Project: yaseng/yafinger
    def query(self, sql):
        try:
            self._cur.execute("SET NAMES utf8")
            result = self.execute(sql)
        except MySQLdb.Error as e:
            self.error_code = e.args[0]
            util.log("Query sql error:%d  %s" % (e.args[0], e.args[1]), 3, "mysql")
            result = False
        return result
Example #16
File: model.py Project: vykhand/sdsj2018
def train_lightgbm(X: pd.DataFrame, y: pd.Series, config: Config):
    params = {
        "objective": "regression" if config["mode"] == "regression" else "binary",
        "metric": "rmse" if config["mode"] == "regression" else "auc",
        "verbosity": -1,
        "seed": 1,
    }

    X_sample, y_sample = data_sample(X, y)
    hyperparams = hyperopt_lightgbm(X_sample, y_sample, params, config)

    n_split = config["n_split_lgb"]
    kf = KFold(n_splits=n_split, random_state=2018, shuffle=True)
    config["model"] = []
    oofs = np.zeros((X.shape[0],))
    scores = []

    #time_reserved = (config["h2o_min_time_allowance"] + config["other_time_allowance"])

    iter_time = 0
    iter_times = []
    for i, (train_ind, test_ind) in enumerate(kf.split(X)):
        time_spent = (time.time() - config["start_time"])

        time_left = max(0, (config["time_limit"] - time_spent))

        # reserving time for h2o if needed
        #if config["train_h2o"] and (time_left > time_reserved): time_left = max(0, time_left - time_reserved)

        max_iter_time = max(iter_times) if len(iter_times) > 0 else 0
        # Assume iterations take roughly the same time; if the next fold
        # would not fit into the time left, stop.
        if max_iter_time * config["iter_time_coeff"] > time_left:
            break

        iter_start = time.time()

        X_train, X_val = X.iloc[train_ind, :], X.iloc[test_ind, :]
        y_train, y_val = y[train_ind], y[test_ind]
        train_data = lgb.Dataset(X_train, label=y_train)
        valid_data = lgb.Dataset(X_val, label=y_val)
        mdl = lgb.train({**params, **hyperparams}, train_data, 3000, valid_data,
                        early_stopping_rounds=50, verbose_eval=100)
        config["model"].append(mdl)
        oof = mdl.predict(X_val)
        oofs[test_ind] = oof
        if config["mode"] == "regression":
            score = np.sqrt(mean_squared_error(y_val, oof))
        else:
            score = roc_auc_score(y_val, oof)
        scores.append(score)
        iter_time = time.time() - iter_start
        iter_times.append(iter_time)
        log(f"FOLD: {i}, Score: {round(score, 2)}, time: {iter_time:.2f}")

    log(f"Total score: {np.mean(scores)} , std: {np.std(scores)}")
Example #17
    def _load_users(self):
        users = getpwall()
        if self.limit_to_group:
            users = [u for u in users if u.pw_gid == self.group.gr_gid]
        if len(users) < self.minimum_users_count:
            log("too few users found... check configuration (got %u, need %u)"
                % (len(users), self.minimum_users_count))
            exit(1)
        self.users = users
Example #18

def get_taxa():
    """Build a dictionary of scientific names and taxon_ids."""
    log(f'Getting {DATASET_ID} taxa')
    sql = """SELECT taxon_id, sci_name
               FROM taxa
              WHERE sci_name IN ({})"""
    # Inline quoting is safe here only because TARGETS is a fixed constant.
    sql = sql.format(','.join([f"'{x}'" for x in TARGETS]))
    taxa = pd.read_sql(sql, db.connect())
    return taxa.set_index('sci_name').taxon_id.to_dict()
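If TARGETS ever stopped being a fixed constant, bound parameters would be the safer form; a sketch:

# Sketch: the same IN query with bound parameters instead of inline quotes.
placeholders = ','.join('?' for _ in TARGETS)
sql = f'SELECT taxon_id, sci_name FROM taxa WHERE sci_name IN ({placeholders})'
taxa = pd.read_sql(sql, db.connect(), params=list(TARGETS))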
Example #20
File: db.py Project: yaseng/yafinger
    def update(self, sql):
        try:
            self._cur.execute("SET NAMES utf8")
            result = self.execute(sql)
            self._conn.commit()
        except MySQLdb.Error as e:
            self.error_code = e.args[0]
            util.log("[MySQL]Update sql error:%d  %s" % (e.args[0], e.args[1]), 3, "mysql")
            result = False
        return result
Example #21
def drop_constant_columns(df: pd.DataFrame, config: Config):
    if "constant_columns" not in config:
        config["constant_columns"] = get_constant_columns(df)

        log("Constant columns: " + ", ".join(config["constant_columns"]),
            config.verbose)

    if len(config["constant_columns"]) > 0:
        df.drop(config["constant_columns"], axis=1, inplace=True)
Example #22
File: db.py Project: yaseng/yafinger
    def insert(self, sql):
        try:
            self._cur.execute("SET NAMES utf8")
            self.execute(sql)
            self._conn.commit()
            return int(self._cur.lastrowid)
        except MySQLdb.Error as e:
            self.error_code = e.args[0]
            util.log("[MySQL]Insert sql error:%d  %s" % (e.args[0], e.args[1]), 3, "mysql")
            return False
Example #23
def validate(preds: pd.DataFrame, target_csv: str, mode: str,
             config: Config) -> np.float64:
    # NB: as above, on= cannot be combined with left_index=.
    df = pd.merge(preds, pd.read_csv(target_csv), on="line_id")
    score = roc_auc_score(df.target.values, df.prediction.values) if mode == "classification" else \
        np.sqrt(mean_squared_error(df.target.values, df.prediction.values))
    log("Score: {:0.4f}".format(score))
    return score
Example #24
def hyperopt_xgboost(X: pd.DataFrame, y: pd.Series, params: Dict,
                     config: Config):
    X_train, X_val, y_train, y_val = data_split(X, y, test_size=0.5)

    train_data = xgb.DMatrix(X_train, label=y_train)
    test_data = xgb.DMatrix(X_val, label=y_val)

    space = {
        "max_depth": hp.choice("max_depth", [4, 5, 6]),
        "min_child_weight": hp.choice("min_child_weight", [4, 8, 12, 16]),
        "gamma": hp.quniform("gamma", 0.1, 0.5, 0.1),
        "subsample": hp.choice("subsample", [i / 10.0 for i in range(6, 10)]),
        "colsample_bytree": hp.choice("colsample_bytree", [i / 10.0 for i in range(6, 10)]),
        "reg_alpha": hp.choice("reg_alpha", [0, 0.001, 0.005, 0.01, 0.05]),
    }

    def objective(hyperparams):
        watchlist = [(train_data, "train"), (test_data, "test")]

        mdl = xgb.train({**params, **hyperparams}, train_data,
                        evals=watchlist, num_boost_round=300,
                        early_stopping_rounds=100, verbose_eval=100)

        score = mdl.best_score

        if config.is_classification():
            score = -score  # hyperopt minimizes, so negate AUC-style metrics

        return {'loss': score, 'status': STATUS_OK}

    trials = Trials()
    best = hyperopt.fmin(fn=objective,
                         space=space,
                         trials=trials,
                         algo=tpe.suggest,
                         max_evals=50,
                         verbose=1,
                         rstate=np.random.RandomState(1))

    hyperparams = space_eval(space, best)
    log("{:0.4f} {}".format(trials.best_trial['result']['loss'], hyperparams))
    return hyperparams
Example #25
def drop_constant_columns(df: pd.DataFrame, config: Config):
    if "constant_columns" not in config:
        config["constant_columns"] = [
            c for c in df if df[c].nunique(dropna=False) < 2
        ]
        log("Constant columns: " + ", ".join(config["constant_columns"]))

    if len(config["constant_columns"]) > 0:
        df.drop(config["constant_columns"],
                axis=1,
                inplace=True,
                errors='ignore')
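The nunique(dropna=False) < 2 test also counts an all-NaN column as constant, unlike the value-comparison variants in the earlier examples; a quick check:

# Quick check of the constancy test.
assert pd.Series([np.nan, np.nan]).nunique(dropna=False) < 2
assert pd.Series([1, 1]).nunique(dropna=False) < 2
assert not (pd.Series([1, 2]).nunique(dropna=False) < 2)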
Example #26

def read_raw_data():
    """Get the raw data."""
    log(f'Reading {DATASET_ID} raw data')
    # Read the sheet once just for its column names so a second pass can
    # read every column as a string.
    converters = {c: str for c
                  in pd.read_excel(DATA_XLSX, sheet_name=DATA_SHEET).columns}

    raw_data = pd.read_excel(
        DATA_XLSX, sheet_name=DATA_SHEET, converters=converters)
    raw_data = raw_data.rename(columns={'Genus': 'genus'})
    raw_data['dataset_id'] = DATASET_ID

    return raw_data
Example #27
File: db.py Project: yaseng/yafinger
    def __init__(self, dbconfig):
        try:
            self._conn = MySQLdb.connect(host=dbconfig['host'],
                                         port=dbconfig['port'],
                                         user=dbconfig['user'],
                                         passwd=dbconfig['passwd'],
                                         db=dbconfig['db'],
                                         charset=dbconfig['charset'])
        except MySQLdb.Error as e:
            self.error_code = e.args[0]
            util.log("MySQL error:%d  %s" % (e.args[0], e.args[1]), 3, "mysql")
            return
Example #28
def get_trace():
	if 'active_trace' not in session:
		log("No active trace found")
		return

	tid = session['active_trace']
	trace = models.Trace.query.get(tid)
	if trace is None:
		log("ERR Trace is empty")
		return

	return trace
Example #29
def optimize_dataframe(df):
    """Optimize pandas DataFrame size:
    - downcast numeric (int and float) columns;
    - convert object columns to Categorical when fewer than half of their
      values are unique.
    :param df:
    :return:
    """

    int_cols = []
    float_cols = []
    category_cols = []
    other_cols = []

    old_size = sys.getsizeof(df)

    for col_name in df.columns:
        col_type = df.dtypes[col_name]

        if col_type in ['int', 'int32', 'int64']:
            int_cols.append(col_name)
        elif col_type in ['float', 'float32', 'float64']:
            float_cols.append(col_name)
        elif col_type == 'object':
            total = len(df[col_name])
            n_uniq = df[col_name].nunique()
            if n_uniq / total < 0.5:
                category_cols.append(col_name)
            else:
                other_cols.append(col_name)
        else:
            other_cols.append(col_name)

    df_opt = pd.DataFrame()

    if len(int_cols) > 0:
        df_opt[int_cols] = df[int_cols].apply(pd.to_numeric, downcast='integer')

    if len(float_cols) > 0:
        df_opt[float_cols] = df[float_cols].apply(pd.to_numeric, downcast='float')

    if len(category_cols) > 0:
        df_opt[category_cols] = df[category_cols].astype('category')

    if len(other_cols) > 0:
        df_opt[other_cols] = df[other_cols]

    new_size = sys.getsizeof(df_opt)  # approximate; see the sketch below
    log('optimize dataframe ({} to {}, ratio: {})'.format(old_size, new_size, round(old_size/new_size, 2)))

    return df_opt
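sys.getsizeof only approximates a DataFrame's footprint; subsample in Example #32 already uses the more faithful measurement, sketched here as a helper:

# Sketch: measure real column memory, including object/string contents.
def frame_size_mb(df: pd.DataFrame) -> float:
    return df.memory_usage(deep=True).sum() / 1024 / 1024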
Example #30
def create():
    """Create the database."""
    log('Creating database')

    script = fspath(SCRIPT_PATH / 'create_db.sql')
    cmd = f'sqlite3 {DB_FILE} < {script}'

    if exists(DB_FILE):
        remove(DB_FILE)

    subprocess.check_call(cmd, shell=True)

    insert_db_version()
Example #31
File: score.py Project: wanghia/sdsj2018
def validate_dataset(alias: str, mode: str, train_limit: int) -> np.float64:
    log(alias)

    automl = AutoML("models/check_{}".format(alias))

    automl.config["time_limit"] = train_limit
    automl.train("data/check_{}/train.csv".format(alias), mode)

    automl.config["time_limit"] = 300
    _, score = automl.predict("data/check_{}/test.csv".format(alias),
                              "predictions/check_{}.csv".format(alias))

    return score
Example #32
def subsample(df: pd.DataFrame, config: Config, max_size_mb: float=2.0):
    if config.is_train():
        df_size_mb = df.memory_usage(deep=True).sum() / 1024 / 1024
        if df_size_mb > max_size_mb:
            mem_per_row = df_size_mb / len(df)
            sample_rows = int(max_size_mb / mem_per_row)

            log("Size limit exceeded: {:0.2f} Mb. Dataset rows: {}. Subsample to {} rows.".format(df_size_mb, len(df), sample_rows))
            _, df_drop = train_test_split(df, train_size=sample_rows, random_state=1)
            df.drop(df_drop.index, inplace=True)

            config["nrows"] = sample_rows
        else:
            config["nrows"] = len(df)
Example #33
def get_capture_date(file_path):
    try:
        metadata = exiftool(file_path)
    except CommandError as e:
        log('{}', e)

        return None

    capture_date_attributes = \
        'DateTimeOriginal MediaCreateDate CreateDate CreationDate ' \
        'DateCreated'.split()

    for i in capture_date_attributes:
        value = metadata.get(i)

        if value is not None:
            break
    else:
        log(
            'Did not find any of the recognized metadata fields {}. File {} has the fields {}.',
            ', '.join(capture_date_attributes),
            file_path,
            metadata)

        return None

    # Timestamps look like "2018:01:05 10:30:00", optionally followed by
    # fractional seconds and a UTC offset.
    pattern = r'(?P<year>[0-9]{4})[:-]' \
              r'(?P<month>[0-9]{2})[:-]' \
              r'(?P<day>[0-9]{2}) ' \
              r'(?P<hour>[0-9]{2}):' \
              r'(?P<minute>[0-9]{2}):' \
              r'(?P<second>[0-9]{2})(\.[0-9]+)?([+-][0-9]{2}:[0-9]{2})?'

    match = re.match(pattern, value)

    assert match, 'Could not parse date: {}'.format(value)

    return datetime.datetime(
        *(
            int(match.group(i))
            for i in 'year month day hour minute second'.split()))
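A quick check of the pattern (as defined inside get_capture_date) on a typical EXIF-style timestamp with a timezone offset:

# Quick check of the date pattern above.
m = re.match(pattern, '2018:01:05 10:30:00+02:00')
assert m and m.group('hour') == '10' and m.group('day') == '05'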
Example #34
File: __init__.py Project: lpirl/homes
    def execute_safely(self, function, *args, **kwargs):
        """
        Method prints what would be done if simulating or
        does it otherwise.
        """
        def call_as_pretty_string():
            return "%s.%s(%s, %s)" % (
                function.__module__,
                function.__name__,
                ', '.join((repr(arg) for arg in args)),
                ', '.join(( "%s=%s" % (repr(k), repr(v))
                            for k, v in kwargs.items())),
            )

        if self.simulate:
            log("simulating - would execute %s otherwise" % (
                call_as_pretty_string()
            ))
            return None
        else:
            log("executing " + call_as_pretty_string())
            return function(*args, **kwargs)
Example #35
File: obsoletes.py Project: lpirl/homes
    def do_archive_directory(self, directory_path):
        """
        Archives directory contents to trash if not empty.

        Returns True on success, False otherwise
        """
        trash_file_path = self.trash_file_path(directory_path)
        archive_path = self.execute_safely(
            make_archive,
            trash_file_path,
            "bztar",
            dirname(directory_path),
            basename(directory_path)
        )

        if not archive_path:
            log(u"ERROR: something went wrong - no archive " +
                "file name found after archive creation!")
            return False

        self.execute_safely(chmod, archive_path, self.octal_permissions)
        return True
Example #36
def help_and_exit():
    log("script for maintaining an UNIX groups accounts and home directories")
    log("")
    log("usage: [python3] ./sftponly.py [config file]")
    log("  explicit call of 'python3' turns on debug")
    exit(0)
Example #37

def main(dir):
    # Assumed def line and initialization: this snippet's header was lost,
    # but the module-level call below invokes main(*sys.argv[1:]) and the
    # loop appends to check_empty_dirs.
    check_empty_dirs = []

    for i in walk_visible_files(dir):
        file_dir, file_name = os.path.split(i)
        name_part, _ = os.path.splitext(file_name)

        try:
            date = datetime.datetime.strptime(
                name_part[:19],
                '%Y-%m-%d %H.%M.%S')

            target_dir = dir_for_date(date)
        except ValueError:
            target_dir = os.path.join(dir, 'unknown')

        move_to(i, os.path.join(target_dir, file_name))
        check_empty_dirs.append(file_dir)

    while check_empty_dirs:
        empty_dirs = check_empty_dirs
        check_empty_dirs = []

        for i in empty_dirs:
            if is_empty(i):
                remove_dir(i)
                check_empty_dirs.append(os.path.dirname(i))


try:
    main(*sys.argv[1:])
except KeyboardInterrupt:
    log('Operation interrupted.')