Example #1
    def _create_task(self):
        if not self.results:
            time_lapsed = round(time.time() - self.time_start)
            if time_lapsed > self.results_wait:
                self.terminate_counter += 1
                logger.debug("Waiting to terminate {}/{}".format(
                              self.terminate_counter, self.termination_limit))
            return
        # On success the lock stays held; _check_status releases it once the
        # pending task has been consumed.
        self._tasks_lock.acquire()
        task_results = self.results[:self.results_len]
        self.results = self.results[self.results_len:]
        try:
            result_ids, versions = zip(*task_results)
            result_ids = sorted(set(result_ids))
            model_version = min(versions)
            self.curr_task_id += 1
            task_name = f"task-{self.curr_task_id}-model-{model_version}"
            result_ids = [p for p in result_ids if os.path.exists(p)
                          and os.path.basename(p) not in self.seen_results]
            self.seen_results.update(os.path.basename(p) for p in result_ids)
            if not result_ids:
                self.terminate_counter += 1
                # nothing was scheduled, so release the lock before returning
                self._tasks_lock.release()
                return
            with self.train_lock:
                task_id = self.cli.tasks_create(task_name, self.labels, result_ids)
                self.pending_tasks[task_id] = result_ids
            self.time_start = time.time()
        except requests.exceptions.RequestException as e:
            # RequestException is the base class of HTTPError and
            # ConnectionError, so a single clause covers all of them.
            self._tasks_lock.release()
            logger.critical(e)
Example #2
def authenticate(email, password):
    auth_url = 'https://api.jinka.fr/apiv2/user/auth'
    auth_dict = {'email':email, 'password':password}
    s = requests.Session()
    r_auth = s.post(auth_url, data=auth_dict)
    if r_auth.status_code == 200:
        logger.info('Authentication succeeded (200)')
        access_token = r_auth.json()['access_token']
    else:
        logger.critical(f'Authentication failed with error {r_auth.status_code}')
        return None, None

    headers = {
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36',
        'Accept-Language': 'fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3',
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {access_token}',
        'Origin': 'https://www.jinka.fr',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Sec-GPC': '1',
        'If-None-Match': 'W/f46-qWZd5Nq9sjWAv9cj3oEhFaxFuek',
        'TE': 'Trailers',
    }

    return s, headers
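
A minimal usage sketch for the function above (the alert-listing URL is an assumption; only the api.jinka.fr/apiv2 prefix appears elsewhere in these examples):

# Hypothetical caller: reuse the authenticated session and ready-made headers.
session, headers = authenticate('user@example.com', 's3cret')
if session is not None:
    # '/apiv2/alert' is an assumed endpoint for listing the user's alerts.
    resp = session.get('https://api.jinka.fr/apiv2/alert', headers=headers)
    resp.raise_for_status()
    print(resp.json())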
Example #3
    def translate(
        self,
        source: str,
        from_lang: Optional[str] = None,
        to_lang: Optional[str] = None,
        proxy: Optional[str] = None,
    ) -> str:
        '''Translate source text.

        If from_lang/to_lang is None, fall back to the instance defaults.
        If proxy differs from self.proxy, a new client is used.
        '''

        self.from_lang_ = self.from_lang if from_lang is None else from_lang
        self.to_lang_ = self.to_lang if to_lang is None else to_lang

        if self.to_lang_ == self.from_lang_:
            return source

        # The trailing "." is not converted to "。" when the target is
        # Chinese, so append the sentinel ' . _xx' and strip it afterwards.
        if self.to_lang_ == 'zh':
            # a Chinese char is about 3 bytes, so stay well under the
            # 500-byte/char MAX ALLOWED QUERY limit
            source_list = wrap(source + ' . _xx',
                               160,
                               replace_whitespace=False)
        else:
            source_list = wrap(source, 500, replace_whitespace=False)

        try:
            pieces = (self._get_translation(elm, proxy=proxy)
                      for elm in source_list)
            seq = ' '.join(pieces)
        except Exception as exc:
            logger.warning("joining translated pieces failed: %s", exc)
            raise

        if seq.startswith('MYMEMORY WARNING:'):
            logger.critical("MYMEMORY WARNING: %s", seq)
            raise QuotaError(seq)

        # Strip the translated sentinel (the trailing five characters).
        if self.to_lang_ == 'zh':
            return seq[:-5]

        return seq
Example #4
    def connect(self):
        # Store the handler on a separate attribute so the connect method
        # itself is not shadowed by the connection object.
        self.connection = None
        try:
            print('~' * 100 + f'\nConnecting to device: {self.ip}')
            self.connection = ConnectHandler(ip=self.ip, device_type='cisco_ios',
                                             username=self.username, password=self.password)
        except (NetMikoAuthenticationException, NetMikoTimeoutException) as conn_error:
            logger.warning(f'Unable to connect to device:\n{conn_error}')
        if not self.connection:
            logger.critical('No devices to connect to')
            sys.exit()
Example #5
def foo():

    logfile("/home/kaktus74/nasze_logi.log")

    for i in range(1, 100):

        logger.debug("Super detailed information")
        logger.info("Something happened!")
        logger.warning("Something probably fell over")
        logger.error("Oh, something definitely fell over!")
        rok = 19
        logger.critical(f"Virus covid-{rok} detected! {i}")
Example #6
    def include_all_items_in_live(self):
        """Make every item "included" in the live plan.

        If some items are "excluded" from the live plan, it breaks the
        service planner app's item order; run this to repair it.
        """
        r = requests.get(
            f'https://api.planningcenteronline.com/services/v2/service_types/'
            f'{self.service_type}/plans/{self.plan_id}/items/?include=item_times',
            auth=(APP_ID, SECRET))
        r = r.json()

        # item_time ids that are excluded:
        # /service_types/#/plans/#/items/#/item_times/id
        excluded = []

        for item_time in r['included']:
            if item_time['attributes']['exclude'] is True:
                item = {
                    'item_id': item_time['relationships']['item']['data']['id'],
                    'time_id': item_time['id'],
                }
                excluded.append(item)

        if excluded:
            request_headers = {
                'Content-type': 'application/json',
                'Accept': 'text/plain'
            }
            payload = {
                "data": {
                    "type": "ItemTime",
                    "attributes": {
                        "exclude": False,
                    },
                }
            }

            for item in excluded:
                r = requests.patch(
                    f'https://api.planningcenteronline.com/services/v2/service_types/'
                    f'{self.service_type}/plans/{self.plan_id}/items/{item["item_id"]}/item_times/{item["time_id"]}',
                    headers=request_headers,
                    data=json.dumps(payload),
                    auth=(APP_ID, SECRET))
                if r.status_code != 200:
                    logger.critical(
                        'pco_plan.include_all_items_in_live: ERROR including item time %s',
                        item)
                else:
                    logger.info(
                        'pco_plan.include_all_items_in_live: updated live item to be included in plan: %s',
                        item)
Example #7
def devices_connect(devices):
    # A bool cannot be indexed; collect the handlers in a list instead.
    connections = []
    for device in devices:
        try:
            print('~' * 100 + f'\nConnecting to device: {device}')
            connections.append(ConnectHandler(ip=device, device_type='cisco_ios',
                                              username=username, password=password))
        except (NetMikoAuthenticationException, NetMikoTimeoutException) as conn_error:
            logger.warning(f'Unable to connect to device:\n{conn_error}')
    if not connections:
        logger.critical('No devices to connect to')
        sys.exit()
    return connections
Example #8
def remove_expired(session, df, last_deleted_path):
    df_expired = df.loc[df["expired_at"].notna(), :]
    if len(df_expired) > 15:
        logger.critical('Df slicing error: too many offers marked as expired')
        exit()
    logger.info('Starting the cleaning of expired offers.')
    for appart_id, row in tqdm(df_expired.iterrows()):
        post_url = 'https://api.jinka.fr/apiv2/alert/' + row['alert_id'] + '/abuses'
        data = {'ad_id':appart_id, 'reason':'ad_link_404'}
        session.post(post_url, data=data)
    df_expired.to_json(last_deleted_path, orient='columns')
    cleaned_df = df.loc[df['expired_at'].isna(), :]
    logger.info(f'Finished cleaning the {len(df_expired)} expired apartments.')
    return cleaned_df
Example #9
    def download_template(self):
        logger.debug('trying to connect to GITLAB')
        try:
            git_req = requests.get('http://172.17.0.3/api/v4/projects/1/repository/files/router/raw?ref=master',
                                   headers={'PRIVATE-TOKEN': 'x6sP6xf57gb5sxxiXutq'})
        except requests.exceptions.RequestException as req_error:
            logger.critical(f'unable to connect to GITLAB:\n{req_error}')
            return False
        if git_req.status_code == 200:
            logger.debug('connection to GITLAB successful')
            self.template = git_req.text.splitlines()
            return True
        else:
            logger.info('Problem with downloading the template')
            return False
Example #10
def readConfig():
    configFile = "config.yml"

    try:
        with open(configFile, "r") as configString:
            parsedConfig = load(configString, Loader=FullLoader)
            log.debug("Read config file {0}, got {1}".format(
                configFile, parsedConfig))
            return parsedConfig

    except FileNotFoundError:
        log.critical("Can't read {0}!".format(configFile))
        print("Could not load the config file!")
        log.info("Program end.")
        sys.exit(1)
Example #11
    async def connect_retry(self):
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.setblocking(False)
        while True:
            try:
                await self.loop.sock_connect(sock, self.address)
                logger.info(
                    f"Connected to {self.address[0]}:{self.address[1]}.")
                return sock
            except (ConnectionRefusedError, ConnectionAbortedError,
                    socket.gaierror, OSError):
                logger.critical(
                    f"Connect call to {self.address[0]}:{self.address[1]} failed, retrying in {self.retry_timeout} "
                    "second(s).")
                await asyncio.sleep(self.retry_timeout)
Example #12
    async def connect(self):
        while True:
            try:
                self.conn = await r.connect(self.addr, self.port)
                sock = self.conn._instance._streamwriter.get_extra_info(
                    'socket')
                print(sock)
                set_keepalive(sock)
                break
            except r.ReqlDriverError:
                logger.critical(
                    f"Failed to connect to database, retrying in {self.retry_timeout} seconds."
                )
                await asyncio.sleep(self.retry_timeout)

        self.connection_available.set()
        self.connection_available.clear()
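
set_keepalive is not defined in the snippet above; a plausible sketch, assuming Linux socket options and illustrative interval values:

import socket

def set_keepalive(sock, after_idle_sec=60, interval_sec=60, max_fails=5):
    """Enable TCP keepalive so dead database connections are detected (Linux options)."""
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, after_idle_sec)
    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, interval_sec)
    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, max_fails)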
Example #13
        def receive(message):
            try:
                msg_type = message[Keys.TYPE]

                if msg_type not in Types.all:
                    logger.critical(f"Unknown message type {msg_type}")
                    return

                if msg_type not in dispatch_table:
                    logger.debug(
                        f"Message type {msg_type} does not have a handler.")
                    return

                handler = dispatch_table[msg_type]
                asyncio.ensure_future(handler(self, stream, message))
            except Exception:
                logger.exception("Unexpected exception in message handling")
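
For context, dispatch_table maps message-type constants to async handlers invoked as handler(self, stream, message). A sketch of such a table (the handler name and the Types.ping constant are illustrative, not from the source):

async def handle_ping(server, stream, message):
    # Illustrative handler: just log the incoming message.
    logger.debug(f"ping received on {stream}: {message}")

dispatch_table = {
    Types.ping: handle_ping,
}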
Example #14
    def __prepare(self, infile, cities):
        scripture = MongoClient(settings.MONGO).scripture
        df = pandas.read_csv(infile, error_bad_lines=False)
        cities = Cities.from_json(cities) if cities else Cities()
        cities.hub = MongoClient(settings.HUB_MONGO).hub
        for idx, row in df.iterrows():
            isnull = row.isnull()
            if isnull.get('ta'):
                continue
            # Conditional expressions rather than the and/or idiom, which
            # would silently collapse a legitimate 0 to None.
            bonotel = int(row.bonotel) if not isnull.get('bonotel', True) else None
            roomsxml = int(row.roomsxml) if not isnull.get('roomsxml', True) else None
            hotelspro = str(row.hotelspro) if not isnull.get('hotelspro', True) else None
            hotelbeds = int(row.hotelbeds) if not isnull.get('hotelbeds', True) else None
            jactravel = int(row.jactravel) if not isnull.get('jactravel', True) else None
            ta = row.get('ta')
            hcom = row.get('hotel_URL')
            if pandas.isnull(hcom):
                continue
            try:
                hcom_id = self.__parse_hcom_id(hcom)
            except AttributeError:
                continue
            try:
                _city = row.get('city') or row.get('城市')
                city_name = self._CITY_EN_RE.match(_city).group(1)
                city = cities.name(city_name.strip())
                yield Hotel(
                    db=scripture,
                    hotel_id=roomsxml,
                    hotels_cn_id=hcom_id,
                    bonotel=bonotel,
                    hotelspro=hotelspro,
                    hotelbeds=hotelbeds,
                    jactravel=jactravel,
                    comments_url=ta,
                    city=city
                )
            except (KeyError, IndexError, TypeError) as e:
                logger.critical('hcom(%s), city(%s)', hcom_id, _city)
                logger.exception(e)
Example #15
    def _check_status(self):
        with self.train_lock:
            task_ids = self.pending_tasks.keys()
            if not task_ids:
                return
            try:
                completed = False
                for i in self._get_completed_tasks(task_ids):
                    task_id, image_ids = self.cli.tasks_dump(i)
                    self._add_train(task_id, image_ids)
                    completed = True
                if completed:
                    # updating the task start time prevents a new task being
                    # created immediately after the lock is released; release
                    # once, outside the loop, to avoid a double release
                    self.time_start = time.time()
                    self._tasks_lock.release()

            except requests.exceptions.RequestException as e:
                logger.critical(e)
Example #16
def interrupt_experiment_on_unhealthy_probe(probe: Probe,
                                            run: Run,
                                            configuration: Configuration,
                                            secrets: Secrets = None) -> None:
    if experiment_finished.is_set():
        return

    tolerance = probe.get("tolerance")
    checked = within_tolerance(tolerance,
                               run["output"],
                               configuration=configuration,
                               secrets=secrets)
    if not checked and not guardian.interrupted:
        guardian.interrupted = True
        if not experiment_finished.is_set():
            logger.critical(
                "Safeguard '{}' triggered the end of the experiment".format(
                    probe["name"]))
            exit_gracefully()
Example #17
def run_all(email, password, expired):
    s, headers = authenticate(email, password)

    if s is None:
        logger.critical('Aborting search, check your credentials.')
        quit()
    df_alerts = get_alerts(s, headers)
    df_apparts, expired_index = get_all_apparts(df_alerts, s, headers)
    df_apparts = cleaner(df_apparts)
    df_apparts = features_engineering(df_apparts)
    df_history = append_history_df(df_apparts, HISTORY_PATH)
    df_apparts = df_apparts.loc[~df_apparts.index.duplicated()]
    df_apparts = get_all_links(s, df_apparts, expired, APPARTS_DB_PATH)
    if expired:
        df_history = update_history_df(df_apparts, df_history, expired_index)
        df_apparts = remove_expired(s, df_apparts, LAST_DELETED_PATH)
    df_apparts.to_csv(APPARTS_CSV_PATH, sep=';', encoding='utf-8')
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            df_apparts.to_excel(APPARTS_XLSX_PATH, encoding='utf-8')
        except IllegalCharacterError as e:
            logger.warning(
                "Some illegal characters were replaced in the dataframe.")
            ILLEGAL_CHARACTERS_RE = re.compile(
                r'[\000-\010]|[\013-\014]|[\016-\037]')
            df_apparts.applymap(lambda x: ILLEGAL_CHARACTERS_RE.sub(r'', x)
                                if isinstance(x, str) else x).to_excel(
                                    APPARTS_XLSX_PATH, encoding='utf-8')

    df_history.to_csv(HISTORY_PATH, sep=';', encoding='utf-8')

    if upload:
        uploader = Uploader(credentials_path=CREDS_PATH,
                            token_file_path=TOKEN_FILE_PATH,
                            secret_client_path=SECRET_CLIENT_PATH)
        uploader.push_table(
            df_apparts,
            spreadsheet_id='131UoWqQwZfydMJ3yqVe-L6TY6NKtJx8zVNppo034dT4',
            worksheet_name='apparts',
            index=True)
Example #18
    def process_failure(self, failure, message_id, sign_method, sign_params,
                        request_counter):
        if not isinstance(failure.value, ServiceException):
            # All handled exceptions should inherit from the ServiceException
            # class; any other exception class means an unhandled error,
            # which we should log.
            logger.critical(failure)

        sign = False
        code = getattr(failure.value, "code", -1)

        if message_id is not None:
            # Notifications carry no message_id; the other party does not
            # care about their error state.
            if os.environ.get("debug"):
                tb = failure.getBriefTraceback()
            else:
                tb = None
            self.writeJsonError(code, failure.getErrorMessage(), tb,
                                message_id, sign, sign_method, sign_params)

        request_counter.decrease()
Example #19
def push_standard_config():
    logger.debug('trying to connect to GITLAB')
    try:
        git_req = requests.get('http://172.17.0.3/api/v4/projects/1/repository/files/start/raw?ref=master', headers={'PRIVATE-TOKEN': 'x6sP6xf57gb5sxxiXutq'})
    except requests.exceptions.RequestException as req_error:
        logger.critical(f'unable to connect to GITLAB:\n{req_error}')
        return False
    logger.debug('checking if connection was successful')
    if git_req.status_code == 200:
        logger.debug('connection to GITLAB successful')
        out = git_req.text.splitlines()
        if isinstance(out, list):
            logger.debug(f'about to send this configuration to device:\n{out}')
            logger.debug(f'connecting to devices:\n{devices_ip}')
            connections = devices_connect(devices=devices_ip_strip)
            try:
                # Apply the configuration on every device that connected.
                sendconf = ''
                for connection in connections:
                    sendconf += connection.send_config_set(out)
            except Exception as sendconf_error:
                logger.critical(sendconf_error)
                return False
            if re.search(r'\^', sendconf) is not None:
                conf_apply_error = ConfigInputException('Error in some command! Check log')
                logger.critical(f'{conf_apply_error} output from device:\n{sendconf}')
                raise conf_apply_error
            else:
                conf_success = 'Configuration applied successfully'
                logger.info(conf_success + f', output from device:\n{sendconf}')
                return True
        else:
            list_error = AttributeError('Problem with converting template to list')
            logger.critical(list_error)
            raise list_error
    else:
        logger.info('Problem with downloading the template')
        return False
Example #20
    def push_template(self):
        if isinstance(self.template, list):
            logger.debug(f'about to send this configuration to device:\n{self.template}')
            logger.debug(f'connecting to device:\n{self.ip}')
            try:
                sendconf = self.connection.send_config_set(self.template)
            except Exception as sendconf_error:
                logger.critical(sendconf_error)
                return False
            if re.search(r'\^', sendconf) is not None:
                conf_apply_error = ConfigInputException('Error in some command! Check log')
                logger.critical(f'{conf_apply_error} output from device:\n{sendconf}')
                raise conf_apply_error
            else:
                conf_success = 'Configuration applied successfully'
                logger.info(conf_success + f', output from device:\n{sendconf}')
                return True
        else:
            list_error = AttributeError('Problem with converting template to list')
            logger.critical(list_error)
            raise list_error
Example #21
async def print_loop() -> None:
    """main loop - checks if entries exist periodically and prints them"""
    await client.wait_until_ready()
    # setup global variables
    guilds = list(client.guilds)
    if len(guilds) != 1:
        logger.critical("This bot should only be used on one server")
        await client.logout()
        sys.exit(1)
    channels = guilds[0].channels
    client.feed_channel = get(channels, name="feed")
    client.nsfw_feed_channel = get(channels, name="nsfw-feed")
    if client.feed_channel is None:
        logger.critical("Couldn't find the 'feed' channel")
    if client.nsfw_feed_channel is None:
        logger.critical("Couldn't find the 'nsfw-feed' channel")
    client.old_db = OldDatabase(filepath=old_db_file)
    client.loop.create_task(export_loop())
    while not client.is_closed():
        # if there are new entries, print them
        await print_new_embeds()
        logger.debug(f"Sleeping for {client.period}")
        await sleep(client.period)
Example #22
import re  # needed for RE_DIM below
from pathlib import Path
from collections import defaultdict, namedtuple

from logzero import logger
from PyTexturePacker import Packer
from PyTexturePacker.MaxRectsPacker.MaxRectsPacker import MaxRectsPacker
from pymongo import MongoClient
from settings import settings


try:
    from sh import crunch, ErrorReturnCode
except ImportError:
    crunch = lambda *args, **kwargs: logger.critical(
        "Please install crunch for PNG optimization"
    )

try:
    from sh import cwebp
except ImportError:
    cwebp = lambda *args, **kwargs: logger.critical(
        "Please install cwebp for WEBP compression"
    )


RE_DIM = re.compile(r"\d+")

Rect = namedtuple("Rect", ["x", "y", "w", "h"])
Dimensions = namedtuple("Dimensions", ["w", "h"])
Point = namedtuple("Point", ["x", "y"])
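
With these fallbacks in place, call sites can invoke crunch and cwebp unconditionally: when a binary is missing, the lambda logs a critical message instead of raising. A sketch of a call site reusing the imports above (the flags reflect the real CLI tools, but this invocation is illustrative):

png_path = Path("atlas.png")
crunch(str(png_path))                     # crunch optimizes the PNG in place
cwebp(str(png_path), "-o", "atlas.webp")  # cwebp writes the WEBP file named by -o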
Example #23
from logzero import logger

logger.critical('critical')
logger.error('error')
logger.warning('warning')
logger.info('info')
logger.debug('debug')
Example #24
    logzero.formatter(logzero.LogFormatter(datefmt="%Y-%m-%d %H:%M:%S"))

    if args["--log"]:
        logzero.logfile(
            args["--log"],
            encoding="utf-8",
            formatter=logzero.LogFormatter(datefmt="%Y-%m-%d %H:%M:%S",
                                           color=False),
        )

    if args["--format"] == "grobid":
        parse_file = parse_tei_file
    elif args["--format"] == "parscit":
        parse_file = parse_parscit
    else:
        log.critical(f"Unknown --format: {args['--format']}")
        exit(1)

    cited_years = {}
    for dirname in args["<dir>"]:
        if not os.path.exists(dirname):
            log.error(f"Directory not found: {dirname}")
            continue
        dir_diff, dir_files, total_files = 0, 0, 0
        for filename in glob(f"{dirname}/*.xml"):
            base = os.path.basename(filename)
            file_id = base.split(".")[0]
            if file_id.endswith("-parscit"):
                file_id = file_id[: -len("-parscit")]
            log.debug(f"Parsing {base}")
            cited_years[file_id], diff = parse_file(filename)
Example #25
logzero.loglevel(loglevelFromCli)

# Do we want to log as json?
if jsonLogFromCli in ("Y", "YES"):
    logzero.json()

log.debug("Command Line Parameters: {0}".format(args))

# Load config
configFile = "config.yml"
try:
    with open(configFile, "r") as configString:
        config = yaml.load(configString, Loader=yaml.FullLoader)

except FileNotFoundError:
    log.critical("Can't read {0}!".format(configFile))
    exit(1)

log.info("Library Name: {0}".format(config["libraryName"]))
log.info("Locales to generate: {0}".format(config["languages"]))
log.info("Default locale: {0}".format(config["defaultLanguage"]))
log.info("Timezone: {0}".format(config["timezone"]))

# Defaults
sourceFile = args["source"] if args["source"] else "~/library-media-inventory/inventory.csv"
log.info("Source file: {0}".format(sourceFile))

## Some variables, init jinja2
# Current folder
workDir = os.path.dirname(os.path.realpath(__file__))
Example #26
def main(args):
    """ Let's a go... """

    # set log level
    if args.verbose > 0:
        logzero.loglevel(logging.DEBUG)
    else:
        logzero.loglevel(logging.INFO)

    mode = ""
    # are we dealing with a file or a directory?
    if os.path.isdir(args.path):
        mode = "DIR"
        if not args.path.endswith('/'):
            args.path += '/'
    elif os.path.isfile(args.path):
        mode = "FILE"
    else:
        logger.critical("Invalid file or directory path specified.")
        sys.exit(1)

    # check and set output dir if necessary otherwise
    # we use the input dir to write our new files to

    # if the output_dir is specified we create dirs as required
    #
    # if it isn't and we are dealing with a directory of files
    # then we just make sure the input has a trailing slash
    #
    # if we are in file mode and no output_dir is specified
    # we'll use the input path to the file
    output_dir = ""
    if args.output_dir:
        output_dir = args.output_dir

        if not output_dir.endswith('/'):
            output_dir += '/'

        # create any dirs we need to
        if not os.path.exists(os.path.dirname(output_dir)):
            try:
                os.makedirs(os.path.dirname(output_dir))
            except OSError as exc:  # Guard against race condition
                if exc.errno != errno.EEXIST:
                    raise
    elif mode == "DIR":
        output_dir = args.path
        if not output_dir.endswith('/'):
            output_dir += '/'

    # directory so let's loop over and do 'em all
    if mode == "DIR":
        logger.debug("Directory mode, scanning: " + args.path)

        # now let's get to the files and extract the transcripts from each
        directory = os.fsencode(args.path)

        for file in os.listdir(directory):
            filename = os.fsdecode(file)

            full_path = output_dir + filename
            if filename.endswith(".vtt"):
                logger.debug("Attempting to extract from file: " + filename)
                plaintext = extract_plaintext_from_webvtt(args.path + filename)
                output_string_to_file(plaintext, full_path, 'txt')
                continue
            else:
                logger.debug("Skipping file: " + full_path)
                continue

    elif mode == "FILE":
        # single file specified so just the one extraction to do
        logger.debug("Single file mode, scanning: " + args.path)

        if args.output_dir:
            # need to split the filename from the path as output_dir has been specified
            path, filename = os.path.split(args.path)
            output_path = output_dir + filename
        else:
            # we're plopping the files from whence they came so just use the input path to the file
            output_path = args.path

        try:
            with open(args.path):
                plaintext = extract_plaintext_from_webvtt(args.path)
                output_string_to_file(plaintext, output_path, 'txt')

        except IOError:
            logger.error("Input file not accessible, please check the path: " +
                         args.path)
Example #27
    def _initialize_bucket_structure(self):
        """
        This function creates the base structure for the
        bucket regarding this experiment.
        """
        storage_creator = StorageCreator(self.storage_config)
        storage_object = storage_creator.build_storage_object()

        # Dump some test data
        FILE_TEST_SIZE = int(5e6)  # Approx 5MB
        MAX_RETRIES = 2
        retries = 0
        file_path = os.path.join(self.experiment_dir,
                                 "initialization-service-test")
        generate_big_random_bin_file(file_path, FILE_TEST_SIZE)

        while retries < MAX_RETRIES:
            # Perform Storage Connection Test
            logger.info("####### Testing Connection to Storage Endpoint ....")
            storage_object.create_bucket(DEFAULT_BUCKET_NAME)
            try:
                storage_interfacer = JobStorageInterface(
                    storage_obj=storage_object)
                storage_interfacer.put_job_data(
                    bucket=DEFAULT_BUCKET_NAME,
                    username=self.username,
                    project_id=self.project_name,
                    experiment_id=self.experiment_id,
                    variant="Initialization Service",
                    job_id="initial_testing",
                    local_path=file_path)
                logger.info("File Upload to Storage Endpoint Succeeded....")
                break
            except ConnectionError as e:
                logger.error(
                    "File Upload failed, please check the storage config ....\n"
                    f"{e}")

                if retries == MAX_RETRIES - 1:
                    import sys
                    logger.critical("\nProblem connecting to Storage. Exiting")
                    # sys.exit takes its message as a positional argument
                    sys.exit("Problem connecting to Storage. Exiting")
                else:
                    logger.error(
                        f"Trying to reconnect. {MAX_RETRIES - retries} remaining"
                    )
                    retries += 1
                    continue

        file_path = os.path.join(self.experiment_dir,
                                 "initialization-service-test_restore")
        try:
            storage_interfacer.get_job_data(bucket=DEFAULT_BUCKET_NAME,
                                            username=self.username,
                                            project_id=self.project_name,
                                            experiment_id=self.experiment_id,
                                            variant="Initialization Service",
                                            job_id="initial_testing",
                                            local_path=file_path)
            logger.info("File Download from Storage Endpoint Succeeded....")
        except ConnectionError as e:
            logger.warning(
                "File Download failed, connection might be unstable ....\n"
                f"{e}")
Example #28
    def one(self, crawled_hotel, collection_name):
        # The top-level document and its "en" sub-document carry the same
        # required fields; validate whichever one applies in a single pass.
        source = crawled_hotel["en"] if "en" in crawled_hotel else crawled_hotel
        for field in ("country", "city", "name", "address",
                      "latitude", "longitude"):
            if field not in source:
                logger.error("%s is missing. ObjectId(%s)",
                             field.capitalize(), crawled_hotel["_id"])
                return None
        country_name = source["country"]
        city = source["city"]
        name = source["name"]
        address = source["address"]
        latitude = source["latitude"]
        longitude = source["longitude"]
        if "en" in crawled_hotel:
            country = self._find_country_by_en_name(country_name)
        else:
            country = self._find_country_by_cn_name(country_name)
        if not country:
            country = self._find_country_by_partial_name(country_name)
        if not country:
            logger.error('Country("%s") of Hotel(%s) not found.', country_name,
                         name)
            return None
        country_code = country["code_cca2"]
        try:
            destination = self._destination_matching(
                country_code=country_code,
                latitude=latitude,
                longitude=longitude,
                address=address,
            )
        except Exception as e:
            logger.exception(e)
            return None
        if not destination:
            logger.critical(
                "Bad destination of Hotel(%s) at "
                "Country(%s) with City(%s)",
                name,
                country_name,
                city,
            )
            return None

        try:
            matches = self._hotel_matching(
                name=name,
                address=address,
                longitude=longitude,
                latitude=latitude,
                phone=None,
                wg_destination_id=destination.get("destination_id"),
                wg_city_id=destination.get("city_id"),
                wg_province_id=destination.get("province_id"),
                wg_country_id=destination.get("country_id"),
            )
        except Empty:
            logger.critical(
                "Similarities of Hotel(%s) at Destination(%s) is empty.",
                name,
                destination,
            )
            return None
        except Exception as e:
            logger.critical(
                "Failed to get matched hotels. "
                "Hotel('%s'), Destination(%s)",
                name,
                destination,
                exc_info=e,
            )
            return None
        for _matched in matches:
            logger.info(
                self._set_relationships(
                    _matched["provider"],
                    _matched["oid"],
                    collection_name,
                    str(crawled_hotel["_id"]),
                ))
        return True