Example #1
    def save_traffic(traffic_obj_list, id, piece=3000):
        """

        :param traffic_obj_list:
        :param id: task id
        :param piece: default 3000 per piece
        :return:
        """
        traffic_path = Engine.get_traffic_path(id)
        if len(traffic_obj_list) > 0:
            saved_traffic_list = [i for i in traffic_obj_list]
            # slice traffic if too large
            if len(saved_traffic_list) > piece:
                traffic_divided_path = []
                traffic_divided = divide_list(saved_traffic_list, piece)
                for i in range(len(traffic_divided)):
                    traffic_divided_path.append(traffic_path + str(i))
                    with open(traffic_path + str(i), 'w') as traffic_f:
                        cPickle.dump(traffic_divided[i], traffic_f)
                LOGGER.info('Traffic of %s has been divided and saved to %s.' %
                            (id, ','.join(traffic_divided_path)))
            else:
                with open(traffic_path, 'w') as traffic_f:
                    cPickle.dump(saved_traffic_list, traffic_f)
                    LOGGER.info('Traffic of %s has been saved to %s.' %
                                (id, traffic_path))
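
The snippet above is Python 2 code (`cPickle`, text-mode pickling) and relies on a `divide_list` helper that is not shown. A minimal sketch of that helper, assuming it simply chunks a list into pieces of at most `piece` items:

def divide_list(items, piece):
    # Hypothetical helper (not the original implementation): split `items`
    # into consecutive chunks of at most `piece` elements.
    return [items[i:i + piece] for i in range(0, len(items), piece)]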
Example #2
def init_script():
    """Batch download all Snapchat memories."""
    for media_type in SNAPCHAT_MEDIA_URLS.keys():
        if SNAPCHAT_MEDIA_URLS[media_type] is None:
            parse_and_decode_urls(media_type)
        download_snapchat_memories(SNAPCHAT_MEDIA_URLS[media_type], media_type)
    LOGGER.success(f"Completed downloading all Snapchat memories.")
Example #3
def waitForCtrlC():
    try:
        while True:
            pass
    except KeyboardInterrupt:
        LOGGER.write(log.LOGTAGS[0],'ShareLockHomes','shutting down...')
        quit(False, logTag=0, message='Exiting by user.')
Example #4
    def get_author(self, author_id: int) -> Optional[List[str]]:
        """
        Fetch single Ghost author.

        :param int author_id: ID of Ghost author to fetch.

        :returns: Optional[List[str]]
        """
        try:
            params = {"key": self.content_api_key}
            headers = {
                "Content-Type": "application/json",
            }
            resp = requests.get(
                f"{self.content_api_url}/authors/{author_id}/",
                params=params,
                headers=headers,
            )
            if resp.status_code == 200:
                return resp.json()["authors"]
        except HTTPError as e:
            LOGGER.error(
                f"Failed to fetch Ghost authorID={author_id}: {e.response.content}"
            )
        except KeyError as e:
            LOGGER.error(
                f"KeyError while fetching Ghost authorID={author_id}: {e}")
Example #5
    def get_pages(self) -> Optional[dict]:
        """
        Fetch Ghost pages.

        :returns: Optional[dict]
        """
        try:
            headers = {
                "Authorization": f"Ghost {self.session_token}",
                "Content-Type": "application/json",
            }
            endpoint = f"{self.admin_api_url}/pages"
            resp = requests.get(endpoint, headers=headers)
            if resp.json().get("errors") is not None:
                LOGGER.error(
                    f"Failed to fetch Ghost pages: {resp.json().get('errors')[0]['message']}"
                )
                return None
            post = resp.json()["pages"]
            LOGGER.info(f"Fetched Ghost pages` ({endpoint})")
            return post
        except HTTPError as e:
            LOGGER.error(f"Ghost HTTPError while fetching pages: {e}")
        except KeyError as e:
            LOGGER.error(f"KeyError for `{e}` occurred while fetching pages")
        except Exception as e:
            LOGGER.error(
                f"Unexpected error occurred while fetching pages: {e}")
Example #6
async def bulk_transform_images(
    directory: Optional[str] = Query(
        default=None,
        title="directory",
        description="Subdirectory of remote CDN to traverse and transform images.",
        max_length=50,
    )
) -> JSONResponse:
    """
    Apply transformations to images uploaded within the current month.
    Optionally accepts a `directory` parameter to override image directory.

    :param Optional[str] directory: Remote directory to recursively fetch images and apply transformations.

    :returns: JSONResponse
    """
    if directory is None:
        directory = settings.GCP_BUCKET_FOLDER
    transformed_images = {
        "purged": images.purge_unwanted_images(directory),
        "retina": images.retina_transformations(directory),
        "mobile": images.mobile_transformations(directory),
        # "standard": gcs.standard_transformations(directory),
    }
    response = []
    for k, v in transformed_images.items():
        if v is not None:
            response.append(f"{len(v)} {k}")
        else:
            response.append(f"0 {k}")
    LOGGER.success(f"Transformed {', '.join(response)} images")
    return JSONResponse(transformed_images)
Example #7
    def get_json_backup(self) -> Optional[dict]:
        """
        Download JSON snapshot of Ghost database.

        Optional[dict]
        """
        self._https_session()
        headers = {
            "Authorization": self.session_token,
            "accept": "text/html,application/xhtml+xml,application/xml;\
                                q=0.9,image/webp,image/apng,*/*;\
                                q=0.8,application/signed-exchange;\
                                v=b3;q=0.9",
            "accept-encoding": "gzip, deflate, br",
            "Origin": "hackersandslackers.com",
            "Authority": "hackersandslackers.com",
        }
        endpoint = f"{self.admin_api_url}/db/"
        try:
            resp = requests.get(endpoint, headers=headers)
            return resp.json()
        except HTTPError as e:
            LOGGER.error(f"HTTPError occurred while fetching JSON backup: {e}")
        except Exception as e:
            LOGGER.error(
                f"Unexpected error occurred while fetching JSON backup: {e}")
Example #8
async def optimize_post_image(post_update: PostUpdate) -> PlainTextResponse:
    """
    Generate retina version of a post's feature image if one doesn't exist.

    :param PostUpdate post_update: Incoming payload for an updated Ghost post.

    :returns: PlainTextResponse
    """
    new_images = []
    post = post_update.post.current
    feature_image = post.feature_image
    title = post.title
    if feature_image:
        new_images.append(images.create_retina_image(feature_image))
        new_images.append(images.create_mobile_image(feature_image))
        new_images = [image for image in new_images if image is not None]
        if new_images:
            LOGGER.info(
                f"Generated {len(new_images)} images for post `{title}`: {new_images}"
            )
            return PlainTextResponse(f"{post.title}: {new_images}")
        return PlainTextResponse(
            content=f"Retina & mobile images already exist for {post.title}.")
    return PlainTextResponse(
        content=f"Post `{post.slug}` ignored; no image exists for optimization."
    )
Example #9
 def _https_session(self) -> None:
     """Authorize HTTPS session with Ghost admin."""
     endpoint = f"{self.admin_api_url}/session/"
     headers = {"Authorization": self.session_token}
     resp = requests.post(endpoint, headers=headers)
     LOGGER.info(
         f"Authorization resulted in status code {resp.status_code}.")
Example #10
async def save_user_search_queries() -> JSONResponse:
    """
    Save top search analytics for the current week.

    :returns: JSONResponse
    """
    weekly_searches = persist_algolia_searches(settings.ALGOLIA_TABLE_WEEKLY, 7)
    monthly_searches = persist_algolia_searches(settings.ALGOLIA_TABLE_MONTHLY, 90)
    if weekly_searches is None or monthly_searches is None:
        raise HTTPException(500, "Unexpected error when saving search query data.")
    LOGGER.success(
        f"Inserted {len(weekly_searches)} rows into `{settings.ALGOLIA_TABLE_WEEKLY}`, "
        f"{len(monthly_searches)} into `{settings.ALGOLIA_TABLE_MONTHLY}`"
    )
    return JSONResponse({
        "7-Day": {
            "count": len(weekly_searches),
            "rows": weekly_searches,
        },
        "90-Day": {
            "count": len(monthly_searches),
            "rows": monthly_searches,
        },
    })
Example #11
 def run(self):
     for i in Processor.get_process_chains():
         func = getattr(self, i)
         try:
             func()
         except Func_timeout_error as e:
             LOGGER.warn(str(e) + self.request.url)
Example #12
 def write(self):
     with open(self.filePath, 'w') as f:
         jsonEncoded = json.dumps(self.configuration, sort_keys=True, indent=4)
         f.write(jsonEncoded)
         LOGGER.write(log.LOGTAGS[0],'Default config is written to "' + self.filePath + '"')
Example #13
def welcome_newsletter_subscriber(subscriber: Member) -> Optional[SubscriptionWelcomeEmail]:
    """
    Send welcome email to newsletter subscriber.

    :param Member subscriber: New Ghost member with newsletter subscription.

    :returns: Optional[SubscriptionWelcomeEmail]
    """
    body = {
        "from": settings.MAILGUN_FROM_SENDER,
        "to": [subscriber.email],
        "subject": settings.MAILGUN_SUBJECT_LINE,
        "template": settings.MAILGUN_NEWSLETTER_TEMPLATE,
        "h:X-Mailgun-Variables": {
            "name": subscriber.name
        },
        "o:tracking": True,
    }
    response = mailgun.send_email(body)
    if response.status_code != 200:
        LOGGER.error(f"Mailgun failed to send welcome email: {body}")
        return None
    return SubscriptionWelcomeEmail(
        from_email=settings.MAILGUN_PERSONAL_EMAIL,
        to_email=subscriber.email,
        subject=settings.MAILGUN_SUBJECT_LINE,
        template=settings.MAILGUN_NEWSLETTER_TEMPLATE,
    )
Example #14
    def create_session(self, session_id, model_key):
        if session_id in self._SESSION_MAP:
            raise ValueError("session_id %d has already been created" %
                             (session_id))

        if model_key not in MODELS:
            raise ValueError(
                "%s is not a valid model, check the keys in models.json" %
                (model_key))

        worker = self._WORKER_POOL.get()  # this will block until we have a free one
        port = get_free_tcp_port()
        MODELS[model_key].update({"gpu_id": worker["gpu_id"], "port": port})

        if worker["type"] == "local":
            gpu_id = worker["gpu_id"]
            process = self._spawn_local_worker(**MODELS[model_key])
            model = PytorchUNet(MODELS[model_key]["fn"], gpu_id,
                                MODELS[model_key]["inputShape"])
            session = Session(session_id, model)
            self._SESSION_MAP[session_id] = session
            self._SESSION_INFO[session_id] = {
                "worker": worker,
                "process": process
            }
            LOGGER.info("Created a local worker for (%s) on GPU %d" %
                        (session_id, gpu_id))

        elif worker["type"] == "remote":
            raise NotImplementedError("Remote workers aren't implemented yet")
        else:
            raise ValueError("Worker type %s isn't recognized" %
                             (worker["type"]))
Example #15
def new_ghost_subscription(user: NetlifyAccount) -> Optional[Dict[str, List[Dict]]]:
    """
    Create Ghost member from Netlify identity signup.

    :param NetlifyAccount user: New user account from Netlify auth.

    :returns: Optional[Dict[str, List[Dict]]]
    """
    body = {
        "accounts": [{
            "name": user.user_metadata.full_name,
            "email": user.email,
            "note": "Subscribed from Netlify",
            "subscribed": True,
            "comped": False,
            "labels": user.user_metadata.roles,
        }]
    }
    response, code = ghost.create_member(body)
    if code != 200:
        error_type = response["errors"][0]["type"]
        if error_type == "ValidationError":
            LOGGER.info(
                f"Skipped Ghost member creation for existing user: {user.user_metadata.full_name} <{user.email}>"
            )
    else:
        LOGGER.success(
            f"Created new Ghost member: {user.user_metadata.full_name} <{user.email}>"
        )
        return body
Example #16
async def upvote_comment(upvote_request: UpvoteComment,
                         db: Session = Depends(get_db)):
    """
    Cast a user upvote for another user's comment.

    :param UpvoteComment upvote_request: User-generated request to upvote a comment.
    :param Session db: ORM Database session.
    """
    existing_vote = get_comment_upvote(db, upvote_request.user_id,
                                       upvote_request.comment_id)
    if upvote_request.vote and existing_vote is None:
        submit_comment_upvote(db, upvote_request.user_id,
                              upvote_request.comment_id)
        return upvote_request
    elif upvote_request.vote and existing_vote:
        LOGGER.warning(
            f"Upvote already submitted for comment `{upvote_request.comment_id}` from user `{upvote_request.user_id}`."
        )
        raise HTTPException(
            status_code=400,
            detail=f"Upvote already submitted for comment `{upvote_request.comment_id}` from user `{upvote_request.user_id}`.",
        )
    elif upvote_request.vote is False and existing_vote:
        remove_comment_upvote(db, upvote_request.user_id,
                              upvote_request.comment_id)
        return upvote_request
    LOGGER.warning(
        f"Can't delete non-existent upvote for comment `{upvote_request.comment_id}` from user `{upvote_request.user_id}`."
    )
    raise HTTPException(
        status_code=400,
        detail=f"Can't delete non-existent upvote for comment `{upvote_request.comment_id}` from user `{upvote_request.user_id}`.",
    )
Example #17
    def purge_unwanted_images(self, folder: str) -> List[str]:
        """
        Delete images which have been compressed or generated multiple times.

        :param str folder: Directory to recursively apply image transformations.

        :returns: List[str]
        """
        images_purged = []
        LOGGER.info("Purging unwanted images...")
        substrings = [
            "@2x@2x",
            "_o",
            "psd",
            "?",
            "@2x-",
            "-1-1",
            "-1-2",
            ".webp",
            "_retina/_retina",
            "_retina/_mobile/",
        ]
        blobs = self.get(folder)
        image_blob_names = [blob.name for blob in blobs]
        for image_blob_name in image_blob_names:
            if any(substr in image_blob_name for substr in substrings):
                self.bucket.delete_blob(image_blob_name)
                images_purged.append(image_blob_name)
                LOGGER.info(f"Deleted {image_blob_name}.")
        return images_purged
Example #18
 def save_analysis(self):
     LOGGER.info(
         'Total multipart is: %s, redirect is: %s, request exception is: %s' %
         (len(MULTIPART), len(REDIRECT), len(REQUEST_ERROR)))
     self.save_multipart()
     self.save_redirect()
     self.save_request_exception()
Example #19
def test_select_query(rdbms):
    posts_sql = fetch_sql_files("posts/selects")
    parsed_posts_sql = parse_sql_batch(posts_sql)
    query_result = rdbms.execute_query(parsed_posts_sql[0], "hackers_dev")
    assert len(posts_sql) > 0
    assert isinstance(parsed_posts_sql[0], str)
    assert isinstance(query_result, LegacyCursorResult)
    LOGGER.debug(query_result.rowcount)
Example #20
async def list_states() -> list:
    resp = await session.get(URLS.get('GET_STATES'), headers=headers)
    if resp.status == 200:
        states_string = json.loads(await resp.text())
        return states_string.get('states')
    else:
        LOGGER.debug(f"{resp.status}:  {await resp.text()}")
        return []
Example #21
 def _get_ssh_key(self):
     """ Fetch locally stored SSH key."""
     try:
         self.ssh_key = RSAKey.from_private_key_file(self.ssh_key_filepath)
         LOGGER.info(f"Found SSH key at self {self.ssh_key_filepath}")
         return self.ssh_key
     except SSHException as e:
         LOGGER.error(e)
Example #22
 def function_timer(*args, **kwargs):
     LOGGER.info("Start running {0} ...".format(fn.__name__))
     t0 = time.time()
     result = fn(*args, **kwargs)
     t1 = time.time()
     LOGGER.info("Total time running {0}: {1} seconds".format(
         fn.__name__, round(t1 - t0, 3)))
     return result
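
The snippet above is only the inner wrapper of a timing decorator; `fn` is a closure variable from the enclosing function, which the listing omits. A minimal sketch of the assumed enclosing decorator (LOGGER configured elsewhere in the module):

import functools
import time

def timer(fn):
    # Hypothetical enclosing decorator around the `function_timer` wrapper above.
    @functools.wraps(fn)  # preserve fn's name and docstring on the wrapper
    def function_timer(*args, **kwargs):
        LOGGER.info("Start running {0} ...".format(fn.__name__))
        t0 = time.time()
        result = fn(*args, **kwargs)
        t1 = time.time()
        LOGGER.info("Total time running {0}: {1} seconds".format(
            fn.__name__, round(t1 - t0, 3)))
        return result
    return function_timer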
Example #23
async def member_unsubscribe(subscriber: Subscriber):
    """
    Log user unsubscribe events.

    :param Subscriber subscriber: Current Ghost newsletter subscriber.
    """
    subscriber = subscriber.previous
    LOGGER.info(f"`{subscriber.name}` unsubscribed from newsletter.")
Example #24
 def _remove_repeat_blobs(self, image_blobs):
     images_purged = []
     r = re.compile(r"-[0-9]-[0-9]@2x\.jpg")
     repeat_blobs = list(filter(r.match, image_blobs))
     for repeat_blob in repeat_blobs:
         self.bucket.delete_blob(repeat_blob)
         images_purged.append(repeat_blob)
         LOGGER.info(f"Deleted {repeat_blob}")
     return images_purged
Example #25
 def run(self, version):
     self.VERSION = version
     LOGGER.debug('Running setup')
     self.setup()
     with open('./lib/bot/token.0', 'r', encoding="utf-8") as tf:
         self.TOKEN = tf.read().strip()
     LOGGER.info("Running bot")
     super().run(self.TOKEN, reconnect=True)
Example #26
 def _upload_ssh_key(self):
     try:
         system(
             f"ssh-copy-id -i {self.ssh_key_filepath}.pub {self.user}@{self.host}>/dev/null 2>&1"
         )
         LOGGER.info(f"{self.ssh_key_filepath} uploaded to {self.host}")
     except FileNotFoundError as error:
         LOGGER.error(error)
Example #27
async def list_dist(state: str) -> list:
    url = URLS.get("GET_DIST").replace("{state_id}", state)
    resp = await session.get(url=url, headers=headers)
    if resp.status == 200:
        dist_string = json.loads(await resp.text())
        return dist_string.get('districts')
    else:
        LOGGER.debug(f"{resp.status}:  {await resp.text()}")
        return []
Example #28
def log_info(fmt, *args):
    """deprecated"""
    warnings.warn("log_info is deprecated, use LOGGER.info instead",
                  DeprecationWarning,
                  stacklevel=2)
    if args:
        LOGGER.info(fmt.format(*args))
    else:
        LOGGER.info(fmt)
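
Usage note: the deprecated wrapper still formats and logs, but emits a DeprecationWarning first; for example, `log_info("Processed {} records", 42)` warns and then logs "Processed 42 records".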
Example #29
def get_domain_from_url(url):
    """get domain from url"""
    domain = ''
    # url is http://a.b.com/ads/asds
    if re.search(r'://.*?/', url):
        try:
            domain = url.split('//', 1)[1].split('/', 1)[0]
        except IndexError as e:
            LOGGER.warn('Get domain error,%s,%s' % (url, e))
    return domain
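
For comparison, the same host extraction can be done without a regex via the standard library; a Python 3 sketch:

from urllib.parse import urlsplit

def get_domain_from_url(url):
    # urlsplit('http://a.b.com/ads/asds').netloc == 'a.b.com'
    # (like the split-based version above, netloc keeps any port or userinfo)
    return urlsplit(url).netloc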
Example #30
async def github_pr(request: Request) -> JSONResponse:
    """
    Send SMS and Discord notifications upon PR creation in HackersAndSlackers Github projects.

    :param Request request: Incoming Github payload for newly opened PR.

    :returns: JSONResponse
    """
    payload = await request.json()
    action = payload.get("action")
    user = payload["sender"].get("login")
    pull_request = payload["pull_request"]
    repo = payload["repository"]
    if user in (settings.GITHUB_USERNAME, "dependabot-preview[bot]",
                "renovate[bot]"):
        return JSONResponse({
            "pr": {
                "id": pull_request["number"],
                "time": get_current_time(),
                "status": "ignored",
                "trigger": {
                    "type": "github",
                    "repo": repo["full_name"],
                    "title": pull_request["title"],
                    "user": user,
                    "action": action,
                },
            }
        })
    message = (
        f'PR {action} for `{repo["name"]}`:\n'
        f'{pull_request["title"]} '
        f'{pull_request["body"]} '
        f'{pull_request["url"]}'
    )

    sms_message = sms.send_message(message)
    LOGGER.info(f"Github PR {action} for {repo['name']} generated SMS message")
    return JSONResponse({
        "pr": {
            "id": pull_request["number"],
            "time": get_current_time(),
            "status": sms_message.status,
            "trigger": {
                "type": "github",
                "repo": repo["full_name"],
                "title": pull_request["title"],
                "user": user,
                "action": action,
            },
        },
        "sms": {
            "phone_recipient": sms_message.to,
            "phone_sender": sms_message.from_,
            "date_sent": sms_message.date_sent,
            "message": sms_message.body,
        },
    })
Example #31
 def denyStation(self, stationIdentifier):
     LOGGER.write(log.LOGTAGS[0],'Deny Station "'+ stationIdentifier +'"', 'to read "' + self.filePath + '"')
     if stationIdentifier not in self.allowedStations:
         LOGGER.write(log.LOGTAGS[1],'Station "'+ stationIdentifier +'"', 'already denied.')
         self.lastChange = False
         return False
     del self.allowedStations[self.getStationPosition(stationIdentifier)]
     self.updateInformation('allowedStations', self.allowedStations)
     self.lastChange = True
     return True
Example #32
 def allowStation(self, stationIdentifier):
     LOGGER.write(log.LOGTAGS[0],'Allow Station "'+ stationIdentifier +'"', 'to read "' + self.filePath + '"')
     if stationIdentifier in self.allowedStations: 
         LOGGER.write(log.LOGTAGS[1],'Station "'+ stationIdentifier +'"', 'already allowed.')
         self.lastChange = False
         return False
     self.allowedStations.append(stationIdentifier)
     self.updateInformation('allowedStations', self.allowedStations)
     self.lastChange = True
     return True
Example #33
async def update_tags_metadata() -> JSONResponse:
    """
    Enrich tag metadata upon update.

    :returns: JSONResponse
    """
    tag_update_queries = collect_sql_queries("tags")
    update_results = rdbms.execute_queries(tag_update_queries, "hackers_dev")
    LOGGER.success(f"Updated tags metadata: {update_results}")
    return JSONResponse(update_results, status_code=200)
Example #34
async def check_dist(dist_id: str, date: str) -> list:
    url = URLS.get("CHECK_DISTRICT").replace("{dist_id}",
                                             dist_id).replace("{date}", date)
    resp = await session.get(url=url, headers=headers)
    if resp.status == 200:
        print(await resp.text())
        dist_string = json.loads(await resp.text())
        return dist_string.get('centers')
    else:
        LOGGER.debug(f"{resp.status}:  {await resp.text()}")
        return []
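
Examples #20, #27, and #34 all assume module-level `session`, `headers`, and `URLS` objects defined elsewhere in the script. A minimal setup sketch with placeholder values (the real URL mapping and headers are not shown in the listing):

import asyncio
import aiohttp

headers = {"accept": "application/json"}  # placeholder headers
URLS = {"GET_STATES": "https://example.com/api/states"}  # placeholder mapping

async def main():
    global session
    session = aiohttp.ClientSession()
    try:
        print(await list_states())
    finally:
        await session.close()

asyncio.run(main())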
Example #35
def get_sample_results_by_naive_bayes(classifier, tags_info, words_info):
    """ This generator will read the test samples and
        return a sequence of predicted results.  """
    LOGGER.debug("Creating the naive bayes classifier...")
    classifier = create_classifier(tags_info, words_info)

    # Getting the test samples
    LOGGER.debug("Start to process the samples")
    test_samples = get_test_samples()

    # Start to process the test samples
    for line in test_samples:
        LOGGER.debug("Processing sample...")

        # -- Parse the records
        # Each line of the sample is made up of three parts:
        #   - id
        #   - words
        #   - tags
        # A typical line will look like this:
        #   question_id;word_id1:count<tag>word_id2:count;tag_id1<tab>tag_id2
        segments = line[:-1].split(";")
        words = (to_ints(elem.split(":")) for elem in segments[1].split())
        words = dict(elem for elem in words if (elem[0] in words_info))
        tags = to_ints(segments[2].split(), lambda t: t in tags_info)

        # -- Classifying
        LOGGER.debug("Classifying sample %s..." % segments[0])
        tags_with_score = classifier.classify(words)

        yield tags, tags_with_score
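
The parser above depends on a `to_ints` helper that the listing omits. A plausible sketch, assuming it converts string tokens to ints with an optional filter predicate:

def to_ints(tokens, predicate=None):
    # Hypothetical helper: convert string tokens to ints, keeping only those
    # that satisfy `predicate` when one is given.
    ints = [int(token) for token in tokens]
    if predicate is not None:
        ints = [i for i in ints if predicate(i)]
    return ints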
Example #36
def quit(noError=True, **keyWordArgs):
    try: 
        logTagID = keyWordArgs['logTag']
    except KeyError:
        logTagID = 2
    try: 
        message = keyWordArgs['message']
    except KeyError:
        message = 'unknown error!'       

    if not noError:
        LOGGER.write(log.LOGTAGS[logTagID],message)
        sys.exit(1)
    LOGGER.write(log.LOGTAGS[0],'Exiting without error')
    sys.exit()
Example #37
    def __init__(self, RequestHandlerClass, configuration):
        listenaddress = configuration["listen"]
        listenport = configuration["port"]
        x509cert = configuration["cert"]
        x509key = configuration["key"]

        LOGGER.write(LOGTAGS[0], "Starting SockServer on " + listenaddress + ":" + str(listenport) + " TCP")
        SocketServer.BaseServer.__init__(self, (listenaddress, listenport), RequestHandlerClass)
        ctx = SSL.Context(SSL.SSLv23_METHOD)
        ctx.use_certificate_file(x509cert)
        ctx.use_privatekey_file(x509key)

        self.socket = SSL.Connection(ctx, socket.socket(self.address_family, self.socket_type))
        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.server_bind()
        self.server_activate()
Example #38
def createDirectories(*dirs):
    for path in dirs:
        LOGGER.write(log.LOGTAGS[0],'Try to create directory "' + path + '"')
        try:
            os.makedirs(path, 0o664)
            LOGGER.write(log.LOGTAGS[0],'Directory "' + path + '" created.')
        except OSError as error:
            errorCode, errorMessage = getOSErrorMessage(error)
            logTagID = errorCodeToLogTagID(errorCode)
            LOGGER.write(log.LOGTAGS[logTagID],'Not able to create directory "' + path + '":',errorMessage+'!')
Example #39
 def getConfigFromFile(self, filePath):
     if not os.path.isfile(filePath):
         LOGGER.write(log.LOGTAGS[1],'Configfile "' + filePath + '" does not exists! Will use default config.')
         return {}
     if not isJsonFile(filePath):
         LOGGER.write(log.LOGTAGS[1],'Configfile "' + filePath + '" is no valid json file! Will use default config.')
         return {}
     with open(filePath, 'r') as f:
         LOGGER.write(log.LOGTAGS[0],'Configfile "' + filePath + '" is valid.')
         return json.load(f)
Example #40
def initiateParameterAndConfig():
    parameters = getParameters()
    configFilePath = getParameter('config', 'sharelockhomes.conf')
    logFilePath = getParameter('log')
    if logFilePath is not False:
        LOGGER.activateFileMode(logFilePath)
    LOGGER.write(log.LOGTAGS[0],'Try to use config from file "' + configFilePath + '"')
    configuration = Config(configFilePath)
    logging = getConfigValue(configuration, 'logging')
    logFilePath = getConfigValue(configuration, 'logFilePath', 'sharelockhomes.log')
    if logging and not LOGGER.writeToFile:
        LOGGER.activateFileMode(logFilePath)
    dbPathParameter = getParameter('db')
    dbPathConfig = getConfigValue(configuration, 'databasePath','db')
    dbPath = useParameterIfExistsElseUseConfig(dbPathParameter, dbPathConfig)
    createDirectories(dbPath)      
    return configuration 
Example #41
if __name__ == "__main__":
    # Reading the words debug and tags debug from files
    LOGGER.debug("Reading tags and words...")
    tags_info, words_info = prediction.read_tags_and_words()
    LOGGER.debug("Read %d tags and %d words" % (len(tags_info), len(words_info)))

    EXPERIMENT_CONFIG = {
        "classifier": "naive_bayes",
        "evaluator_file": "../../data/stat",
        "predicted_tag_count": [3, 5, 10, 15, 20, 25],
        "tags_info": tags_info,
        "words_info": words_info,
        "should_rerank": False,
        "rounds": 5,
        "sample_count": 10,
        "NAME": "knn.100.stat",
        "is_from_classifier": False
    }
Example #42
def run_experiment(predicted_results, settings, limit, predicted_tag_count):
    """ Run the experiment with configuration """
    tags_info = settings["tags_info"]
    sample_count = config.CLASSIFIER["sample_count"]
    # predicted_tag_count = settings["predicted_tag_count"]
    LOGGER.debug("Sample count: %d" % sample_count)
    LOGGER.debug("Max predicted tag count: %d" % predicted_tag_count)

    get_similarity = settings["get_similarity"]

    # run the test
    for index, predict_result in enumerate(predicted_results):
        if index > limit:
            break
        try:
            LOGGER.debug("%d/%d sample" % (index, sample_count))
            original, scored_predicted = predict_result
            # TODO: HARD CODED Code again.
            if settings["should_rerank"]:
                scored_predicted = rerank_tags(scored_predicted[:30], get_similarity)
            scored_predicted = scored_predicted[:predicted_tag_count]

            predicted = [t for t, s in scored_predicted]

            # TODO: SOME PROBLEM may raise here
            predicted = predicted[:predicted_tag_count]

            for name, evaluator in settings["evaluators"].items():
                evaluation = evaluator.update(original, predicted)
                log_message = "\nOriginal Result: %s\n" \
                              "Predicted Result: %s\n" \
                              "Evaluator Type: %s\n" \
                              "\tPrecision: %f\n" \
                              "\tRecall: %f\n" % (
                                  str(to_named_tags(original, tags_info)),
                                  str(to_named_tags(predicted, tags_info)),
                                  name, evaluation[0], evaluation[1])
                LOGGER.debug(log_message)

        except Exception as e:
            LOGGER.error("Error occurs %s" % (str(e)))

    evaluations = []
    for name, evaluator in settings["evaluators"].items():
        evaluation = evaluator.get_evaluation()
        LOGGER.info("%s Precision: %f\t Recall: %f" % (name, evaluation[0], evaluation[1]))
        evaluations.append(evaluation)
    return evaluations
Example #43
 def center(self, num):
     return self._text.center(num).encode(LOGGER.getLocale())
Example #44
 def full(self, num):
     length = len(self._text)
     text = self._text + (' '*(num-length))
     return text.encode(LOGGER.getLocale())
Example #45
 def part(self, start=0, end=None):
     if end is None:
         text = self._text[start:]
     else:
         text = self._text[start:end]
     return text.encode(LOGGER.getLocale())
Example #46
    def __init__(self, basket_info_file = "",
                 train_data_file = "", support = 10):
        """ ***Note***: If both basket_info_file and train_data_file
                        exist, ignore the "train_data_file". """
        evaluator.Evaluator.__init__(self)

        # -- Create basket info from test data
        if not os.path.exists(basket_info_file):
            LOGGER.info("Basket info file %s not found" % basket_info_file)
            LOGGER.info("Get basket form training data...")
            baskets = _create_baskets(train_data_file)
            tags_info = analyse_baskets(baskets)
            LOGGER.info(
                "Writing back the basket info to " + basket_info_file)
            # Save the tags info to the disk
            with open(basket_info_file, "wb") as f:
                pickle.dump(tags_info, f)
        else:
            LOGGER.info("basket info file %s found" % basket_info_file)
            LOGGER.info("Loading the basket_info_file ...")
            # Read the tag info from the file
            with open(basket_info_file, "rb") as f:
                tags_info = pickle.load(f)
            LOGGER.info("Basket info read!")

        self.tag_counts, self.cooccurrences, self.total_count = tags_info
        self.support = support
Example #47
def create_classifier(all_tags, all_words):
    LOGGER.debug("Creating classifier ...")
    conf = config.INPUT
    base_path = conf["base_path"]
    model_path = os.path.join(base_path, "bayes.model")

    # Create classifier from scratch or from already persisted model
    if not config.CLASSIFIER["retrain_model"] and os.path.exists(model_path):
        LOGGER.debug("Creating classifier from file ...")
        classifier = persistence.load_model(model_path)
        LOGGER.debug("Reading completed.")
    else:
        LOGGER.debug("Creating empty classifier ...")
        classifier = make_classifier_from_config(all_tags, all_words)
        LOGGER.debug("Traing completed.")

    if config.CLASSIFIER["retrain_model"]:
        LOGGER.debug("Writing the model to %s ..." % model_path)
        persistence.save_model(classifier, model_path)
        LOGGER.debug("Writing model completed.")
    return classifier
Example #48
 def onscreen(self):
     return self._text.encode(LOGGER.getLocale())