def save_traffic(traffic_obj_list, id, piece=3000):
    """
    Save captured traffic to disk, splitting it into pieces when it is large.

    :param traffic_obj_list: list of traffic objects to save
    :param id: task id
    :param piece: maximum number of entries per file, default 3000
    :return: None
    """
    traffic_path = Engine.get_traffic_path(id)
    if len(traffic_obj_list) > 0:
        saved_traffic_list = [i for i in traffic_obj_list]
        # Slice the traffic into pieces if it is too large for a single file.
        if len(saved_traffic_list) > piece:
            traffic_divided_path = []
            traffic_divided = divide_list(saved_traffic_list, piece)
            for i in range(len(traffic_divided)):
                traffic_divided_path.append(traffic_path + str(i))
                with open(traffic_path + str(i), 'w') as traffic_f:
                    cPickle.dump(traffic_divided[i], traffic_f)
            LOGGER.info('Traffic of %s has been divided and saved to %s.'
                        % (id, ','.join(traffic_divided_path)))
        else:
            with open(traffic_path, 'w') as traffic_f:
                cPickle.dump(saved_traffic_list, traffic_f)
            LOGGER.info('Traffic of %s has been saved to %s.' % (id, traffic_path))
def init_script():
    """Batch download all Snapchat memories."""
    for media_type in SNAPCHAT_MEDIA_URLS.keys():
        if SNAPCHAT_MEDIA_URLS[media_type] is None:
            parse_and_decode_urls(media_type)
        download_snapchat_memories(SNAPCHAT_MEDIA_URLS[media_type], media_type)
    LOGGER.success("Completed downloading all Snapchat memories.")
def waitForCtrlC():
    try:
        while True:
            pass
    except KeyboardInterrupt:
        LOGGER.write(log.LOGTAGS[0], 'ShareLockHomes', 'shutting down...')
        quit(False, logTag=0, message='Exiting by user.')
def get_author(self, author_id: int) -> Optional[List[str]]:
    """
    Fetch single Ghost author.

    :param int author_id: ID of Ghost author to fetch.

    :returns: Optional[List[str]]
    """
    try:
        params = {"key": self.content_api_key}
        headers = {"Content-Type": "application/json"}
        resp = requests.get(
            f"{self.content_api_url}/authors/{author_id}/",
            params=params,
            headers=headers,
        )
        if resp.status_code == 200:
            return resp.json()["authors"]
    except HTTPError as e:
        LOGGER.error(
            f"Failed to fetch Ghost authorID={author_id}: {e.response.content}"
        )
    except KeyError as e:
        LOGGER.error(f"KeyError while fetching Ghost authorID={author_id}: {e}")
def get_pages(self) -> Optional[dict]:
    """
    Fetch Ghost pages.

    :returns: Optional[dict]
    """
    try:
        headers = {
            "Authorization": f"Ghost {self.session_token}",
            "Content-Type": "application/json",
        }
        endpoint = f"{self.admin_api_url}/pages"
        resp = requests.get(endpoint, headers=headers)
        if resp.json().get("errors") is not None:
            LOGGER.error(
                f"Failed to fetch Ghost pages: {resp.json().get('errors')[0]['message']}"
            )
            return None
        pages = resp.json()["pages"]
        LOGGER.info(f"Fetched Ghost pages ({endpoint})")
        return pages
    except HTTPError as e:
        LOGGER.error(f"Ghost HTTPError while fetching pages: {e}")
    except KeyError as e:
        LOGGER.error(f"KeyError for `{e}` occurred while fetching pages")
    except Exception as e:
        LOGGER.error(f"Unexpected error occurred while fetching pages: {e}")
async def bulk_transform_images(
    directory: Optional[str] = Query(
        default=None,
        title="directory",
        description="Subdirectory of remote CDN to traverse and transform images.",
        max_length=50,
    )
) -> JSONResponse:
    """
    Apply transformations to images uploaded within the current month.
    Optionally accepts a `directory` parameter to override image directory.

    :param Optional[str] directory: Remote directory to recursively fetch images and apply transformations.

    :returns: JSONResponse
    """
    if directory is None:
        directory = settings.GCP_BUCKET_FOLDER
    transformed_images = {
        "purged": images.purge_unwanted_images(directory),
        "retina": images.retina_transformations(directory),
        "mobile": images.mobile_transformations(directory),
        # "standard": gcs.standard_transformations(directory),
    }
    response = []
    for k, v in transformed_images.items():
        if v is not None:
            response.append(f"{len(v)} {k}")
        else:
            response.append(f"0 {k}")
    LOGGER.success(f"Transformed {', '.join(response)} images")
    return JSONResponse(transformed_images)
def get_json_backup(self) -> Optional[dict]:
    """
    Download JSON snapshot of Ghost database.

    :returns: Optional[dict]
    """
    self._https_session()
    headers = {
        "Authorization": self.session_token,
        "accept": "text/html,application/xhtml+xml,application/xml;"
                  "q=0.9,image/webp,image/apng,*/*;"
                  "q=0.8,application/signed-exchange;"
                  "v=b3;q=0.9",
        "accept-encoding": "gzip, deflate, br",
        "Origin": "hackersandslackers.com",
        "Authority": "hackersandslackers.com",
    }
    endpoint = f"{self.admin_api_url}/db/"
    try:
        resp = requests.get(endpoint, headers=headers)
        return resp.json()
    except HTTPError as e:
        LOGGER.error(f"HTTPError occurred while fetching JSON backup: {e}")
    except Exception as e:
        LOGGER.error(f"Unexpected error occurred while fetching JSON backup: {e}")
async def optimize_post_image(post_update: PostUpdate) -> PlainTextResponse:
    """
    Generate retina version of a post's feature image if one doesn't exist.

    :param PostUpdate post_update: Incoming payload for an updated Ghost post.

    :returns: PlainTextResponse
    """
    new_images = []
    post = post_update.post.current
    feature_image = post.feature_image
    title = post.title
    if feature_image:
        new_images.append(images.create_retina_image(feature_image))
        new_images.append(images.create_mobile_image(feature_image))
        new_images = [image for image in new_images if image is not None]
        if bool(new_images):
            LOGGER.info(
                f"Generated {len(new_images)} images for post `{title}`: {new_images}"
            )
            return PlainTextResponse(f"{post.title}: {new_images}")
        return PlainTextResponse(
            content=f"Retina & mobile images already exist for {post.title}."
        )
    return PlainTextResponse(
        content=f"Post `{post.slug}` ignored; no image exists for optimization."
    )
def _https_session(self) -> None:
    """Authorize HTTPS session with Ghost admin."""
    endpoint = f"{self.admin_api_url}/session/"
    headers = {"Authorization": self.session_token}
    resp = requests.post(endpoint, headers=headers)
    LOGGER.info(f"Authorization resulted in status code {resp.status_code}.")
async def save_user_search_queries() -> JSONResponse:
    """
    Save top search analytics for the current week.

    :returns: JSONResponse
    """
    weekly_searches = persist_algolia_searches(settings.ALGOLIA_TABLE_WEEKLY, 7)
    monthly_searches = persist_algolia_searches(settings.ALGOLIA_TABLE_MONTHLY, 90)
    if weekly_searches is None or monthly_searches is None:
        raise HTTPException(500, "Unexpected error when saving search query data.")
    LOGGER.success(
        f"Inserted {len(weekly_searches)} rows into `{settings.ALGOLIA_TABLE_WEEKLY}`, "
        f"{len(monthly_searches)} into `{settings.ALGOLIA_TABLE_MONTHLY}`"
    )
    return JSONResponse({
        "7-Day": {
            "count": len(weekly_searches),
            "rows": weekly_searches,
        },
        "90-Day": {
            "count": len(monthly_searches),
            "rows": monthly_searches,
        },
    })
def run(self):
    for i in Processor.get_process_chains():
        func = getattr(self, i)
        try:
            func()
        except Func_timeout_error as e:
            LOGGER.warn(str(e) + self.request.url)
def write(self):
    with open(self.filePath, 'w') as f:
        jsonEncoded = json.dumps(self.configuration, sort_keys=True, indent=4)
        f.write(jsonEncoded)
        f.flush()
    # The context manager closes the file on exit; no explicit close() is needed.
    LOGGER.write(log.LOGTAGS[0], 'Default config is written to "' + self.filePath + '"')
def welcome_newsletter_subscriber(
    subscriber: Member,
) -> Optional[SubscriptionWelcomeEmail]:
    """
    Send welcome email to newsletter subscriber.

    :param Member subscriber: New Ghost member with newsletter subscription.

    :returns: Optional[SubscriptionWelcomeEmail]
    """
    body = {
        "from": settings.MAILGUN_FROM_SENDER,
        "to": [subscriber.email],
        "subject": settings.MAILGUN_SUBJECT_LINE,
        "template": settings.MAILGUN_NEWSLETTER_TEMPLATE,
        "h:X-Mailgun-Variables": {"name": subscriber.name},
        "o:tracking": True,
    }
    response = mailgun.send_email(body)
    if response.status_code != 200:
        LOGGER.error(f"Mailgun failed to send welcome email: {body}")
        return None
    return SubscriptionWelcomeEmail(
        from_email=settings.MAILGUN_PERSONAL_EMAIL,
        to_email=subscriber.email,
        subject=settings.MAILGUN_SUBJECT_LINE,
        template=settings.MAILGUN_NEWSLETTER_TEMPLATE,
    )
def create_session(self, session_id, model_key):
    if session_id in self._SESSION_MAP:
        raise ValueError("session_id %s has already been created" % (session_id))
    if model_key not in MODELS:
        raise ValueError(
            "%s is not a valid model, check the keys in models.json" % (model_key))

    worker = self._WORKER_POOL.get()  # this will block until we have a free one
    port = get_free_tcp_port()
    MODELS[model_key].update({"gpu_id": worker["gpu_id"], "port": port})

    if worker["type"] == "local":
        gpu_id = worker["gpu_id"]
        process = self._spawn_local_worker(**MODELS[model_key])
        model = PytorchUNet(MODELS[model_key]["fn"], gpu_id,
                            MODELS[model_key]["inputShape"])
        session = Session(session_id, model)
        self._SESSION_MAP[session_id] = session
        self._SESSION_INFO[session_id] = {"worker": worker, "process": process}
        LOGGER.info("Created a local worker for (%s) on GPU %d"
                    % (session_id, gpu_id))
    elif worker["type"] == "remote":
        raise NotImplementedError("Remote workers aren't implemented yet")
    else:
        raise ValueError("Worker type %s isn't recognized" % (worker["type"]))
def new_ghost_subscription(user: NetlifyAccount) -> Optional[Dict[str, List[Dict]]]:
    """
    Create Ghost member from Netlify identity signup.

    :param NetlifyAccount user: New user account from Netlify auth.

    :returns: Optional[Dict[str, List[Dict]]]
    """
    body = {
        "accounts": [
            {
                "name": user.user_metadata.full_name,
                "email": user.email,
                "note": "Subscribed from Netlify",
                "subscribed": True,
                "comped": False,
                "labels": user.user_metadata.roles,
            }
        ]
    }
    response, code = ghost.create_member(body)
    if code != 200:
        error_type = response["errors"][0]["type"]
        if error_type == "ValidationError":
            LOGGER.info(
                f"Skipped Ghost member creation for existing user: {user.user_metadata.full_name} <{user.email}>"
            )
    else:
        LOGGER.success(
            f"Created new Ghost member: {user.user_metadata.full_name} <{user.email}>"
        )
    return body
async def upvote_comment(upvote_request: UpvoteComment, db: Session = Depends(get_db)):
    """
    Cast a user upvote for another user's comment.

    :param UpvoteComment upvote_request: User-generated request to upvote a comment.
    :param Session db: ORM Database session.
    """
    existing_vote = get_comment_upvote(db, upvote_request.user_id, upvote_request.comment_id)
    if upvote_request.vote and existing_vote is None:
        submit_comment_upvote(db, upvote_request.user_id, upvote_request.comment_id)
        return upvote_request
    elif upvote_request.vote and existing_vote:
        LOGGER.warning(
            f"Upvote already submitted for comment `{upvote_request.comment_id}` from user `{upvote_request.user_id}`."
        )
        raise HTTPException(
            status_code=400,
            detail=f"Upvote already submitted for comment `{upvote_request.comment_id}` from user `{upvote_request.user_id}`.",
        )
    elif upvote_request.vote is False and existing_vote:
        remove_comment_upvote(db, upvote_request.user_id, upvote_request.comment_id)
        return upvote_request
    LOGGER.warning(
        f"Can't delete non-existent upvote for comment `{upvote_request.comment_id}` from user `{upvote_request.user_id}`."
    )
    raise HTTPException(
        status_code=400,
        detail=f"Can't delete non-existent upvote for comment `{upvote_request.comment_id}` from user `{upvote_request.user_id}`.",
    )
def purge_unwanted_images(self, folder: str) -> List[str]:
    """
    Delete images which have been compressed or generated multiple times.

    :param str folder: Directory to recursively apply image transformations.

    :returns: List[str]
    """
    images_purged = []
    LOGGER.info("Purging unwanted images...")
    substrings = [
        "@2x@2x",
        "_o",
        "psd",
        "?",
        "@2x-",
        "-1-1",
        "-1-2",
        ".webp",
        "_retina/_retina",
        "_retina/_mobile/",
    ]
    blobs = self.get(folder)
    image_blob_names = [blob.name for blob in blobs]
    for image_blob_name in image_blob_names:
        if any(substr in image_blob_name for substr in substrings):
            self.bucket.delete_blob(image_blob_name)
            images_purged.append(image_blob_name)
            LOGGER.info(f"Deleted {image_blob_name}.")
    return images_purged
def save_analysis(self):
    LOGGER.info(
        'Total multipart is: %s, redirect is: %s, request exception is: %s'
        % (len(MULTIPART), len(REDIRECT), len(REQUEST_ERROR)))
    self.save_multipart()
    self.save_redirect()
    self.save_request_exception()
def test_select_query(rdbms):
    posts_sql = fetch_sql_files("posts/selects")
    parsed_posts_sql = parse_sql_batch(posts_sql)
    query_result = rdbms.execute_query(parsed_posts_sql[0], "hackers_dev")
    assert len(posts_sql) > 0
    assert type(parsed_posts_sql[0]) == str
    assert type(query_result) == LegacyCursorResult
    LOGGER.debug(query_result.rowcount)
async def list_states() -> list:
    resp = await session.get(URLS.get('GET_STATES'), headers=headers)
    if resp.status == 200:
        states_string = json.loads(await resp.text())
        return states_string.get('states')
    else:
        LOGGER.debug(f"{resp.status}: {await resp.text()}")
        return []
def _get_ssh_key(self):
    """Fetch locally stored SSH key."""
    try:
        self.ssh_key = RSAKey.from_private_key_file(self.ssh_key_filepath)
        LOGGER.info(f"Found SSH key at {self.ssh_key_filepath}")
        return self.ssh_key
    except SSHException as e:
        LOGGER.error(e)
def function_timer(*args, **kwargs):
    LOGGER.info("Start running {0} ...".format(fn.__name__))
    t0 = time.time()
    result = fn(*args, **kwargs)
    t1 = time.time()
    LOGGER.info("Total time running {0}: {1} seconds".format(
        fn.__name__, round(t1 - t0, 3)))
    return result
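# The `function_timer` wrapper above references `fn` from an enclosing scope,
# so it is evidently the inner function of a timing decorator. The following is
# a minimal, self-contained sketch (an assumption, not taken from the source) of
# what that enclosing decorator could look like; the decorator name `timed` and
# the plain `logging.getLogger` LOGGER are illustrative only.
import functools
import logging
import time

LOGGER = logging.getLogger(__name__)


def timed(fn):
    """Decorator that logs how long the wrapped function takes to run."""

    @functools.wraps(fn)
    def function_timer(*args, **kwargs):
        LOGGER.info("Start running {0} ...".format(fn.__name__))
        t0 = time.time()
        result = fn(*args, **kwargs)
        t1 = time.time()
        LOGGER.info("Total time running {0}: {1} seconds".format(
            fn.__name__, round(t1 - t0, 3)))
        return result

    return function_timer


# Example usage (illustrative):
# @timed
# def slow_add(a, b):
#     time.sleep(0.1)
#     return a + b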
async def member_unsubscribe(subscriber: Subscriber):
    """
    Log user unsubscribe events.

    :param Subscriber subscriber: Current Ghost newsletter subscriber.
    """
    subscriber = subscriber.previous
    LOGGER.info(f"`{subscriber.name}` unsubscribed from newsletter.")
def _remove_repeat_blobs(self, image_blobs):
    images_purged = []
    # Note: re.match anchors at the start of the blob name, so only names
    # beginning with this pattern are matched.
    r = re.compile("-[0-9]-[0-9]@2x.jpg")
    repeat_blobs = list(filter(r.match, image_blobs))
    for repeat_blob in repeat_blobs:
        self.bucket.delete_blob(repeat_blob)
        images_purged.append(repeat_blob)
        LOGGER.info(f"Deleted {repeat_blob}")
    return images_purged
def run(self, version):
    self.VERSION = version
    LOGGER.debug('Running setup')
    self.setup()
    with open('./lib/bot/token.0', 'r', encoding="utf-8") as tf:
        self.TOKEN = tf.read().strip()
    LOGGER.info("Running bot")
    super().run(self.TOKEN, reconnect=True)
def _upload_ssh_key(self):
    try:
        system(
            f"ssh-copy-id -i {self.ssh_key_filepath}.pub {self.user}@{self.host}>/dev/null 2>&1"
        )
        LOGGER.info(f"{self.ssh_key_filepath} uploaded to {self.host}")
    except FileNotFoundError as error:
        LOGGER.error(error)
async def list_dist(state: str) -> list:
    url = URLS.get("GET_DIST").replace("{state_id}", state)
    resp = await session.get(url=url, headers=headers)
    if resp.status == 200:
        dist_string = json.loads(await resp.text())
        return dist_string.get('districts')
    else:
        LOGGER.debug(f"{resp.status}: {await resp.text()}")
        return []
def log_info(fmt, *args):
    """Deprecated: use LOGGER.info directly."""
    warnings.warn("log_info is deprecated, use LOGGER.info instead",
                  DeprecationWarning, stacklevel=2)
    if args:
        LOGGER.info(fmt.format(*args))
    else:
        LOGGER.info(fmt)
def get_domain_from_url(url):
    """Get domain from a URL, e.g. http://a.b.com/ads/asds -> a.b.com."""
    domain = ''
    if re.search(r'://.*?/', url):
        try:
            domain = url.split('//', 1)[1].split('/', 1)[0]
        except IndexError as e:
            LOGGER.warn('Get domain error, %s, %s' % (url, e))
    return domain
async def github_pr(request: Request) -> JSONResponse:
    """
    Send SMS and Discord notifications upon PR creation in HackersAndSlackers Github projects.

    :param Request request: Incoming Github payload for newly opened PR.

    :returns: JSONResponse
    """
    payload = await request.json()
    action = payload.get("action")
    user = payload["sender"].get("login")
    pull_request = payload["pull_request"]
    repo = payload["repository"]
    if user in (settings.GITHUB_USERNAME, "dependabot-preview[bot]", "renovate[bot]"):
        return JSONResponse({
            "pr": {
                "id": pull_request["number"],
                "time": get_current_time(),
                "status": "ignored",
                "trigger": {
                    "type": "github",
                    "repo": repo["full_name"],
                    "title": pull_request["title"],
                    "user": user,
                    "action": action,
                },
            }
        })
    message = (
        f'PR {action} for `{repo["name"]}`:\n'
        f'{pull_request["title"]} '
        f'{pull_request["body"]} '
        f'{pull_request["url"]}'
    )
    sms_message = sms.send_message(message)
    LOGGER.info(f"Github PR {action} for {repo['name']} generated SMS message")
    return JSONResponse({
        "pr": {
            "id": pull_request["number"],
            "time": get_current_time(),
            "status": sms_message.status,
            "trigger": {
                "type": "github",
                "repo": repo["full_name"],
                "title": pull_request["title"],
                "user": user,
                "action": action,
            },
        },
        "sms": {
            "phone_recipient": sms_message.to,
            "phone_sender": sms_message.from_,
            "date_sent": sms_message.date_sent,
            "message": sms_message.body,
        },
    })
def denyStation(self, stationIdentifier):
    LOGGER.write(log.LOGTAGS[0], 'Deny Station "' + stationIdentifier + '"',
                 'to read "' + self.filePath + '"')
    if stationIdentifier not in self.allowedStations:
        LOGGER.write(log.LOGTAGS[1], 'Station "' + stationIdentifier + '"',
                     'already denied.')
        self.lastChange = False
        return False
    del self.allowedStations[self.getStationPosition(stationIdentifier)]
    self.updateInformation('allowedStations', self.allowedStations)
    self.lastChange = True
    return True
def allowStation(self, stationIdentifier):
    LOGGER.write(log.LOGTAGS[0], 'Allow Station "' + stationIdentifier + '"',
                 'to read "' + self.filePath + '"')
    if stationIdentifier in self.allowedStations:
        LOGGER.write(log.LOGTAGS[1], 'Station "' + stationIdentifier + '"',
                     'already allowed.')
        self.lastChange = False
        return False
    self.allowedStations.append(stationIdentifier)
    self.updateInformation('allowedStations', self.allowedStations)
    self.lastChange = True
    return True
async def update_tags_metadata() -> JSONResponse:
    """
    Enrich tag metadata upon update.

    :returns: JSONResponse
    """
    tag_update_queries = collect_sql_queries("tags")
    update_results = rdbms.execute_queries(tag_update_queries, "hackers_dev")
    LOGGER.success(f"Updated tags metadata: {update_results}")
    return JSONResponse(update_results, status_code=200)
async def check_dist(dist_id: str, date: str) -> list:
    url = URLS.get("CHECK_DISTRICT").replace("{dist_id}", dist_id).replace("{date}", date)
    resp = await session.get(url=url, headers=headers)
    if resp.status == 200:
        dist_string = json.loads(await resp.text())
        return dist_string.get('centers')
    else:
        LOGGER.debug(f"{resp.status}: {await resp.text()}")
        return []
def get_sample_results_by_naive_bayes(classifier, tags_info, words_info):
    """
    This generator reads the test samples and yields a sequence of
    predicted results.
    """
    LOGGER.debug("Creating the naive bayes classifier...")
    classifier = create_classifier(tags_info, words_info)

    # Getting the test samples
    LOGGER.debug("Start to process the samples")
    test_samples = get_test_samples()

    # Start to process the test samples
    for line in test_samples:
        LOGGER.debug("Processing sample...")
        # -- Parse the records
        # Each line of the sample is made up of three parts:
        #   - id
        #   - words
        #   - tags
        # A typical line looks like this:
        #   question_id;word_id1:count<tab>word_id2:count;tag_id1<tab>tag_id2
        segments = line[:-1].split(";")
        words = (to_ints(elem.split(":")) for elem in segments[1].split())
        words = dict(elem for elem in words if (elem[0] in words_info))
        tags = to_ints(segments[2].split(), lambda t: t in tags_info)

        # -- Classifying
        LOGGER.debug("Classifying sample %s..." % segments[0])
        tags_with_score = classifier.classify(words)
        yield tags, tags_with_score
def quit(noError=True, **keyWordArgs):
    try:
        logTagID = keyWordArgs['logTag']
    except KeyError:
        logTagID = 2
    try:
        message = keyWordArgs['message']
    except KeyError:
        message = 'unknown error!'
    if not noError:
        LOGGER.write(log.LOGTAGS[logTagID], message)
        sys.exit(1)
    LOGGER.write(log.LOGTAGS[0], 'Exiting without error')
    sys.exit()
def __init__(self, RequestHandlerClass, configuration):
    listenaddress = configuration["listen"]
    listenport = configuration["port"]
    x509cert = configuration["cert"]
    x509key = configuration["key"]
    LOGGER.write(LOGTAGS[0], "Starting SockServer on " + listenaddress + ":"
                 + str(listenport) + " TCP")
    SocketServer.BaseServer.__init__(self, (listenaddress, listenport),
                                     RequestHandlerClass)
    ctx = SSL.Context(SSL.SSLv23_METHOD)
    ctx.use_certificate_file(x509cert)
    ctx.use_privatekey_file(x509key)
    self.socket = SSL.Connection(ctx, socket.socket(self.address_family,
                                                    self.socket_type))
    self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    self.server_bind()
    self.server_activate()
def createDirectories(*dirs):
    for path in dirs:
        LOGGER.write(log.LOGTAGS[0], 'Try to create directory "' + path + '"')
        try:
            os.makedirs(path, 0o664)
            LOGGER.write(log.LOGTAGS[0], 'Directory "' + path + '" created.')
        except OSError as error:
            errorCode, errorMessage = getOSErrorMessage(error)
            logTagID = errorCodeToLogTagID(errorCode)
            LOGGER.write(log.LOGTAGS[logTagID],
                         'Not able to create directory "' + path + '":',
                         errorMessage + '!')
def getConfigFromFile(self, filePath):
    if not os.path.isfile(filePath):
        LOGGER.write(log.LOGTAGS[1], 'Configfile "' + filePath
                     + '" does not exist! Will use default config.')
        return {}
    if not isJsonFile(filePath):
        LOGGER.write(log.LOGTAGS[1], 'Configfile "' + filePath
                     + '" is not a valid JSON file! Will use default config.')
        return {}
    with open(filePath, 'r') as f:
        LOGGER.write(log.LOGTAGS[0], 'Configfile "' + filePath + '" is valid.')
        return json.load(f)
def initiateParameterAndConfig():
    parameters = getParameters()
    configFilePath = getParameter('config', 'sharelockhomes.conf')
    logFilePath = getParameter('log')
    if logFilePath != False:
        LOGGER.activateFileMode(logFilePath)
    LOGGER.write(log.LOGTAGS[0], 'Try to use config from file "' + configFilePath + '"')
    configuration = Config(configFilePath)
    logging = getConfigValue(configuration, 'logging')
    logFilePath = getConfigValue(configuration, 'logFilePath', 'sharelockhomes.log')
    if logging == True and LOGGER.writeToFile == False:
        LOGGER.activateFileMode(logFilePath)
    dbPathParameter = getParameter('db')
    dbPathConfig = getConfigValue(configuration, 'databasePath', 'db')
    dbPath = useParameterIfExistsElseUseConfig(dbPathParameter, dbPathConfig)
    createDirectories(dbPath)
    return configuration
if __name__ == "__main__":
    # Read the tags info and words info from files
    LOGGER.debug("Reading tags and words...")
    tags_info, words_info = prediction.read_tags_and_words()
    LOGGER.debug("Read %d tags and %d words" % (len(tags_info), len(words_info)))
    EXPERIMENT_CONFIG = {
        "classifier": "naive_bayes",
        "evaluator_file": "../../data/stat",
        "predicted_tag_count": [3, 5, 10, 15, 20, 25],
        "tags_info": tags_info,
        "words_info": words_info,
        "should_rerank": False,
        "rounds": 5,
        "sample_count": 10,
        "NAME": "knn.100.stat",
        "is_from_classifier": False,
    }
def run_experiment(predicted_results, settings, limit, predicted_tag_count):
    """Run the experiment with the given configuration."""
    tags_info = settings["tags_info"]
    sample_count = config.CLASSIFIER["sample_count"]
    # predicted_tag_count = settings["predicted_tag_count"]
    LOGGER.debug("Sample count: %d" % sample_count)
    LOGGER.debug("Max predicted tag count: %d" % predicted_tag_count)
    get_similarity = settings["get_similarity"]

    # Run the test
    for index, predict_result in enumerate(predicted_results):
        if index > limit:
            break
        try:
            LOGGER.debug("%d/%d sample" % (index, sample_count))
            original, scored_predicted = predict_result
            # TODO: HARD CODED Code again.
            if settings["should_rerank"]:
                scored_predicted = rerank_tags(scored_predicted[:30], get_similarity)
            scored_predicted = scored_predicted[:predicted_tag_count]
            predicted = [t for t, s in scored_predicted]
            # TODO: SOME PROBLEM may arise here
            predicted = predicted[:predicted_tag_count]
            for name, evaluator in settings["evaluators"].items():
                evaluation = evaluator.update(original, predicted)
                log_message = "\nOriginal Result: %s\n" \
                              "Predicted Result: %s\n" \
                              "Evaluator Type: %s\n" \
                              "\tPrecision: %f\n" \
                              "\tRecall: %f\n" % (
                                  str(to_named_tags(original, tags_info)),
                                  str(to_named_tags(predicted, tags_info)),
                                  name, evaluation[0], evaluation[1])
                LOGGER.debug(log_message)
        except Exception as e:
            LOGGER.error("Error occurs: %s" % (str(e)))

    evaluations = []
    for name, evaluator in settings["evaluators"].items():
        evaluation = evaluator.get_evaluation()
        LOGGER.info("%s Precision: %f\t Recall: %f"
                    % (name, evaluation[0], evaluation[1]))
        evaluations.append(evaluation)
    return evaluations
def center(self, num):
    return self._text.center(num).encode(LOGGER.getLocale())
def full(self, num):
    length = len(self._text)
    text = self._text + (' ' * (num - length))
    return text.encode(LOGGER.getLocale())
def part(self, start=0, end=None):
    if end is None:
        text = self._text[start:]
    else:
        text = self._text[start:end]
    return text.encode(LOGGER.getLocale())
def __init__(self, basket_info_file="", train_data_file="", support=10):
    """
    ***Note***: If both basket_info_file and train_data_file exist,
    the "train_data_file" is ignored.
    """
    evaluator.Evaluator.__init__(self)
    # -- Create basket info from test data
    if not os.path.exists(basket_info_file):
        LOGGER.info("Basket info file %s not found" % basket_info_file)
        LOGGER.info("Get baskets from training data...")
        baskets = _create_baskets(train_data_file)
        tags_info = analyse_baskets(baskets)
        LOGGER.info("Writing back the basket info to " + basket_info_file)
        # Save the tags info to the disk
        pickle.dump(tags_info, open(basket_info_file, "wb"))
    else:
        LOGGER.info("Basket info file %s found" % basket_info_file)
        LOGGER.info("Loading the basket_info_file ...")
        # Read the tag info from the file
        tags_info = pickle.load(open(basket_info_file, "rb"))
        LOGGER.info("Basket info read!")
    self.tag_counts, self.cooccurrences, self.total_count = tags_info
    self.support = support
def create_classifier(all_tags, all_words):
    LOGGER.debug("Creating classifier ...")
    conf = config.INPUT
    base_path = conf["base_path"]
    model_path = os.path.join(base_path, "bayes.model")

    # Create classifier from scratch or from an already persisted model
    if not config.CLASSIFIER["retrain_model"] and os.path.exists(model_path):
        LOGGER.debug("Creating classifier from file ...")
        classifier = persistence.load_model(model_path)
        LOGGER.debug("Reading completed.")
    else:
        LOGGER.debug("Creating empty classifier ...")
        classifier = make_classifier_from_config(all_tags, all_words)
        LOGGER.debug("Training completed.")
    if config.CLASSIFIER["retrain_model"]:
        LOGGER.debug("Writing the model to %s ..." % model_path)
        persistence.save_model(classifier, model_path)
        LOGGER.debug("Writing model completed.")
    return classifier
def onscreen(self):
    return self._text.encode(LOGGER.getLocale())