def _handle_plot_parser(args):
    """Validate the arguments of the plot command and run the plotting.

    Every path in ``args.input`` must be an existing file. If
    ``args.population`` is set it must also be an existing file, and it is
    replaced in place by the result of ``_parse_population`` before being
    handed to ``stats.plot_predictions``.

    On the first missing file an error is logged and the process exits with
    status 1.
    """
    for path in args.input:
        if not os.path.isfile(os.path.abspath(path)):
            LOGGER.error(f"File {path} does not exist.")
            sys.exit(1)

    if args.population:
        if not os.path.isfile(os.path.abspath(args.population)):
            # Name the population file itself. The loop variable from the
            # input check above refers to a different file, and is unbound
            # when no input files were given.
            LOGGER.error(f"File {args.population} does not exist.")
            sys.exit(1)
        args.population = _parse_population(args.population)

    stats.plot_predictions(
        args.input,
        args.alpha_level,
        args.output,
        args.width,
        args.fill,
        args.patterns,
        args.population,
    )
def _handle_analyze_parser(args):
    """Validate the arguments of the analyze command and run classification.

    ``args.rows_per_file`` must be positive and ``args.input`` must be an
    existing file; otherwise an error is logged and the process exits with
    status 1. The input, output and senti4sd pool root paths are made
    absolute before being passed to ``classify.classify_sentiment``.
    """
    inpath = os.path.abspath(args.input)

    # Pick the first applicable complaint, then report and bail out once.
    problem = None
    if args.rows_per_file <= 0:
        problem = f"At least 1 row per file is required."
    elif not os.path.isfile(inpath):
        problem = f"File {inpath} does not exist!"

    if problem is not None:
        LOGGER.error(problem)
        sys.exit(1)

    classify.classify_sentiment(
        args.rows_per_file,
        os.path.abspath(args.senti4sd_pool_root),
        inpath,
        os.path.abspath(args.output),
    )
def _try_commit_and_flush():
    """Commit and then flush the private session.

    Returns:
        True if both calls completed. False if either raised, in which case
        the exception is logged and the session is rolled back.
    """
    succeeded = True
    try:
        _session.commit()
        _session.flush()
    except Exception as e:
        succeeded = False
        LOGGER.error(
            f"Unexpected exception:\n{type(e).__name__}: {str(e)}\nRolling back"
        )
        _session.rollback()
    return succeeded
async def _async_classify_sentiment(path_to_classifier: str, inpath: str,
                                    outpath: str):
    """Run the classification task asynchronously.

    The script at ``path_to_classifier`` is executed with ``/bin/bash`` from
    the script's own directory, receiving the absolute ``inpath`` and the
    base name of ``outpath`` as arguments. After a successful run, the file
    with that base name in the script's directory is moved to ``outpath``.

    Raises:
        ClassificationError: if the script exits with a non-zero status. The
            content of ``inpath`` is logged first.
    """
    dir_name, script_name = os.path.split(path_to_classifier)
    out_file = os.path.basename(outpath)

    # the classification script must be run from the senti4SD directory
    command = ['/bin/bash', script_name, os.path.abspath(inpath), out_file]

    # A bare script name splits into an empty directory part. Passing '' as
    # cwd makes the subprocess fail to start, so fall back to None, which
    # keeps the current working directory.
    process = await asyncio.create_subprocess_exec(*command,
                                                   cwd=dir_name or None)
    await process.communicate()

    if process.returncode != 0:
        with open(inpath, 'r') as f:
            LOGGER.error(
                f"Failed to classify {inpath} containing: {f.read()}")
        raise ClassificationError(f"Classifying {inpath} failed.")

    shutil.move(os.path.join(dir_name, out_file), outpath)
def _post_xml_row_to_model(elem,
                           question_ids: Set[int] = None,
                           target_post_type: PostType = PostType.QUESTION):
    """Convert an xml row from the Posts.xml file to a model.

    The post body is sanitized before conversion.

    ``question_ids`` is only consulted when ``target_post_type`` is
    ``PostType.ANSWER``: an answer is converted only if its ParentId is
    contained in ``question_ids``. It must be supplied in that case, because
    the ``None`` default does not support the membership test.

    Returns:
        A ``Post``, or None when the row's PostTypeId is not a valid
        ``PostType``, when the row's type differs from ``target_post_type``,
        when an answer's parent is not in ``question_ids``, or when
        sanitization raises ``ValueError`` (which is logged).
    """
    try:
        type_code = int(elem.attrib['PostTypeId'])
        post_type = PostType(type_code)
    except ValueError:
        # neither a question nor an answer
        return None

    # Guard clauses: wrong kind of post, or an answer to an unknown question.
    if target_post_type != post_type:
        return None
    wants_answer = target_post_type == PostType.ANSWER
    if wants_answer and int(elem.attrib['ParentId']) not in question_ids:
        return None

    try:
        clean_body = sanitize_post(elem.attrib['Body'])
    except ValueError:
        LOGGER.error(
            f"Sanitization failed for Post with Id={elem.attrib['Id']}")
        return None

    created = MayaDT.from_rfc3339(elem.attrib['CreationDate']).date

    # Answers carry only a parent reference; questions carry title and tags.
    is_answer = post_type == PostType.ANSWER
    title = None if is_answer else elem.attrib['Title']
    tags = None if is_answer else elem.attrib['Tags']
    parent_id = elem.attrib['ParentId'] if is_answer else None

    return Post(id=elem.attrib['Id'],
                creation_date=created,
                post_type_id=post_type.value,
                title=title,
                text=clean_body,
                tags=tags,
                parent_id=parent_id)
def _comment_xml_row_to_model(elem, post_ids: Set[int]):
    """Convert an xml row from the Comments.xml file to a model.

    The comment text is sanitized before conversion.

    Returns:
        A ``Comment``, or None when the row's PostId is not contained in
        ``post_ids`` or when sanitization raises (the failure is logged).
    """
    owner_id = int(elem.attrib['PostId'])
    if owner_id not in post_ids:
        return None

    try:
        clean_text = sanitize_comment(elem.attrib['Text'])
    except Exception as e:
        LOGGER.error(
            f"Sanitization failed for Comment with Id={elem.attrib['Id']}\n"
            f"{type(e).__name__}\n{str(e)}")
        return None

    created = MayaDT.from_rfc3339(elem.attrib['CreationDate']).date
    return Comment(id=elem.attrib['Id'],
                   creation_date=created,
                   text=clean_text,
                   post_id=owner_id)
def log_exception(pre_msg, e):
    """Log ``pre_msg`` at error level, followed by the exception ``e``.

    The exception is rendered on its own line as ``TypeName: message``.
    """
    message = f"{pre_msg}\n{type(e).__name__}: {str(e)}"
    LOGGER.error(message)