def process_polymath(project, split=False):
    """Build the DataFrame describing all threads of one project.

    Argument:
        project: Project-title. "Polymath" is replaced by "pm" before the
            threads are requested from ct.main.
        split: forwarded to create_project_frame; when truthy the frame is
            additionally passed through split_thread_types.
    Return:
        DataFrame with the 'basic' and 'all threads' columns filled in,
        after extend_project_frame and fill_project_frame were applied.
    """
    # ct.main is asked for the unmerged result; its items are
    # (title, thread) pairs.
    threads_by_title = ct.main(
        project.replace("Polymath", "pm"),
        use_cached=False,
        cache_it=False,
        merge=False)
    titles, threads = zip(*threads_by_title.items())

    pm_frame, indices = create_project_frame(project, titles, split)

    # Per-thread descriptive columns.
    pm_frame['basic', 'title'] = titles
    pm_frame['basic', 'thread'] = threads
    pm_frame['basic', 'url'] = [item.data.url for item in threads]
    pm_frame['basic', 'research'] = [
        item.data.is_research for item in threads]
    # Blog name: first label of the thread url's host, title-cased.
    pm_frame['basic', 'blog'] = [
        item.data.thread_url.netloc.split('.')[0].title()
        for item in threads]

    # One multi-thread per row, plus a multi-thread over all threads up to
    # (not including) the position given by each index.
    pm_frame['all threads', 'mthread (single)'] = [
        mct.MultiCommentThread(item) for item in threads]
    pm_frame['all threads', 'mthread (accumulated)'] = [
        mct.MultiCommentThread(*threads[:stop]) for stop in indices]
    accumulated = pm_frame['all threads', 'mthread (accumulated)']
    pm_frame['all threads', 'network'] = accumulated.apply(an.AuthorNetwork)

    if split:
        pm_frame = split_thread_types(pm_frame)

    pm_frame = pm_frame.pipe(extend_project_frame)
    pm_frame = pm_frame.pipe(fill_project_frame)
    return pm_frame
def main(project, **kwargs):
    """
    Build an AuthorNetwork for the supplied project and optionally run one
    of its methods.

    The threads are obtained from ct.main first and then wrapped in an
    AuthorNetwork.

    Keyword arguments (all default to False):
        do_more: key into ACTIONS naming the AuthorNetwork method to call.
        use_cached, cache_it, delete_all: forwarded to ct.main.

    Returns the AuthorNetwork when do_more is falsy; otherwise the chosen
    method is called with the readable project name and None is returned.
    Exits with status 1 if ct.main raises AttributeError.
    """
    action = kwargs.get('do_more', False)
    thread_options = {
        'use_cached': kwargs.get('use_cached', False),
        'cache_it': kwargs.get('cache_it', False),
        'delete_all': kwargs.get('delete_all', False),
    }

    try:
        # do_more is always switched off for the thread-building step.
        an_mthread = ct.main(project, do_more=False, **thread_options)
    except AttributeError as err:
        logging.error("Could not create mthread: %s", err)
        sys.exit(1)

    a_network = AuthorNetwork(an_mthread)
    if not action:
        return a_network

    # Turn the short project code into the readable project title.
    if project.startswith("pm"):
        the_project = project.replace("pm", "Polymath ")
    else:
        the_project = project.replace("mini_pm", "Mini-Polymath ")

    run_action = methodcaller(ACTIONS[action], project=the_project)
    run_action(a_network)
    logging.info("Processing complete at %s", datetime.now())
    return None