def get_free_text(self, parameters=None):
    """Build the free text associated with the GitHub profile.

    Parameters
    ----------
    parameters : dict, optional
        Maps a content kind ("commits" or "issues") to the number of
        items of that kind to include.  Defaults to 25 of each.

    Returns
    -------
    str
        Anonymised free text with underscores replaced by spaces.
    """
    if parameters is None:
        parameters = {"commits": 25, "issues": 25}
    parts = []
    for key, count in parameters.items():
        if key == "commits":
            generator = self._commits
        elif key == "issues":
            generator = self._issues
        else:
            continue  # unknown keys are ignored, as in the elif chain before
        for _ in range(count):
            item = generator_pop(generator)
            if item is not None:
                parts.append(item.get_free_text())
    # "".join avoids the quadratic cost of repeated += string concatenation.
    free_text = "".join(parts)
    return anonymise_text(free_text).replace("_", " ")
def get_free_text(self, parameters=None):
    """Build the free text associated with the Stack Overflow profile.

    Parameters
    ----------
    parameters : dict, optional
        Maps a content kind ("answered_posts", "asked_posts" or
        "top_tags") to the number of items of that kind to include.
        Defaults to 25 answered posts, 25 asked posts and no tags.

    Returns
    -------
    str
        Anonymised free text with underscores replaced by spaces.
    """
    if parameters is None:
        parameters = {"answered_posts": 25, "asked_posts": 25, "top_tags": 0}
    parts = []
    for key, count in parameters.items():
        if key == "answered_posts":
            generator = self._answered_posts
        elif key == "asked_posts":
            generator = self._asked_posts
        elif key == "top_tags":
            generator = self._top_tags
        else:
            continue  # unknown keys are ignored, as in the elif chain before
        for _ in range(count):
            item = generator_pop(generator)
            # isinstance() instead of type() ==: robust to subclasses and
            # the idiomatic way to type-check in Python.
            if isinstance(item, StackOverflowPost):
                parts.append(item.get_free_text() + '\n')
            elif isinstance(item, str):
                parts.append(item + '\n')
    # "".join avoids the quadratic cost of repeated += string concatenation.
    free_text = "".join(parts)
    return anonymise_text(free_text).replace("_", " ")
def scanProfile(url, session=None):
    """Scrape a profile page, anonymise its text and append it to the
    profiles file.

    Parameters
    ----------
    url : str
        Address of the profile page to scrape.
    session : optional
        Session object passed through to the HTML fetcher.
    """
    raw_html = scraper_methods.get_html(url, session)
    page_text = BeautifulSoup(raw_html, "lxml").text
    page_text = scraper_methods.anonymise_text(page_text)
    # Collapse Windows and Unix line endings into single spaces.
    page_text = re.sub(r"\r\n|\n", " ", page_text)
    writeText(page_text, profiles_filename)
def get_free_text(self, training=False):
    """Build the free text associated with the Stack Overflow post.

    Parameters
    ----------
    training : bool
        When True, prefix the text with ``__label__`` tags built from
        the post's tags and anonymise the result (training data).

    Returns
    -------
    str
        The post title, body and answers joined into one string.
    """
    answers = " ".join(self._answers)
    if not training:
        # Inference path: no labels, no anonymisation.
        return "{title} {post} {answers}".format(
            title=self._title, post=self._post, answers=answers)
    labels = "__label__ " + " __label__ ".join(self._post_tags)
    text = "{labels} {title} {post} {answers}".format(
        labels=labels, title=self._title, post=self._post, answers=answers)
    return anonymise_text(text)
def get_free_text(self, training=False):
    """Build the free text associated with the GitHub issue.

    Parameters
    ----------
    training : bool
        When True, prefix the text with ``__label__`` tags derived from
        the tokenized title and anonymise the result (training data).

    Returns
    -------
    str
        The issue text, newline terminated.
    """
    if training:
        # Tokenize the title only when labels are actually needed;
        # previously this ran (and could fail) on the inference path too.
        title_tokens = [tag[0] for tag in tokenize_title(self._title)]
        labels_prefix = "__label__ " + " __label__ ".join(title_tokens)
        freetext = "{labels} {post}\n".format(labels=labels_prefix,
                                              post=self._post)
        # NOTE(review): replacing "_" with " " also mangles the
        # "__label__" markers added above — confirm this is intended.
        freetext = anonymise_text(freetext).replace("_", " ")
    else:
        freetext = "{title} {post}\n".format(title=self._title,
                                             post=self._post)
    return freetext
def get_free_text(self, training=False):
    """Build the free text associated with the GitHub commit.

    Parameters
    ----------
    training : bool
        When True, prefix the text with ``__label__`` tags built from
        the tokenized title plus the commit's code tags, then anonymise
        the result (training data).

    Returns
    -------
    str
        Commit title/labels followed by the commit's code lines,
        newline terminated.
    """
    code = " ".join(self._code_lines)
    if not training:
        # Inference path: no labels, no anonymisation.
        return "{title} {code}\n".format(title=self._title, code=code)
    tokens = {tag[0] for tag in tokenize_title(self._title)}
    tokens.update(self.get_code_tags())
    # NOTE(review): set iteration makes label order non-deterministic,
    # and the "_" -> " " replace mangles "__label__" — confirm intended.
    prefix = "__label__ " + " __label__ ".join(tokens)
    text = "{labels_prefix} {code}\n".format(labels_prefix=prefix, code=code)
    return anonymise_text(text).replace("_", " ")