def get_articles(self, articles_directory, number_fake, number_real):
    """Sample fake and real articles at random and store them in ``self.articles``.

    The total ``number_fake + number_real`` is recorded in
    ``self.nbre_all_article``. File names are then drawn from the ``Fake``
    and ``Real`` sub-directories of ``articles_directory`` (resolved relative
    to ``self.path`` through ``get_fullpath``). For every drawn name, a dict
    with the keys ``'content'`` and ``'title'`` is appended to
    ``self.articles['fake']`` or ``self.articles['real']``; the title is read
    from the file with the same name under ``Fake_titles`` / ``Real_titles``.

    NOTE(review): ``np.random.choice`` is called with its default arguments,
    which (per the NumPy documentation) sample *with* replacement, so the
    same file can be picked more than once -- confirm this is intended.

    :param articles_directory: directory holding the four sub-directories.
    :param number_fake: how many fake article names to draw.
    :param number_real: how many real article names to draw.
    """
    self.nbre_all_article = number_fake + number_real

    # Resolve the directories in a fixed order: fake, fake titles, real, real titles.
    fake_dir = get_fullpath(self.path, articles_directory, 'Fake')
    fake_titles_dir = get_fullpath(self.path, articles_directory, 'Fake_titles')
    real_dir = get_fullpath(self.path, articles_directory, 'Real')
    real_titles_dir = get_fullpath(self.path, articles_directory, 'Real_titles')

    # Draw the random samples (fake first, then real) before reading any file.
    sampled_fake = np.random.choice(os.listdir(fake_dir), number_fake)
    sampled_real = np.random.choice(os.listdir(real_dir), number_real)

    batches = (
        ('fake', sampled_fake, fake_dir, fake_titles_dir),
        ('real', sampled_real, real_dir, real_titles_dir),
    )
    for label, names, content_dir, titles_dir in batches:
        for name in names:
            self.articles[label].append({
                'content': self.get_content(get_fullpath(content_dir, name)),
                'title': self.get_content(get_fullpath(titles_dir, name)),
            })
def test_get_fullpath(self):
    '''test_get_fullpath will test the get_fullpath function

    Three cases are exercised against a temporary file:

    1. the file exists: the same path must be returned;
    2. the file was removed: the call must raise SystemExit with code 1;
    3. the file was removed and ``required=False``: None must be returned.
    '''
    print("Testing utils.get_fullpath...")
    from utils import get_fullpath

    # mkstemp returns an already-open OS-level file descriptor together with
    # the path. Only the path is needed, so close the descriptor right away
    # instead of leaking it for the rest of the test run.
    fd, tmpfile = tempfile.mkstemp()
    os.close(fd)

    try:
        print("Case 1: File exists, should return full path")
        self.assertEqual(get_fullpath(tmpfile), tmpfile)
    finally:
        # The file has to be gone for cases 2 and 3; removing it here also
        # guarantees it is not left behind when the case 1 assertion fails.
        os.remove(tmpfile)

    print("Case 2: File doesn't exist, should return error")
    with self.assertRaises(SystemExit) as cm:
        get_fullpath(tmpfile)
    self.assertEqual(cm.exception.code, 1)

    print("Case 3: File doesn't exist, but not required, should return None")
    self.assertIsNone(get_fullpath(tmpfile, required=False))
def read_files():
    """Read everything listed for the ``output`` directory beside this module.

    The ``output`` directory is located next to the file containing this
    function. Whatever ``get_fullpath`` yields for that directory is passed,
    item by item, to ``read_file``.

    :return: list with one ``read_file`` result per item, in iteration order.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(module_dir, 'output')
    # NOTE(review): the value returned by get_fullpath is iterated directly,
    # so it is presumably a collection of file paths here -- confirm against
    # the helper's definition.
    return [read_file(entry) for entry in get_fullpath(output_dir)]
def save(self, path):
    """
    Pickle the current dataset state to a file.

    The file at ``get_fullpath(path)`` is opened for binary writing and a
    single dict is dumped into it, holding ``self.articles``,
    ``self.original_articles``, ``self.vocabulary`` and ``self.frequency``
    under the keys of the same names.

    :param path: destination path, resolved through ``get_fullpath``.
    :return: None
    """
    field_names = ("articles", "original_articles", "vocabulary", "frequency")
    with open(get_fullpath(path), "wb") as handle:
        # Attributes are read only after the file has been opened, in the
        # order listed above.
        snapshot = {name: getattr(self, name) for name in field_names}
        pickle.dump(snapshot, handle)
def configure(args):
    """Fill the ``@KEY@`` placeholders of a template from ``#define`` lines.

    Every line of the defaults file (``args.defaults``) that matches
    ``#define KEY VALUE`` replaces each ``@KEY@`` occurrence in the template
    (``args.infile``). Double quotes are stripped from the value, and the
    bare values ``0`` / ``1`` are written as ``no`` / ``yes``. The result is
    written to ``<args.outfile>.tmp`` and then renamed to ``args.outfile``.

    :param args: object providing ``defaults``, ``infile`` and ``outfile``.
    """
    # Resolve both inputs up front (defaults first, then the template).
    defaults_path = get_fullpath(args.defaults)   # ../src/lib/config_defaults.h
    template_path = get_fullpath(args.infile)     # singularity.conf.in

    define_re = re.compile("#define ([A-Z_]+) (.*)")

    default_lines = read_file(defaults_path)
    data = "".join(read_file(template_path))

    # Values that are rendered as words in the final configuration.
    yes_no = {'0': 'no', '1': 'yes'}
    # Untouched values as found in the defaults file; filled in below but not
    # read again inside this function.
    defaults = {}

    for line in default_lines:
        match = define_re.match(line)
        if match is None:
            continue
        key, value = match.groups()
        defaults[key] = value

        # Strip the quotes, then translate 0/1 when the whole value is one of them.
        rendered = value.replace('"', '')
        rendered = yes_no.get(rendered, rendered)
        placeholder = "@" + key + "@"
        data = data.replace(placeholder, rendered)

    # Write to a temporary name first, then rename it onto the real target.
    tmp_outfile = "%s.tmp" % (args.outfile)
    write_file(tmp_outfile, data)
    os.rename(tmp_outfile, args.outfile)

    logger.info("*** FINISHED PYTHON CONFIGURATION HELPER ****\n")
def load(self):
    """Load pickled statements and file them under ``real`` / ``fake``.

    The pickle at ``self.config.dataset.sentence_based.dataset_path`` is
    read and each statement is handled by its ``'label'``:

    * ``"half-true"`` statements are skipped;
    * ``'true'`` and ``'mostly-true'`` are stored under ``"real"``;
    * every other label is stored under ``"fake"``.

    For each kept statement, ``self._get_content`` is called with the id
    ``"<index>_statements"``, the statement text, the mapped label and
    ``split_sentences=False``; its result is appended to
    ``self.articles[label]`` as ``{"content": ...}``.

    NOTE(review): ``pickle.load`` must only be used on trusted files --
    confirm the dataset path cannot point at untrusted data.

    :return: tuple ``(self.articles, self.original_articles,
        self.vocabulary, self.frequency)``.
    """
    dataset_path = get_fullpath(self.config.dataset.sentence_based.dataset_path)
    with open(dataset_path, "rb") as handle:
        statements = pickle.load(handle)

    for index, statement in enumerate(statements):
        raw_label = statement['label']
        if raw_label == "half-true":
            # Ambiguous statements belong to neither class.
            continue

        label = "real" if raw_label in ('true', 'mostly-true') else "fake"
        # Create the bucket before processing the text, on first use only.
        if label not in self.articles:
            self.articles[label] = []

        content = self._get_content(
            "{}_statements".format(index),
            statement['text'],
            label=label,
            split_sentences=False,
        )
        self.articles[label].append({"content": content})

    return self.articles, self.original_articles, self.vocabulary, self.frequency
def load(self):
    """Load a pickled dataset bundle and return its four components.

    The pickle at ``self.config.dataset.dataset_path`` (resolved through
    ``get_fullpath``) is expected to be a mapping with the keys
    ``"articles"``, ``"original_articles"``, ``"vocabulary"`` and
    ``"frequency"``.

    NOTE(review): ``pickle.load`` must only be used on trusted files --
    confirm the dataset path cannot point at untrusted data.

    :return: tuple ``(articles, original_articles, vocabulary, frequency)``.
    """
    dataset_path = get_fullpath(self.config.dataset.dataset_path)
    with open(dataset_path, "rb") as handle:
        bundle = pickle.load(handle)

    wanted = ("articles", "original_articles", "vocabulary", "frequency")
    return tuple(bundle[key] for key in wanted)