def get_articles(self, articles_directory, number_fake, number_real):
    """Sample fake and real articles and append them to ``self.articles``.

    File names are drawn at random from the ``Fake`` and ``Real``
    sub-directories of ``articles_directory``. ``np.random.choice`` is
    called without ``replace=False``, so the same file name may be drawn
    more than once. For every sampled name, the article body is read from
    the ``Fake``/``Real`` directory and its title from the file of the same
    name in ``Fake_titles``/``Real_titles``.

    :param articles_directory: directory (resolved relative to ``self.path``
        by ``get_fullpath``) holding the four sub-directories
    :param number_fake: how many fake article names to sample
    :param number_real: how many real article names to sample
    """
    self.nbre_all_article = number_fake + number_real

    fake_dir = get_fullpath(self.path, articles_directory, 'Fake')
    fake_titles_dir = get_fullpath(self.path, articles_directory,
                                   'Fake_titles')
    real_dir = get_fullpath(self.path, articles_directory, 'Real')
    real_titles_dir = get_fullpath(self.path, articles_directory,
                                   'Real_titles')

    # Both samples are drawn up front, before any article is read.
    sampled_fake = np.random.choice(os.listdir(fake_dir), number_fake)
    sampled_real = np.random.choice(os.listdir(real_dir), number_real)

    batches = (
        ('fake', sampled_fake, fake_dir, fake_titles_dir),
        ('real', sampled_real, real_dir, real_titles_dir),
    )
    for label, names, content_dir, titles_dir in batches:
        for name in names:
            self.articles[label].append({
                'content': self.get_content(get_fullpath(content_dir, name)),
                'title': self.get_content(get_fullpath(titles_dir, name)),
            })
Пример #2
0
    def test_get_fullpath(self):
        '''Exercise utils.get_fullpath on an existing and a missing file.

        Three cases are checked: an existing file is returned unchanged,
        a missing file triggers SystemExit with code 1, and a missing file
        with required=False yields None.
        '''
        print("Testing utils.get_fullpath...")
        from utils import get_fullpath
        # mkstemp returns an open OS-level descriptor along with the path;
        # close it right away so it is not leaked (and so the file can be
        # removed below on platforms that refuse to delete open files).
        fd, tmpfile = tempfile.mkstemp()
        os.close(fd)

        print("Case 1: File exists, should return full path")
        self.assertEqual(get_fullpath(tmpfile), tmpfile)

        print("Case 2: File doesn't exist, should return error")
        os.remove(tmpfile)
        with self.assertRaises(SystemExit) as cm:
            get_fullpath(tmpfile)
        self.assertEqual(cm.exception.code, 1)

        print("Case 3: File doesn't exist, but not required, should return None")
        self.assertIsNone(get_fullpath(tmpfile, required=False))
Пример #3
0
def read_files():
    """Read every entry of the ``output`` directory next to this module.

    The ``output`` directory is located beside this source file and handed
    to ``get_fullpath``; each item of the value it returns is passed to
    ``read_file``.
    NOTE(review): this assumes ``get_fullpath`` returns an iterable of file
    paths for a directory -- confirm against its definition.

    :return: list with the result of ``read_file`` for each item, in order
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(module_dir, 'output')
    return [read_file(entry) for entry in get_fullpath(output_dir)]
Пример #4
0
 def save(self, path):
     """
     Pickle the dataset state to a file.

     The articles, original articles, vocabulary and frequency attributes
     are bundled into one dictionary and written with ``pickle.dump``.

     :param path: destination, resolved through ``get_fullpath``
     :return: None
     """
     target = get_fullpath(path)
     with open(target, "wb") as handle:
         pickle.dump(
             {
                 "articles": self.articles,
                 "original_articles": self.original_articles,
                 "vocabulary": self.vocabulary,
                 "frequency": self.frequency,
             },
             handle,
         )
Пример #5
0
def configure(args):
    """Fill a configuration template with values from a defaults header.

    Every line of the defaults file matching ``#define NAME value`` provides
    the replacement text for the ``@NAME@`` placeholder in the input
    template. Double quotes are stripped from the value, and the bare
    values ``0`` and ``1`` are rendered as ``no`` and ``yes``. The result is
    written to ``<outfile>.tmp`` and then renamed to ``outfile``.

    :param args: object with ``defaults``, ``infile`` and ``outfile``
        attributes holding file paths
    """
    # Get fullpath to each file, and concurrently check that exists
    defaults_path = get_fullpath(args.defaults)  # ../src/lib/config_defaults.h
    template_path = get_fullpath(args.infile)    # singularity.conf.in

    # Find define statements
    define_re = re.compile(r"#define ([A-Z_]+) (.*)")

    # Read in input and default files
    default_lines = read_file(defaults_path)
    data = "".join(read_file(template_path))

    # Lookup for values we want replaced
    lookup = {'0': 'no',
              '1': 'yes'}

    for line in default_lines:
        match = define_re.match(line)
        if not match:
            continue
        key, value = match.groups()

        # Use parsed value for final config
        new_value = value.replace('"', '')
        new_value = lookup.get(new_value, new_value)
        data = data.replace("@" + key + "@", new_value)

    # Write to a temporary file first, then rename it onto the real target
    outfile = "%s.tmp" % (args.outfile)
    write_file(outfile, data)
    os.rename(outfile, args.outfile)

    logger.info("*** FINISHED PYTHON CONFIGURATION HELPER ****\n")
Пример #6
0
 def load(self):
     """
     Load pickled statements and sort them into ``self.articles``.

     Statements labelled "half-true" are skipped. Labels "true" and
     "mostly-true" are stored under "real"; every other label is stored
     under "fake". Each kept statement's text is passed through
     ``self._get_content`` and appended as ``{"content": ...}``.

     NOTE(review): the dataset is read with ``pickle.load``; make sure the
     file at the configured path comes from a trusted source.

     :return: tuple of (articles, original_articles, vocabulary, frequency)
     """
     dataset_path = get_fullpath(
         self.config.dataset.sentence_based.dataset_path)
     with open(dataset_path, "rb") as handle:
         statements = pickle.load(handle)

     for index, statement in enumerate(statements):
         raw_label = statement['label']
         if raw_label == "half-true":
             continue

         label = "real" if raw_label in ('true', 'mostly-true') else "fake"
         if label not in self.articles:
             self.articles[label] = []

         content = self._get_content(str(index) + "_statements",
                                     statement['text'],
                                     label=label,
                                     split_sentences=False)
         self.articles[label].append({"content": content})

     return (self.articles, self.original_articles, self.vocabulary,
             self.frequency)
 def load(self):
     """
     Load a previously pickled dataset from the configured path.

     NOTE(review): the file is read with ``pickle.load``; make sure it comes
     from a trusted source.

     :return: tuple of the "articles", "original_articles", "vocabulary"
         and "frequency" entries of the pickled dictionary, in that order
     """
     dataset_path = get_fullpath(self.config.dataset.dataset_path)
     with open(dataset_path, "rb") as handle:
         stored = pickle.load(handle)
     wanted = ("articles", "original_articles", "vocabulary", "frequency")
     return tuple(stored[key] for key in wanted)