示例#1
0
    def test_read(self):
        # Reading through a temp file must see the file's current contents.
        FILENAME.write_text('hello')
        with safer.open(FILENAME, 'r+', temp_file=True) as stream:
            contents = stream.read()
            assert contents == 'hello'

        # Without temp_file=True, 'r+' is rejected outright.
        with self.assertRaises(ValueError):
            safer.open(FILENAME, 'r+')
示例#2
0
    def test_all_modes(self):
        """Open FILENAME with temp_file=True in every supported mode.

        Each mode is tried both as text and with a 'b' (binary) suffix;
        opening and immediately closing must succeed for all of them.
        """
        # The original tuple listed 'a' twice; the duplicate added no coverage.
        modes = 'w', 'r', 'a', 'r+', 'w+', 'a+'

        for mode in modes:
            with safer.open(FILENAME, mode, temp_file=True):
                pass
            with safer.open(FILENAME, mode + 'b', temp_file=True):
                pass
示例#3
0
def fix_nulls(filepath):
    """Remove NUL bytes from the given file and overwrite it atomically.

    When the device gets unplugged mid-write to a CSV, null characters get
    introduced, corrupt the CSV, and cause later Athena queries to fail.
    This function makes a basic attempt at turning the CSV valid again.

    Returns the path that was rewritten.
    """
    # Read and close the file *before* rewriting it.  The original nested the
    # write inside the read context, so safer's atomic rename happened while a
    # read handle on the same file was still open — unnecessary, and it can
    # fail on platforms that lock open files (e.g. Windows).
    with safer.open(filepath, "rb") as fr:
        content = fr.read().replace(b"\x00", b"")

    # safer writes to a temp file and renames on success, so a crash here
    # leaves the original file untouched.
    with safer.open(filepath, "wb") as fw:
        fw.write(content)
    return filepath
示例#4
0
 def test_mode_error1(self, safer_writer):
     # A stream opened for reading cannot be wrapped by a safer writer.
     with safer.open(FILENAME, 'w') as fp:
         pass
     with open(FILENAME) as reader:
         with self.assertRaises(ValueError) as caught:
             safer_writer(reader)
         message = caught.exception.args[0]
         assert message == 'Stream mode "r" is not a write mode'
示例#5
0
 def test_dry_run(self, safer_writer):
     # With dry_run=True, nothing ever reaches the disk — not during the
     # write, and not after the stream closes.
     assert not FILENAME.exists()
     with safer.open(FILENAME, 'w', dry_run=True) as handle:
         assert not FILENAME.exists()
         handle.write('one')
         assert not FILENAME.exists()
     assert not FILENAME.exists()
示例#6
0
文件: cast.py 项目: rec/scripta
 def write(self, fp):
     # Accept either an open stream or a path string.  For a path, open it
     # safely (atomic replace on success) and recurse with the stream.
     if isinstance(fp, str):
         with safer.open(fp, 'w') as stream:
             return self.write(stream)

     # One JSON document per line: the header first, then each entry.
     print(json.dumps(self.header), file=fp)
     for entry in self.lines:
         print(json.dumps(entry.to_list()), file=fp)
示例#7
0
    def test_int_filename(self):
        # Only string-like names are accepted; an int must raise TypeError
        # with a descriptive message.
        with self.assertRaises(TypeError) as caught:
            with safer.open(1, 'w', temp_file=True) as fp:
                fp.write('hello')

        message = caught.exception.args[0]
        assert message == '`name` must be string, not int'
示例#8
0
 def test_nested_writers(self, safer_writer):
     # A safer writer wrapped around another safer stream flushes its
     # output into the outer stream when the inner writer closes.
     with safer.open(FILENAME, 'w') as outer:
         outer.write('one')
         with safer_writer(outer) as inner:
             inner.write('two')
             inner.write('three')
         outer.write('four')
     assert FILENAME.read_text() == 'onetwothreefour'
示例#9
0
    def test_temp_file1(self):
        # An explicitly-named temp file exists while writing and is removed
        # once the write commits.
        temp_file = FILENAME.with_suffix('.temp_file')
        with safer.open(FILENAME, 'w', temp_file=temp_file) as writer:
            assert temp_file.exists()
            assert os.path.exists(temp_file)
            writer.write('hello')

        assert FILENAME.read_text() == 'hello'
        assert not temp_file.exists()
示例#10
0
    def test_make_parents(self, safer_open):
        # Without make_parents a missing directory is an error; with it,
        # intermediate directories are created on demand.
        FILENAME = Path('foo/test.txt')
        with self.assertRaises(IOError):
            with safer.open(FILENAME, 'w'):
                pass

        with safer_open(FILENAME, 'w', make_parents=True) as writer:
            writer.write('hello')
        assert FILENAME.read_text() == 'hello'
示例#11
0
 def test_file_error(self, safer_writer):
     # An exception inside the nested writer discards only its output;
     # the outer stream's writes before and after still land.
     with safer.open(FILENAME, 'w') as outer:
         outer.write('one')
         with self.assertRaises(ValueError):
             with safer_writer(outer) as inner:
                 inner.write('two')
                 inner.write('three')
                 raise ValueError
         outer.write('four')
     assert FILENAME.read_text() == 'onefour'
示例#12
0
    def _get_or_create_todos(self) -> dict:
        """Load the todos file, creating it with the defaults when absent."""
        if not os.path.exists(self.todos_path):
            # First run: materialize the default todos atomically via safer.
            os.makedirs(self.todos_dir, exist_ok=True)
            with safer.open(self.todos_path, "w") as file:
                json.dump(self.DEFAULT_TODOS, file)
            return self.DEFAULT_TODOS

        with open(self.todos_path) as file:
            return json.load(file)
示例#13
0
def get_tomorrows_RSS_fuel_data(filter=True, **kwargs):
    """Get tomorrow's Fuel Watch data per product and save each feed to file.

    For every product in the Fuel Watch product filter, fetch tomorrow's RSS
    feed, parse it, and write the entries as JSON under ``fuel_data/``.
    Files that fail the on-disk check are reported at the end.

    NOTE(review): `filter` shadows the builtin and is never read here; it is
    kept only for call-site compatibility — confirm whether callers pass it.
    """
    file_failed_list = []

    # The original set kwargs["Day"] twice and looped over a one-element
    # ["tomorrow"] list; a single assignment is equivalent.
    kwargs["Day"] = "tomorrow"

    # Dict of products to iterate over.
    prod = filters.Product()

    if tomorrow_RSS_data_available():
        for key in prod.keys():
            # "FilterName" is metadata, not a product.
            if key == "FilterName":
                continue

            kwargs["Product"] = key
            url = construct_fuel_watch_url(**kwargs)
            response = requests.get(url)

            # Only parse successful responses; other statuses are skipped
            # silently, exactly as before.
            if response.status_code != 200:
                continue

            parsed_data = feedparser.parse(response.content)["entries"]

            # File name built from the feed timestamp, region, and product.
            file_name = join_string_with_operator(
                values=[
                    parsed_data[0]["updated"],
                    kwargs["StateRegion"],
                    kwargs["Product"],
                ],
                operator="_",
            ) + ".json"

            # safer guarantees all-or-nothing writes, so a crash cannot
            # leave a truncated JSON file behind.
            with safer.open(os.path.join("fuel_data", file_name), "w") as fp:
                json.dump(parsed_data, fp)

            if not check_file_name_saved_on_disk(file_name):
                file_failed_list.append(file_name)

    print("Files Failed ", file_failed_list)
示例#14
0
    def test_binary(self, safer_open):
        # A binary stream accepts bytes only; writing text raises TypeError.
        with safer_open(FILENAME, 'wb') as writer:
            writer.write(b'hello')
            writer.write(b' there')
            with self.assertRaises(TypeError):
                writer.write('hello')

        # The committed file reads back identically through both plain
        # open() and safer.open().
        with open(FILENAME, 'rb') as reader:
            assert reader.read() == b'hello there'
        with safer.open(FILENAME, 'rb') as reader:
            assert reader.read() == b'hello there'
示例#15
0
    def test_temp_file2(self):
        # An exception during the write aborts the commit: neither the
        # target file nor the named temp file survives.
        temp_file = FILENAME.with_suffix('.temp_file')

        with self.assertRaises(ValueError) as caught:
            with safer.open(FILENAME, 'w', temp_file=temp_file) as writer:
                assert temp_file.exists()
                writer.write('hello')
                raise ValueError('Expected')
        assert caught.exception.args[0] == 'Expected'

        assert not FILENAME.exists()
        assert not temp_file.exists()
示例#16
0
 def test_nested_writers_dry_run(self, safer_writer):
     # dry_run on both the outer stream and the nested writer: the target
     # file never appears at any point.
     assert not FILENAME.exists()
     with safer.open(FILENAME, 'w', dry_run=True) as outer:
         assert not FILENAME.exists()
         outer.write('one')
         with safer_writer(outer, dry_run=True) as inner:
             assert not FILENAME.exists()
             inner.write('two')
             inner.write('three')
         assert not FILENAME.exists()
         outer.write('four')
     assert not FILENAME.exists()
示例#17
0
    def get_blocks_from_file(cls, fileNumber):
        """Read and deserialize every block stored in one blk*.dat file.

        The on-disk format, as written by save(): each record is a 4-byte
        little-endian unsigned length followed by that many bytes of
        serialized block.
        """
        obj = cls.__new__(cls)
        block_file = f"{Config.BLOCKS_DIR}blk{blk_file_format(fileNumber)}.dat"
        blocks = []

        with safer.open(block_file, "rb") as f:
            file_bytes = f.read()

        # Walk the buffer with an offset.  The original re-sliced the whole
        # remaining byte string every iteration, making the loop O(n^2) in
        # the file size; this is the same traversal in O(n).
        offset = 0
        total = len(file_bytes)
        while offset < total:
            block_size = int.from_bytes(file_bytes[offset:offset + 4],
                                        byteorder="little",
                                        signed=False)
            start = offset + 4
            blocks.append(obj.deserialize(file_bytes[start:start + block_size]))
            offset = start + block_size

        return blocks
示例#18
0
    def test_explicit_close(self):
        # Without a context manager, the write only commits on close();
        # until then a temp file sits beside the untouched target.
        FILENAME.write_text('hello')
        assert FILENAME.read_text() == 'hello'
        before = set(os.listdir('.'))

        fp = safer.open(FILENAME, 'w', temp_file=True)
        fp.write('OK!')
        assert FILENAME.read_text() == 'hello'

        during = set(os.listdir('.'))
        assert len(during) == len(before) + 1
        assert len(during - before) == 1

        fp.close()

        self.assertEqual(FILENAME.read_text(), 'OK!')
        assert FILENAME.read_text() == 'OK!'
        assert set(os.listdir('.')) == before
示例#19
0
    def commit(self, host: str) -> bool:
        """Persist current API information to file

        Parameters
        ----------
        host : str
            Host to persist

        Returns
        -------
        bool
        """
        filename = 'config/apidata/%s' % config().getHostfile(host)

        # Serialize first, then write atomically via safer.
        serialized = json.dumps(self.__data[host], indent=2)
        with safer.open(filename, 'w') as f:
            f.write(serialized)
        print('Saved data for %s ...' % host)

        return True
示例#20
0
    def save(self) -> None:
        """Append this block to the current on-disk block file and update indexes.

        Validation retained (commented out) from the original:
            if not self.validate():
                raise BlockNotValidError("Block is not valid")
        """
        file_number = self.current_file_number
        current_block_file = f"{Config.BLOCKS_DIR}{get_current_blk_file(self.current_file_number)}"

        # Record format: 4-byte little-endian unsigned length prefix followed
        # by the serialized block.  "ab" appends without truncating the file;
        # safer makes the append all-or-nothing.
        with safer.open(current_block_file, "ab") as w:
            serialized_block = self.serialize()
            block_size = len(serialized_block)
            w.write(
                block_size.to_bytes(4, byteorder="little", signed=False) +
                serialized_block)

        # NOTE(review): debug print — reads the whole of file 0 back on every save.
        print(Block.get_blocks_from_file(0))
        self.set_tip(self.height)
        self.set_block_file_number(self.height, file_number)

        # NOTE(review): miner_balance is never used below — this snippet may be
        # truncated, or the assignment is dead; confirm against the full file.
        miner_balance = self.get_account(self.miner)
        miner_reward = mining_reward(self.height)
        print(f"Miner reward: {miner_reward}")
示例#21
0
def ld_list(list_livedisc, bandwidth):
    """Download, in random order, every URL listed in the given list files.

    Reads all files in *list_livedisc*, shuffles the combined URL list, and
    downloads each non-empty entry at the given bandwidth.  Always exits the
    process with a success status when done.
    """
    # Accumulate URLs from *all* list files.  The original rebound urlList
    # inside the loop, silently keeping only the last file's entries.
    urlList = []
    for dList in list_livedisc:
        with safer.open(dList) as f:
            urlList.extend(line.strip() for line in f)

    try:
        random.shuffle(urlList)
        for item in urlList:
            if item != "":
                print("download: " + item)
                download_livedisk(str(item), bandwidth)

    except KeyboardInterrupt:
        print("\nInterupt by User\n")
        exit()

    except Exception:
        # Narrowed from a bare `except:`.  str() is required here: the
        # original concatenated the exception *class* to a string, which
        # itself raised TypeError inside the handler.
        print("error: " + str(sys.exc_info()[0]))

    finally:
        sys.exit(ExitStatus.success)
示例#22
0
def main():
    """Rank collected sentences by reading level and dump NLP info for the top 50."""
    analysis = processor.sources_analysis.do_analysis()

    # Large spaCy pipelines for English and French.
    nlp_en = spacy.load("en_core_web_lg")
    nlp_fr = spacy.load("fr_core_news_lg")

    # NOTE(review): `sentences` is not defined in this snippet — presumably a
    # module-level mapping (or should this be analysis.sentences?); confirm.
    # The `True or` makes the quote/hyphen filter a no-op — looks like a
    # deliberately-parked debug toggle, so it is left as is.
    pairs = [(k, v) for k, v in sentences.items()
             if True or ('"' not in k and '-' not in k)]

    # Hardest sentences first (reverse=True), by reading level.
    # pairs.sort(key=lambda sentence: rate_french_sentence_easiness(sentence[0], analysis), reverse=True)
    pairs.sort(key=lambda sentence: rate_french_sentence_reading_level(
        sentence[0], analysis),
               reverse=True)

    # Merge the per-pair NLP info dicts for the 50 top-ranked sentences.
    data = {
        k: v
        for pair in pairs[:50]
        for k, v in get_nlp_info(nlp_en, nlp_fr, pair, analysis).items()
    }

    # safer makes the YAML dump all-or-nothing on disk.
    dump = yaml.dump(data, Dumper=Dumper, allow_unicode=True)
    with safer.open("work/nlp_sentences.yaml", "w", encoding='utf-8') as f:
        f.write(dump)
def main(analysis=None):
    """Create handmade_dictionary.yaml pre-filled with the most frequent unknown words.

    Refuses to overwrite an existing file.  If *analysis* is not supplied,
    a fresh source analysis is computed.  The output is a YAML template of
    the 20 most frequent unknown words, preceded by commented instructions.
    """
    if isfile("handmade_dictionary.yaml"):
        print("can't create handmade_dictionary.yaml, it already exists!")
        return

    # `analysis == None` replaced with the identity check (PEP 8: comparisons
    # to None should always be `is` / `is not`).
    if analysis is None:
        analysis = processor.sources_analysis.do_analysis()
    (collected_words, collected_sentences, source_info) = analysis

    instructions = """
When adding verbs, use the `get_conjugations.py` script to easily generate the YAML required (this is very time-consuming to do by hand).
"""

    known_words = get_all_known_french_words()
    unknown_words = collected_words.keys() - known_words
    # Most frequent first.  NOTE(review): the `True or` disables the
    # per-source filter — looks like a parked debug toggle; left unchanged.
    unknown_words = sorted(list(unknown_words), key=(lambda w: -sum([v for k, v in collected_words[w].items() if True or k[0] == 'le_petit_nicolas'])))

    with safer.open("handmade_dictionary.yaml", "w", encoding='utf-8') as f:
        # One template entry per word, with blank fields to fill in by hand.
        template = {word: [{'display': word, 'gender': '', 'pos': '', 'translations': ['']}] for word in unknown_words[:20]}
        data = yaml.dump(template, Dumper=Dumper, allow_unicode=True)
        f.write("# " + "\n# ".join(instructions.split("\n")) + "\n\n")
        f.write(data)
示例#24
0
def on_save(path):
    """Appends the count for the last minute to a file on disk."""
    global count
    # NOTE(review): utcnow() is deprecated in 3.12; kept because
    # common.write_marker may expect a naive datetime — confirm before
    # switching to datetime.now(timezone.utc).
    now = datetime.datetime.utcnow()

    marker = common.write_marker(path, now)
    logger.debug("Updated local cache marker to %s", marker)

    if count == 0:
        logger.debug("Skipping save: no rotations in the last minute")
        return

    with safer.open(
            os.path.join(path, f"{marker}.csv"),
            mode="a",
            encoding="utf-8",
    ) as f:
        # write(), not writelines(): the original passed a single string to
        # writelines(), which iterates it and writes character by character.
        f.write(f"{now:%Y-%m-%dT%H:%M:%SZ},{count}\n")

    logger.info("Stored %s rotations per minute in local cache", count)
    # Context manager guarantees release even if the reset raises
    # (presumably a threading.Semaphore/Lock — confirm it supports `with`).
    with semaphore:
        count = 0
示例#25
0
def do_analysis():
    """Scan the configured source texts and collect word/sentence statistics.

    Returns an Analysis with:
      - words: word -> Counter of occurrences keyed by (dir, file) source
      - sentences: sentence -> {source: [line indexes where it appears]}
      - source_info: source -> parsed YAML frontmatter (None on parse failure)
    """
    print("Analyzing source files...")
    source_paths = ["../books/hp", "../books/inner_french_podcast", "../books/hprat", "../books/le_petit_prince", '../books/alcatraz', '../books/le_petit_nicolas']
    source_files = [Path(join(p, f)) for p in source_paths for f in listdir(p) if isfile(join(p, f))]

    collected_words = {}
    collected_sentences = {}
    source_info = {}

    for filename in source_files:
        frontmatter = ""
        # Source key is (parent directory, file name).
        source = tuple(filename.parts[-2:])
        with safer.open(filename, "r", encoding='utf-8') as f:
            # State flag: are we past the "---"-delimited YAML frontmatter?
            out_of_frontmatter = False
            for index, line in enumerate(f):
                # Skip lines that are empty once non-word characters are removed.
                if retain_only_characters(line).strip() == "":
                    continue

                # Opening "---" on the first line starts the frontmatter;
                # any other non-empty line means we are in the body.
                if index == 0 and line.strip() == "---":
                    out_of_frontmatter = False
                    continue
                else:
                    out_of_frontmatter = True

                # A later "---" closes (re-enters) the frontmatter section.
                if index > 0 and line.strip() == "---":
                    out_of_frontmatter = False

                if out_of_frontmatter == False:
                    frontmatter += line
                else:
                    line = clean_line(line)
                    # Tally every French word for this source.
                    words = line_to_words_french(line)
                    for word in words:
                        collected_words[word] = collected_words.get(word, Counter())
                        collected_words[word][source] += 1

                    # NOTE(review): both sides of this `or` test the same "»"
                    # character — one was presumably meant to be "«"; confirm.
                    if "»" in line or "»" in line or line == "":
                        # Don't want to deal with these right now
                        pass
                    else:
                        # I was hoping this would work but it fails by splitting "– Allez, ouste ! s'exclama Mr Dursley." into two seperate sentences.
                        """ 
                        doc = nlpfr(line)
                        for sentence in doc.sents:
                            print(sentence)
                            if False and sentence != "" and len(sentence.split()) > 1:
                                collected_sentences[sentence] = collected_sentences.get(sentence, {})
                                collected_sentences[sentence][source] = collected_sentences[sentence].get(source, []) + [index]
                        """

                        # Split the line into sentences: start at a capital or
                        # opening quote, end at .?! (optionally quoted) or -".
                        sentences = re.findall(r'(?:["«A-ZÉÀÂÄÈÉÊËÎÏÔŒÙÛÜŸÇ]).*?(?:(?:[.?!]["»]?)|-")(?=$| ["«]?[A-ZÉÀÂÄÈÉÊËÎÏÔŒÙÛÜŸÇ])', line.strip())
                        sentences = [sentence.strip('– ') for sentence in sentences]
                        # sentences = [sentence.strip('"') if sentence.count('"') == 2 and sentence[0] == '"' and sentence[-1] == '"' and sentence[-2] != "-" else sentence for sentence in sentences] # sometimes sentences are wrapped in "s

                        sentence_buildup = ""  # sometimes we have false-negatives where sentences end where they shouldn't, mostly in names like M. McGonagall or whatever. This detects those
                        for sentence in sentences:
                            # Abbreviated-name endings (M., Dr., …) are not real
                            # sentence ends; buffer and join with the next piece.
                            if sentence[-3:] == " M." or sentence[-3:] == " H." or sentence[-4:] == " Dr." or sentence[-3:] == " D." or sentence[-3:] == " J.":
                                sentence_buildup += sentence + " "
                            else:
                                sentence = sentence_buildup + sentence
                                sentence_buildup = ""

                                sentence = sentence.strip()
                                sentence = sentence.strip('– ')
                                sentence = sentence.replace('"', "").strip() if sentence.count('"') == 1 else sentence
                                sentence = sentence.strip()
                                # Keep only multi-word, non-empty sentences.
                                if sentence != "" and len(sentence.split()) > 1:
                                    collected_sentences[sentence] = collected_sentences.get(sentence, {})
                                    collected_sentences[sentence][source] = collected_sentences[sentence].get(source, []) + [index]

        # NOTE(review): bare except — deliberately best-effort (bad frontmatter
        # is reported, not fatal), but it also swallows KeyboardInterrupt.
        try:
            source_info[source] = yaml.load(frontmatter, Loader=Loader)
        except:
            print(f"trouble processing frontmatter for {source}")
            print(frontmatter)
    return Analysis(words=collected_words, sentences=collected_sentences, source_info=source_info)
示例#26
0
def get_translations():
    """Load and return the translations mapping from translations.yaml."""
    with safer.open("translations.yaml", encoding='utf-8') as stream:
        return yaml.load(stream, Loader=Loader)
示例#27
0
def get_word_dictionary():
    """Load and return the word dictionary from worddictionary.yaml."""
    with safer.open("worddictionary.yaml", encoding='utf-8') as stream:
        return yaml.load(stream, Loader=Loader)
示例#28
0
def write_help():
    """Render the help text and write it to HELP_FILE atomically."""
    help_text = get_help()
    with safer.open(HELP_FILE, 'w') as stream:
        stream.write(help_text)
示例#29
0
def read_file(path: Union[str, Path], encoding: str = "utf-8") -> str:
    """Safely open .ipynb file."""
    with safer.open(resolve(path), "r", encoding=encoding) as ipynb_infile:
        contents = ipynb_infile.read()
    return cast(str, contents)
示例#30
0
 def _error(self, mode='w', **kwds):
     # Opening with temp_file=True and these arguments must fail; hand the
     # ValueError's message back to the caller for inspection.
     with self.assertRaises(ValueError) as caught:
         safer.open(FILENAME, mode, temp_file=True, **kwds)
     return caught.exception.args[0]