def test_cursor_assignment_does_not_exceed_bounds(self):
    """Check that cursors are clamped to the file bounds and never cross.

    Three scenarios are exercised on a ten-line ``File``:
    both cursors set past the end, both cursors set below zero, and the
    end cursor moved in front of the start cursor.
    """
    f = File()
    f.file_lines = ['line: ' + str(i) for i in range(0, 10)]

    # Both cursors are assigned values beyond the last line index.
    f.cursor_end = 20
    f.cursor_start = 15
    self.assertEqual(
        f.file_length - 1,
        f.cursor_end,
        msg='End cursor should match file length when bounds exceed')
    self.assertEqual(
        f.cursor_end,
        f.cursor_start,
        msg='start cursor should match end cursor when start overlaps')

    # Both cursors are assigned negative values.
    f.cursor_start = -10
    f.cursor_end = -5
    self.assertEqual(
        f.cursor_start,
        0,
        # The original message here was copy-pasted from the end-cursor
        # check above and described the wrong expectation.
        msg='Start cursor should be clamped to 0 when bounds exceed')
    self.assertEqual(
        f.cursor_end,
        f.cursor_start,
        msg='end cursor should match start cursor when end overlaps')

    # The end cursor is moved to a position before the start cursor.
    f.cursor_end = 4
    f.cursor_start = 2
    f.cursor_end = 1
    self.assertEqual(
        f.cursor_end,
        f.cursor_start,
        msg='end cursor should match start cursor when end overlaps')
def test_strip_all_files_does_strips_all_files(self):
    """Check that ``rstrip_all_lines('\\n')`` removes every trailing newline."""
    # Lines as they should look once the trailing newline is gone.
    expected_lines = [
        str(n) + ' squared is: ' + str(n * n) for n in range(0, 100, 3)
    ]

    f = File()
    # Same content, each line terminated by a newline.
    f.file_lines = [line + '\n' for line in expected_lines]

    f.rstrip_all_lines('\n')

    self.assertEqual(f.file_lines,
                     expected_lines,
                     msg='all newlines should have been stripped.')
def test_file_lines_assignment_updates_file_length(self):
    """Check that assigning ``file_lines`` is reflected in ``file_length``."""
    f = File()
    self.assertEqual(f.file_length, 0, msg='Zero Lines, length should be 0')

    f.file_lines = ['line: ' + str(i) for i in range(0, 10)]

    # The failure messages previously said "should be 0" although the
    # assertions expect 10.
    self.assertEqual(len(f.file_lines),
                     10,
                     msg='Ten Lines, length should be 10')
    self.assertEqual(f.file_length,
                     10,
                     msg='Ten Lines, length should be 10')
def create_file(file_path):
    """Read ``file_path`` and wrap its lines in a ``File.PopulatedFile``.

    :param file_path: path of the text file to read.
    :return: the object returned by ``File.PopulatedFile`` called with the
        label ``'dir_file'``, the list of lines, ``0`` and the index of the
        last line (``-1`` for an empty file).
    :raises OSError: if the file cannot be opened.
    """
    # The context manager closes the handle even if reading fails; the
    # previous implementation left it open.
    with open(file_path) as handle:
        file_lines = handle.readlines()
    last_index = len(file_lines) - 1
    # NOTE(review): the last two arguments look like start/end cursor
    # positions -- confirm against File.PopulatedFile.
    return File.PopulatedFile('dir_file', file_lines, 0, last_index)
def add_word_file(self, file_path):
    """Extract the text of a Word document and add it to ``self.files``.

    Loading is best-effort: any exception is reported on stdout and the
    document is skipped.
    """
    try:
        extracted = docx2txt.process(file_path)
        document = File(file_path, extracted)
        self.files.append(document)
    except Exception as error:
        print("Couldn't Load {0}: {1}".format(file_path, error))
def __get_folder_content(self) -> List[File]:
    """Return a ``File`` for every path matching the work path and extension.

    An empty list is returned when ``self.path_exist()`` is false.
    """
    if not self.path_exist():
        return []

    # Pattern is "<work_path>*.<work_extension>".
    pattern = '{}*.{}'.format(self.__work_path, self.__work_extension)
    return [File(match) for match in glob.glob(pattern)]
def add_text_file(self, file_path):
    """Read a UTF-8 text file and add it to ``self.files``.

    Undecodable bytes are ignored. Loading is best-effort: any exception
    is reported on stdout and the file is skipped.
    """
    try:
        with open(file_path, "r", encoding="utf8",
                  errors="ignore") as handle:
            contents = handle.read()
        loaded = File(file_path, contents)
        self.files.append(loaded)
    except Exception as error:
        print("Couldn't Load {0}: {1}".format(file_path, error))
def test_returns_file_content(self):
    """Check that ``File.read`` opens, reads and closes the given file."""
    file_name = 'some file name'
    fake_handle = Mock(readlines=Mock(), close=Mock())
    fake_open = Mock(return_value=fake_handle)

    File().read(file_name, open_function=fake_open)

    # The injected opener is called once in read mode, and the handle it
    # returns is read and closed exactly once.
    fake_open.assert_called_once_with(file_name, 'r')
    fake_handle.readlines.assert_called_once_with()
    fake_handle.close.assert_called_once_with()
def add_pdf_file(self, file_path):
    """Extract the text of every page of a PDF and add it to ``self.files``.

    Loading is best-effort: any exception is reported on stdout and the
    document is skipped.
    """
    try:
        with open(file_path, "rb") as raw_file:
            reader = PyPDF2.PdfFileReader(raw_file)
            # Concatenate the page texts in page order.
            text = "".join(
                reader.getPage(page_index).extractText()
                for page_index in range(reader.numPages))
            document = File(file_path, text)
            self.files.append(document)
    except Exception as error:
        print("Couldn't Load {0}: {1}".format(file_path, error))
def __init__(self,
             input_file,
             wordlist_file,
             file=File(),
             similarity_checker=SimilarityChecker,
             print_function=print):
    """Find the wordlist entry closest to the input sentence and report it.

    :param input_file: file whose first line is the sentence to match.
    :param wordlist_file: file whose lines are the candidates.
    :param file: reader object providing ``read(name)``. The default
        instance is created once, when the function is defined.
    :param similarity_checker: object providing ``process_sentance``.
    :param print_function: callable used to emit the two result lines.
    """
    # Only the first line of the input file is used.
    target = file.read(input_file)[0]
    candidates = file.read(wordlist_file)

    best_match = similarity_checker.process_sentance(candidates, target)

    print_function('Closest sentance: ' + best_match['sentance'])
    print_function('Total number of changes that had to be made: ' +
                   str(best_match['distance']))
def sort_file(self, file: File):
    """Sort ``file`` into an existing category or pair it into a new one.

    The file is first compared with every existing category: the
    similarity to each member is averaged, and the file joins the first
    category whose average reaches ``DocumentSorter.threshold``. If no
    category accepts it, the most similar still-unsorted file reported by
    ``self.document_sorter.check_for_similar`` is paired with it in a new
    ``"untitled"`` category. Files that are already sorted are left alone.

    :param file: the file to sort; its ``sorted`` flag is set on success.
    :return: ``None``.
    """
    if self.logging:
        echo("Sorting file: {}".format(path.basename(file.path)))

    if file.sorted:
        return

    def without_stop_words(text):
        # Parse the text, drop stop-word tokens and re-parse the remainder.
        return self.nlp(' '.join(
            [str(token) for token in self.nlp(text) if not token.is_stop]))

    # The filtered doc for ``file`` is the same for every comparison, so it
    # is built once, and only if there is something to compare against.
    # Previously it was rebuilt for every file of every category.
    # NOTE(review): assumes self.nlp returns equivalent docs for the same
    # text on repeated calls -- confirm.
    file_doc_no_stop = None

    # Check to see if a file can be sorted into a pre-existing category
    for category in self.categories:
        similarity = 0

        # Collects the average similarity to each document already in the
        # category
        for category_file in category.files:
            if file_doc_no_stop is None:
                file_doc_no_stop = without_stop_words(file.contents)
            other_doc_no_stop = without_stop_words(category_file.contents)
            similarity += (file_doc_no_stop.similarity(other_doc_no_stop)
                           ) / len(category.files)

        if similarity >= DocumentSorter.threshold:
            category.files.append(file)
            file.sorted = True
            return

    # Checks to see if a file can be sorted into a new category with
    # another file
    similar_files = self.document_sorter.check_for_similar(
        file.path, file.contents)

    best_match = None
    best_score = float("-inf")
    for other_file, similarity in similar_files:
        # ``>=`` means a later candidate wins a tie.
        if other_file.sorted is False and similarity >= best_score:
            best_match = other_file
            best_score = similarity

    if best_match is not None:
        category = Category("untitled")
        file.sorted = True
        best_match.sorted = True
        category.files.append(file)
        category.files.append(best_match)
        self.categories.append(category)

    return
def get_published_elements(self):
    """
    Build objects for the published directories and files.

    :return: list of ``Directory`` / ``File`` objects, in the order of the
        ``"items"`` list; items of any other type are skipped
    """
    payload = self._get_dictionary_of_published_files()
    published = []
    for entry in payload["items"]:
        kind = entry["type"]
        if kind == "dir":
            published.append(Directory(**entry))
        elif kind == "file":
            published.append(File(**entry))
    return published
def __init__(self, **kwargs):
    """Populate the instance from keyword arguments.

    Every keyword except ``_embedded`` becomes an attribute of the same
    name. If ``_embedded`` is present, each entry of its ``'items'`` list
    is turned into a ``Directory`` or ``File`` according to its ``"type"``
    and appended to ``self.children``; other types are skipped.

    :raises KeyError: if ``_embedded`` has no ``'items'`` key or an item
        has no ``"type"`` key.
    """
    self.children = []

    for key, value in kwargs.items():
        # Compare by value: the previous identity test (``is not``) against
        # a string literal only worked when both strings were interned.
        if key != "_embedded":
            setattr(self, key, value)

    if "_embedded" in kwargs:
        for item in kwargs["_embedded"]['items']:
            if item["type"] == "dir":
                self.children.append(Directory(**item))
            elif item["type"] == "file":
                self.children.append(File(**item))
def get_list_of_all_files(self):
    """
    Request ``<base_url>/resources/files`` and wrap each returned item.

    :return: List of ``File`` objects, one per entry of ``"items"``
    """
    response = requests.get(self._base_url + "/resources/files",
                            headers=self.base_headers)
    self._check_code(response)
    payload = response.json()
    return [File(**entry) for entry in payload["items"]]
def test_cursor_assignment_via_setter_property_works(self):
    """Check cursor setters on an empty ``File`` and the resulting length.

    Cursors assigned while the file has no lines are expected to read back
    as 0, and ``file_length`` is expected to be 10 once ten lines have
    been assigned.
    """
    f = File()
    f.cursor_start = 1
    f.cursor_end = 1
    f.file_length = 20
    self.assertEqual(f.cursor_start,
                     0,
                     msg='Zero Lines, cursor should reset to 0')
    self.assertEqual(f.cursor_end,
                     0,
                     msg='Zero Lines, cursor should reset to 0')

    f.file_lines = ['line: ' + str(i) for i in range(0, 10)]
    # The failure message previously said "should be 0" although the
    # assertion expects 10.
    self.assertEqual(f.file_length,
                     10,
                     msg='Ten Lines, length should be 10')
def __init__(self):
    """Initialise the instance by delegating to ``File.__init__``."""
    File.__init__(self)
def test_processes_input(self):
    """Check that ``process_input`` lower-cases lines and drops line endings."""
    raw_lines = ['ABC', 'ABC\n', 'A B C\n\r']
    expected = ['abc', 'abc', 'a b c']
    self.assertEqual(File().process_input(raw_lines), expected)
def __init__(self):
    """Model class constructor.

    Starts with a fresh ``File`` as the replacement file, an empty
    directory mapping and a directory count of zero.
    """
    self.replacement_file = File()
    self.dirs, self.dir_count = {}, 0