def helper_get_details(self, test_publish):
    """
    Load a stub data file and record summary details about it on the
    test instance.

    Every line of the file is stripped and split on tabs. The first line
    must split into exactly three fields, which are stored as
    ``bibcode``, ``ft_source`` and ``provider``.

    Attributes set on ``self``:

    * ``nor`` - number of lines in the stub file
    * ``bibcode``, ``ft_source``, ``provider`` - fields of the first line
    * ``bibcode_list`` - first field of every line
    * ``test_expected`` - value returned by
      ``check_if_extract.create_meta_path`` for ``bibcode``
    * ``meta_list`` - the same call for every entry of ``bibcode_list``,
      with ``'meta.json'`` removed from the result
    * ``meta_path`` - ``test_expected`` with ``'meta.json'`` removed
    * ``number_of_PDFs`` - number of lines whose second-to-last field
      ends with ``.pdf`` (case-insensitive)
    * ``number_of_standard_files`` - ``nor`` minus ``number_of_PDFs``

    :param test_publish: path of the stub file, relative to PROJ_HOME
    :return: no return
    """
    stub_path = os.path.join(PROJ_HOME, test_publish)
    with open(stub_path, "r") as stub_file:
        rows = stub_file.readlines()

    # Split each row once and reuse the fields below.
    records = [row.strip().split('\t') for row in rows]

    self.nor = len(rows)
    self.bibcode, self.ft_source, self.provider = records[0]
    self.bibcode_list = [fields[0] for fields in records]

    self.test_expected = check_if_extract.create_meta_path(
        {'bibcode': self.bibcode},
        extract_key='FULLTEXT_EXTRACT_PATH_UNITTEST'
    )
    self.meta_list = [
        check_if_extract.create_meta_path(
            {"bibcode": bibcode},
            extract_key='FULLTEXT_EXTRACT_PATH_UNITTEST'
        ).replace('meta.json', '')
        for bibcode in self.bibcode_list
    ]
    self.meta_path = self.test_expected.replace('meta.json', '')

    # The second-to-last field is the one tested for a .pdf suffix.
    self.number_of_PDFs = sum(
        1 for fields in records if fields[-2].lower().endswith('.pdf')
    )
    self.number_of_standard_files = self.nor - self.number_of_PDFs
def helper_get_details(self, test_publish):
    """
    Read the given stub file and store descriptive values about its
    contents as attributes of the test instance.

    The file is read as tab-separated lines. The first line has to
    contain exactly three fields; they become ``self.bibcode``,
    ``self.ft_source`` and ``self.provider``. The remaining attributes
    (``nor``, ``bibcode_list``, ``test_expected``, ``meta_list``,
    ``meta_path``, ``number_of_PDFs``, ``number_of_standard_files``)
    are derived from the full set of lines.

    :param test_publish: path of the stub file, relative to PROJ_HOME
    :return: no return
    """
    extract_key = 'FULLTEXT_EXTRACT_PATH_UNITTEST'

    with open(os.path.join(PROJ_HOME, test_publish), "r") as handle:
        content = handle.readlines()

    # nor: total number of lines found in the stub file.
    self.nor = len(content)

    first_fields = content[0].strip().split('\t')
    self.bibcode, self.ft_source, self.provider = first_fields

    # The first tab-separated field of each line is kept as its bibcode.
    bibcodes = []
    for entry in content:
        bibcodes.append(entry.strip().split('\t')[0])
    self.bibcode_list = bibcodes

    self.test_expected = check_if_extract.create_meta_path(
        {'bibcode': self.bibcode},
        extract_key=extract_key
    )

    # Same call per bibcode, with the 'meta.json' part removed.
    meta_dirs = []
    for code in self.bibcode_list:
        meta_file = check_if_extract.create_meta_path(
            {"bibcode": code},
            extract_key=extract_key
        )
        meta_dirs.append(meta_file.replace('meta.json', ''))
    self.meta_list = meta_dirs

    self.meta_path = self.test_expected.replace('meta.json', '')

    # Count the lines whose second-to-last field ends in '.pdf',
    # ignoring case.
    sources = [entry.strip().split("\t")[-2] for entry in content]
    pdf_sources = [src for src in sources if src.lower().endswith('.pdf')]
    self.number_of_PDFs = len(pdf_sources)

    self.number_of_standard_files = self.nor - self.number_of_PDFs
def calculate_expected_folders(self, full_text_links):
    """
    Build the list of paths expected for the entries of a full text
    links stub file.

    For every line of the file, the first tab-separated field is passed
    to ``check_if_extract.create_meta_path`` under the key
    ``CONSTANTS['BIBCODE']``, and ``'meta.json'`` is removed from the
    returned value.

    :param full_text_links: path of the full text links stub file,
        relative to PROJ_HOME
    :return: list with one path per line of the file, in file order
    """
    links_path = os.path.join(PROJ_HOME, full_text_links)
    with open(links_path, "r") as links_file:
        rows = links_file.readlines()

    folders = []
    for row in rows:
        identifier = row.strip().split('\t')[0]
        meta_file = check_if_extract.create_meta_path(
            {CONSTANTS['BIBCODE']: identifier},
            extract_key='FULLTEXT_EXTRACT_PATH_UNITTEST'
        )
        folders.append(meta_file.replace('meta.json', ''))

    return folders