def main(fhs_folder: str, now: str, mode_name: str, csv_output: str) -> None:
    """Extract the unemployment duration information from FHS.

    Iterates over the job seekers of the FHS and writes the rows produced by
    job_seeker_rows to a CSV file, preceded by a header row made of the
    _Criteria field names.

    Args:
        fhs_folder: path of the root folder of the FHS files.
        now: the date at which the FHS data was extracted, e.g. 2015-12-31.
        mode_name: the mode of extraction, see _MODES.
        csv_output: path to the file to write to.

    Raises:
        ValueError: if mode_name is not a key of _MODES, or if now is not
            formatted as YYYY-MM-DD.
    """

    # Validate the mode before touching any file.
    if mode_name not in _MODES:
        raise ValueError(
            f'Unsupported mode: [{mode_name}], want one of [{_MODES.keys()}]')
    mode = _MODES[mode_name]

    now_as_date = datetime.datetime.strptime(now, '%Y-%m-%d').date()

    job_seekers = fhs.job_seeker_iterator(
        fhs_folder,
        (fhs.UNEMPLOYMENT_PERIOD_TABLE, fhs.PART_TIME_WORK_TABLE))

    # Estimation of the total # of job seekers in the FHS 2017.
    # Only used to size the progress bar.
    total = 2478287

    # The csv module handles line endings itself, so the file must be opened
    # with newline=''. The encoding is explicit so that the output does not
    # depend on the locale of the machine running the extraction.
    with open(csv_output, 'w', encoding='utf-8', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(_Criteria._fields)
        for job_seeker in tqdm.tqdm(job_seekers, total=total):
            for row in job_seeker_rows(
                    job_seeker, now_as_date, mode.categories, mode.only_one):
                writer.writerow(row)
def main(fhs_folder: str, now: str, csv_output: str) -> None:
    """Extract the job group history from FHS and deduplicate them.

    Iterates over the job seekers of the FHS and writes the rows produced by
    job_seeker_rows to a CSV file, preceded by a header row made of the
    _JobseekerCriteria field names.

    Args:
        fhs_folder: path of the root folder of the FHS files.
        now: the date at which the FHS data was extracted, e.g. 2015-12-31.
        csv_output: path to the file to write to.

    Raises:
        ValueError: if now is not formatted as YYYY-MM-DD.
    """

    # Keep the parsed date in its own name instead of rebinding the string
    # parameter to a value of a different type.
    now_date = datetime.datetime.strptime(now, '%Y-%m-%d').date()

    job_seekers = fhs.job_seeker_iterator(
        fhs_folder,
        (fhs.UNEMPLOYMENT_PERIOD_TABLE, fhs.TARGETED_JOB_TABLE))

    # Estimation of the total # of job seekers in the FHS.
    # Only used to size the progress bar.
    total = 2522364

    # The csv module handles line endings itself, so the file must be opened
    # with newline=''. The encoding is explicit so that the output does not
    # depend on the locale of the machine running the extraction.
    with open(csv_output, 'w', encoding='utf-8', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(_JobseekerCriteria._fields)
        for job_seeker in tqdm.tqdm(job_seekers, total=total):
            for row in job_seeker_rows(job_seeker, now_date):
                writer.writerow(row)
def main(fhs_folder: str, now: str, csv_output: str) -> None:
    """Extract the training history from FHS.

    Iterates over the job seekers of the FHS and writes the rows produced by
    job_seeker_rows to a CSV file, preceded by a header row made of the
    _JobseekerCriteria field names.

    Args:
        fhs_folder: path of the root folder of the FHS files.
        now: the date at which the FHS data was extracted, e.g. 2017-12-31.
        csv_output: path to the file to write to.

    Raises:
        ValueError: if now is not formatted as YYYY-MM-DD.
    """

    now_date = datetime.datetime.strptime(now, '%Y-%m-%d').date()

    job_seekers = fhs.job_seeker_iterator(
        fhs_folder,
        (fhs.UNEMPLOYMENT_PERIOD_TABLE, fhs.TARGETED_JOB_TABLE, fhs.TRAINING_TABLE))

    # Estimation of the total # of job seekers in the FHS.
    # Only used to size the progress bar.
    total = 2522364

    # The csv module handles line endings itself, so the file must be opened
    # with newline='' to avoid any extra translation by the text layer.
    with open(csv_output, 'w', encoding='utf-8', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(_JobseekerCriteria._fields)
        for job_seeker in tqdm.tqdm(job_seekers, total=total):
            for row in job_seeker_rows(job_seeker, now_date):
                writer.writerow(row)
def test_job_seeker_iterator(self, mock_flatten_iterator):
    """Basic usage of job_seeker_iterator.

    The flatten iterator is replaced by a fake serving rows for the 'de' and
    'e0' tables; the test then checks the data held by each job seeker
    yielded by the iterator.
    """

    def _flatten_iterator(filename):
        """Serve fake rows depending on which table file is requested."""

        if '/de.csv' in filename:
            return iter([
                {
                    'IDX': '1',
                    'ROME': 'foo',
                    'DATINS': datetime.date(2015, 12, 1),
                    '__file__': filename.replace('*', 'Reg01'),
                },
                {
                    'IDX': '15',
                    'ROME': 'foo',
                    'DATINS': datetime.date(2015, 12, 1),
                    '__file__': filename.replace('*', 'Reg01'),
                },
                {
                    'IDX': '2',
                    'ROME': 'foo',
                    'DATINS': datetime.date(2015, 12, 1),
                    '__file__': filename.replace('*', 'Reg21'),
                },
            ])
        if '/e0.csv' in filename:
            return iter([
                {
                    'IDX': '1',
                    'HOURS': 42,
                    'MOIS': '201510',
                    '__file__': filename.replace('*', 'Reg01'),
                },
                {
                    'IDX': '1',
                    'HOURS': 43,
                    'MOIS': '201510',
                    '__file__': filename.replace('*', 'Reg01'),
                },
                {
                    'IDX': '2',
                    'HOURS': 27,
                    'MOIS': '201510',
                    '__file__': filename.replace('*', 'Reg21'),
                },
            ])
        # Fail loudly on an unexpected file instead of silently handing None
        # back to the code under test.
        raise ValueError('Called with "{}"'.format(filename))

    mock_flatten_iterator.side_effect = _flatten_iterator

    seekers = list(
        fhs.job_seeker_iterator('/folder/path/', tables=('de', 'e0')))
    data = [j._data for j in seekers]  # pylint: disable=protected-access

    self.assertEqual([
        {
            'region': '01',
            'IDX': '1',
            'de': [{
                'IDX': '1',
                'ROME': 'foo',
                'DATINS': datetime.date(2015, 12, 1),
                '__file__': '/folder/path/Reg01/de.csv',
            }],
            'e0': [
                {
                    'IDX': '1',
                    'HOURS': 42,
                    'MOIS': '201510',
                    '__file__': '/folder/path/Reg01/e0.csv',
                },
                {
                    'IDX': '1',
                    'HOURS': 43,
                    'MOIS': '201510',
                    '__file__': '/folder/path/Reg01/e0.csv',
                },
            ],
        },
        {
            'region': '01',
            'IDX': '15',
            'de': [{
                'IDX': '15',
                'ROME': 'foo',
                'DATINS': datetime.date(2015, 12, 1),
                '__file__': '/folder/path/Reg01/de.csv',
            }],
            # Job seeker 15 has no row in the fake 'e0' table.
            'e0': [],
        },
        {
            'region': '21',
            'IDX': '2',
            'de': [{
                'IDX': '2',
                'ROME': 'foo',
                'DATINS': datetime.date(2015, 12, 1),
                '__file__': '/folder/path/Reg21/de.csv',
            }],
            'e0': [{
                'IDX': '2',
                'HOURS': 27,
                'MOIS': '201510',
                '__file__': '/folder/path/Reg21/e0.csv',
            }],
        },
    ], data)
def test_job_seeker_iterator(
        self, mock_flatten_iterator: mock.MagicMock) -> None:
    """Basic usage of job_seeker_iterator.

    The flatten iterator is replaced by a fake serving rows for the 'de' and
    'e0' tables; the test then checks the data held by each job seeker
    yielded by the iterator.
    """

    def _de_row(idx: str, path: str) -> dict[str, Any]:
        """Build a fresh row of the 'de' table."""

        return {
            'IDX': idx,
            'ROME': 'foo',
            'DATINS': datetime.date(2015, 12, 1),
            '__file__': path,
        }

    def _e0_row(idx: str, hours: int, path: str) -> dict[str, Any]:
        """Build a fresh row of the 'e0' table."""

        return {
            'IDX': idx,
            'HOURS': hours,
            'MOIS': '201510',
            '__file__': path,
        }

    def _flatten_iterator(filename: str) -> Iterator[dict[str, Any]]:
        """Serve fake rows depending on which table file is requested."""

        in_region_01 = filename.replace('*', 'Reg01')
        in_region_21 = filename.replace('*', 'Reg21')
        if '/de.csv' in filename:
            rows = [
                _de_row('1', in_region_01),
                _de_row('15', in_region_01),
                _de_row('2', in_region_21),
            ]
        elif '/e0.csv' in filename:
            rows = [
                _e0_row('1', 42, in_region_01),
                _e0_row('1', 43, in_region_01),
                _e0_row('2', 27, in_region_21),
            ]
        else:
            raise ValueError(f'Called with "{filename}"')
        return iter(rows)

    mock_flatten_iterator.side_effect = _flatten_iterator

    job_seekers = fhs.job_seeker_iterator('/folder/path/', tables=('de', 'e0'))
    # pylint: disable=protected-access
    data = [job_seeker._data for job_seeker in list(job_seekers)]
    # pylint: enable=protected-access

    # Expected rows are rebuilt from scratch with literal paths so that they
    # never share a dict object with the rows served by the fake.
    de_region_01 = '/folder/path/Reg01/de.csv'
    de_region_21 = '/folder/path/Reg21/de.csv'
    e0_region_01 = '/folder/path/Reg01/e0.csv'
    e0_region_21 = '/folder/path/Reg21/e0.csv'
    expected = [
        {
            'de': [_de_row('1', de_region_01)],
            'e0': [
                _e0_row('1', 42, e0_region_01),
                _e0_row('1', 43, e0_region_01),
            ],
        },
        {
            'de': [_de_row('15', de_region_01)],
            # Job seeker 15 has no row in the fake 'e0' table.
            'e0': [],
        },
        {
            'de': [_de_row('2', de_region_21)],
            'e0': [_e0_row('2', 27, e0_region_21)],
        },
    ]
    self.assertEqual(expected, data)