from pandas import DataFrame
from savReaderWriter import SavReader

def read_spss(spss_file_path):
    # returnHeader=True makes the first record yielded the header row.
    with SavReader(spss_file_path, returnHeader=True) as reader:
        for record in reader:
            print(record)
            # records_got.append(record)
    # Re-read the file (this time without the header) into a DataFrame.
    with SavReader(spss_file_path) as reader:
        data_frame = DataFrame(list(reader))
    print(data_frame.info())
    return data_frame

def _test_sav_file(self, section):
    with SavReader(os.path.join(temp_dir, "{0}.sav".format(section)),
                   returnHeader=True) as reader:
        header = next(reader)
        rows = [r for r in reader]
        # open comparison file
        with SavReader(_logger_fixture_path('spss', "{0}.sav".format(section)),
                       returnHeader=True) as fixture_reader:
            fixture_header = next(fixture_reader)
            self.assertEqual(header, fixture_header)
            expected_rows = [r for r in fixture_reader]
            self.assertEqual(rows, expected_rows)

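# A minimal sketch of the _logger_fixture_path helper the test above assumes
# (hypothetical; the real helper and fixture layout are not shown). It simply
# resolves a path beneath a fixtures directory next to the test module:
import os

FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixtures')  # assumed layout

def _logger_fixture_path(*path_parts):
    # e.g. _logger_fixture_path('spss', 'section1.sav')
    return os.path.join(FIXTURES_DIR, *path_parts)
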
def transform(self):
    self._tracker.reset()
    with SavReader(self.in_path) as savData:
        with open(self.out_path, mode='wt', errors='strict',
                  encoding='utf8') as out_file:
            # SavReader returns header fields as bytes; decode and lowercase them.
            header = [str(field, 'utf8').casefold() for field in savData.header]
            writer = csv.DictWriter(out_file, fieldnames=header,
                                    extrasaction='raise')
            writer.writeheader()
            for row in savData:
                # Decode byte fields, keep numeric fields as-is.
                dict_row = {
                    h: row[i].decode() if type(row[i]) is bytes else row[i]
                    for (i, h) in enumerate(header)
                }
                self._tracker.track_in_row()
                if self._all_filters_pass(dict_row):
                    uuid = dict_row[self._uuid_fieldname]
                    self._tracker.track_uuid(uuid)
                    self._tracker.track_out_row()
                    writer.writerow(dict_row)
    self._tracker.print(self.out_path)

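# A plausible sketch of the _all_filters_pass method used above (hypothetical;
# the real implementation is not shown). It assumes the instance holds a list
# of predicate callables in self._filters, each taking the row dict:

def _all_filters_pass(self, dict_row):
    # Keep the row only if every filter predicate accepts it.
    return all(filter_fn(dict_row) for filter_fn in self._filters)
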
def spss_to_csv(fp: pathlib.Path) -> None:
    """
    Converts an SPSS SAV file to CSV.

    The encoding format will be CP1252 for all string fields in the CSV
    file. The CSV file will be saved to the same directory as the input
    SPSS SAV file, with a different extension (CSV, naturally).

    Parameters
    ----------
    fp : pathlib.Path
        location on disk where the SPSS sav file is held

    Returns
    -------
    None
    """
    from savReaderWriter import SavReader

    with SavReader(fp) as sav:
        r, c = sav.shape.nrows, sav.shape.ncols
        print(f'shape: ({r}, {c})')
        with (fp.parent / f'{fp.stem}.csv').open('w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([col.decode('CP1252') for col in sav.header])
            for line in sav:
                writer.writerow(list(map(spss_value_encoder, line)))

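# A minimal sketch of the spss_value_encoder helper the function above relies
# on (hypothetical; the real helper is not shown). Assumption: byte strings
# are decoded as CP1252, to match the header handling, and everything else is
# passed through unchanged:

def spss_value_encoder(value):
    # Decode SPSS string cells (bytes) as CP1252; leave numeric cells alone.
    if isinstance(value, bytes):
        return value.decode('CP1252')
    return value
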
def process_body():
    with SavReader(config['inputfile'], ioLocale='en_US.ISO8859-1') as body:
        for line in body:
            document = dict(SPSSDocType='data', dataline=line)
            updatedb(document)

def Sav2Df(path, utf8=True):
    '''Load data from SPSS .SAV file into pandas DataFrame with coded columns'''
    with SavReader(path, returnHeader=True, ioUtf8=utf8) as readerH:
        df = pd.DataFrame(list(readerH))
    # The first row is the header; promote it to column names, then drop it.
    df.columns = df.iloc[0]
    df = df.reindex(df.index.drop(0))
    return df

def convert(infile, outfile):
    with SavReader(infile, returnHeader=True, ioUtf8=True,
                   recodeSysmisTo='NA') as r:
        with open(outfile, 'w') as fout:
            # Create the writer once, not on every loop iteration.
            writer = csv.writer(fout, dialect='RFC4180')
            for l in r:
                l = [_stringify(c) for c in l]
                writer.writerow(l)

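# The 'RFC4180' dialect above is not built into the csv module; it has to be
# registered before use. A plausible setup, together with a sketch of the
# _stringify helper (both hypothetical; the originals are not shown):
import csv

csv.register_dialect('RFC4180', delimiter=',', quoting=csv.QUOTE_MINIMAL,
                     lineterminator='\r\n')

def _stringify(cell):
    # Assumed behaviour: decode byte cells, stringify everything else.
    if isinstance(cell, bytes):
        return cell.decode('utf-8')
    return str(cell)
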
def test_date_conversion(self):
    with tempfile.NamedTemporaryFile(suffix='.sav') as tmpfile:
        with SavWriter(tmpfile.name, ['date'], {b'date': 0},
                       formats={b'date': b'EDATE40'},
                       ioUtf8=True, ioLocale="C.UTF-8") as writer:
            record = [writer.spssDateTime(b"2000-01-01", "%Y-%m-%d")]
            writer.writerow(record)
        with SavReader(tmpfile.name, returnHeader=False,
                       ioUtf8=True, ioLocale="C.UTF-8") as reader:
            date = list(reader)[0][0]
            self.assertEqual('2000-01-01', date)

def to_csv(self):
    '''list all SAV files and convert to csv'''
    for f in os.listdir(self.local_path):
        if f.endswith('.SAV') and not f.startswith(self.transform_prefix):
            tgt_name = self.transform_prefix + f[:-4] + '.csv'
            with SavReader(os.path.join(self.local_path, f), ioUtf8=True) as reader:
                header = reader.header
                with open(os.path.join(self.local_path, tgt_name), 'w',
                          newline='', encoding='utf-8') as csvfile:
                    writer = csv.writer(csvfile)
                    writer.writerow(header)
                    for line in reader:
                        writer.writerow(line)

def data(self, savFileName):
    kwargs = dict(savFileName=savFileName, ioUtf8=True,
                  recodeSysmisTo=float("nan"))
    data = SavReader(**kwargs)
    if not data.isCompatibleEncoding():
        # The file encoding does not match the I/O encoding: re-open the
        # reader with an explicit locale derived from the file's encoding.
        del kwargs["ioUtf8"]
        encoding = data.fileEncoding.replace("_", "-")
        encoding = re.sub(r"cp(\d+)", r"\1", encoding)
        locale_ = locale.getlocale()[0] + "." + encoding
        kwargs["ioLocale"] = locale_
        data.close()
        try:
            data = SavReader(**kwargs)
        except ValueError:
            msg = ("Locale not found --> Linux: sudo localedef -f "
                   "%s -i %s /usr/lib/locale/%s")
            msg = msg % (encoding.upper(), locale_.split(".")[0], locale_)
            raise ValueError(msg)
    return data

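# A usage sketch for the data() method above, assuming `obj` is an instance
# of the surrounding class and 'survey.sav' a hypothetical input file. The
# returned SavReader is left open, so close it when finished:
reader = obj.data('survey.sav')
try:
    for row in reader:
        print(row)
finally:
    reader.close()
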
# Python 2.7
from savReaderWriter import SavReader
import pandas as pd

with SavReader('file.sav') as reader:
    chunk = 20000
    N = reader.shape.nrows
    lista = range(0, N + chunk, chunk)
    rangos = [lista[i - 1:i + 1] for i in range(1, len(lista))]
    pisa = pd.DataFrame([], columns=reader.header)
    pisa.to_csv('filename.csv', index=False)
    for r in rangos:
        records = []
        for line in reader[r[0]:r[1]]:
            records.append(line)
        pisa = pd.DataFrame(records)
        pisa.to_csv('filename.csv', index=False, mode='a', header=False)

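# Note on the design: slicing reader[start:stop] pulls only one 20,000-row
# block into memory at a time, and to_csv with mode='a' and header=False
# appends each block to the CSV whose header row was written once up front.
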
for name, transform in transform_generator(uuid_map=uuids_to_filter):
    inputSavPath = transform.get('input_path')
    outputCsvPath = transform.get('output_path')
    print('[-] Filtering {} :: {} to {} ...'.format(name, inputSavPath,
                                                    outputCsvPath))
    # Reset the stats collector
    for uuid in uuids_to_filter.keys():
        uuids_to_filter[uuid] = False
    for uuid in dead_uuid_tracker.keys():
        dead_uuid_tracker[uuid] = False
    total_rows = 0
    with SavReader(inputSavPath) as savData:
        with open(outputCsvPath, mode='wt', errors='strict',
                  encoding='utf8') as outFile:
            outFileWriter = csv.writer(outFile, strict=True)
            header = [str(field, 'utf8').casefold() for field in savData.header]
            print(header)
            outFileWriter.writerow(header)
            for row in savData:
                total_rows += 1
                if all([fn(row) for fn in transform.get('filters')]):
                    parsed_row = [
                        field.decode() if type(field) is bytes else field
                        for field in row
                    ]
                    outFileWriter.writerow(parsed_row)

""" import pandas as pd from savReaderWriter import SavReader '************************************************' full = 'GPS' part = 'GPS' AQ_TQ = 'TQ' STAA_STAO = 'STAO' #read spss.sav file with SavReader('X:\{}\{}\{}\{}_{}_{}_BACKGROUND.sav'.format( STAA_STAO, AQ_TQ, full, STAA_STAO, part, AQ_TQ), ioUtf8=True, returnHeader=True, idVar='Q1') as reader: records = reader.all() #the first item of records list is the columns of the dataframe columns = [records[0]] columns = columns[0] #delete that first item del records[0] #convert records to a dataframe spss = pd.DataFrame(records) #rename columns spss.columns = columns excel = pd.read_excel(