Example No. 1
0
def read_spss(spss_file_path):
    """Read an SPSS .sav file, print each record, and return the rows as a DataFrame.

    Parameters
    ----------
    spss_file_path : str
        Path to the .sav file on disk.

    Returns
    -------
    DataFrame
        One row per data record. The header record is excluded, matching the
        original ``SavReader(...)`` call with its default ``returnHeader=False``.
    """
    # Single pass over the file: the previous implementation opened and read
    # the file twice (once to print, once to build the DataFrame).
    records = []
    with SavReader(spss_file_path, returnHeader=True) as reader:
        for record in reader:
            print(record)
            records.append(record)

    # records[0] is the header row; drop it so the frame holds data rows only.
    data_frame = DataFrame(records[1:])
    print(data_frame.info())

    return data_frame
Example No. 2
0
        def _test_sav_file(section):
            # Compare the exported .sav for *section* against the committed fixture.
            sav_name = "{0}.sav".format(section)
            with SavReader(os.path.join(temp_dir, sav_name),
                           returnHeader=True) as actual:
                actual_header = next(actual)
                actual_rows = list(actual)

                # Open the reference copy shipped with the test fixtures.
                fixture_path = _logger_fixture_path('spss', sav_name)
                with SavReader(fixture_path,
                               returnHeader=True) as expected:
                    self.assertEqual(actual_header, next(expected))
                    self.assertEqual(actual_rows, list(expected))
    def transform(self):
        """Filter rows of the input .sav file into a UTF-8 CSV file.

        Reads ``self.in_path`` with SavReader, casefolds the byte header into
        str column names, and writes every row that passes all filters to
        ``self.out_path``. Row and UUID statistics are recorded on
        ``self._tracker`` and printed at the end.
        """
        self._tracker.reset()
        with SavReader(self.in_path) as savData:
            with open(self.out_path,
                      mode='wt',
                      errors='strict',
                      encoding='utf8') as out_file:
                # SPSS header fields come back as bytes; normalize to folded str.
                header = [
                    str(field, 'utf8').casefold() for field in savData.header
                ]
                writer = csv.DictWriter(out_file,
                                        fieldnames=header,
                                        extrasaction='raise')
                writer.writeheader()

                for row in savData:
                    # isinstance() is the idiomatic type check
                    # (was: type(row[i]) is bytes).
                    dict_row = {
                        h: row[i].decode() if isinstance(row[i], bytes) else row[i]
                        for (i, h) in enumerate(header)
                    }
                    self._tracker.track_in_row()
                    if self._all_filters_pass(dict_row):
                        uuid = dict_row[self._uuid_fieldname]
                        self._tracker.track_uuid(uuid)
                        self._tracker.track_out_row()
                        writer.writerow(dict_row)
        self._tracker.print(self.out_path)
Example No. 4
0
File: io.py  Project: boonhapus/sn
def spss_to_csv(fp: pathlib.Path) -> None:
    """
    Converts an SPSS SAV file to CSV.

    The encoding format will be CP1252 for all strings held in the CSV file.
    The CSV file will be saved to the same directory as the input SPSS SAV
    file, with a different extension (CSV, naturally).

    Parameters
    ----------
    fp : pathlib.Path
        location on disk where the SPSS sav file is held

    Returns
    -------
    None
    """
    from savReaderWriter import SavReader

    with SavReader(fp) as sav:
        r, c = sav.shape.nrows, sav.shape.ncols
        print(f'shape: ({r}, {c})')

        # newline='' lets csv.writer control line endings itself.
        with (fp.parent / f'{fp.stem}.csv').open('w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            # Header fields arrive as bytes; renamed loop var so it no longer
            # shadows the column count `c` above.
            writer.writerow([name.decode('CP1252') for name in sav.header])

            for line in sav:
                writer.writerow(list(map(spss_value_encoder, line)))
Example No. 5
0
def process_body():
    """Stream every record of the configured input .sav file into the database."""
    with SavReader(config['inputfile'], ioLocale='en_US.ISO8859-1') as body:
        for record in body:
            # One document per data line, tagged with its SPSS doc type.
            updatedb(dict(SPSSDocType='data', dataline=record))
def Sav2Df(path, utf8=True):
    """Load data from SPSS .SAV file into pandas DataFrame with coded columns.

    Parameters
    ----------
    path : str
        Location of the .sav file.
    utf8 : bool, optional
        Kept for backward compatibility; not used by the reader call here.

    Returns
    -------
    pandas.DataFrame
        Data rows with columns taken from the file's header record.
    """
    # Fix: the docstring was previously at column 0 inside the function
    # body, which is an IndentationError.
    # returnHeader=1 makes the first yielded record the header row.
    with SavReader(path, returnHeader=1) as readerH:
        df = pd.DataFrame(list(readerH))
    # Promote the header record to column labels, then drop it from the data.
    df.columns = df.iloc[0]
    df = df.reindex(df.index.drop(0))
    return df
Example No. 7
0
def convert(infile, outfile):
    """Convert an SPSS .sav file to a CSV file using the 'RFC4180' dialect.

    System-missing values are recoded to the string 'NA'; every cell is
    passed through _stringify before being written.
    """
    with SavReader(infile, returnHeader=True, ioUtf8=True,
                   recodeSysmisTo='NA') as r:
        with open(outfile, 'w') as fout:
            # Create the writer once — it was previously rebuilt on every
            # loop iteration.
            writer = csv.writer(fout, dialect='RFC4180')
            for row in r:
                writer.writerow([_stringify(cell) for cell in row])
    def test_date_conversion(self):
        """Round-trip a date through SavWriter/SavReader and verify the value."""
        with tempfile.NamedTemporaryFile(suffix='.sav') as tmpfile:
            with SavWriter(tmpfile.name, ['date'], {b'date': 0},
                           formats={b'date': b'EDATE40'},
                           ioUtf8=True,
                           ioLocale="C.UTF-8") as writer:
                # Encode the ISO date string into SPSS's internal date number.
                spss_date = writer.spssDateTime(b"2000-01-01", "%Y-%m-%d")
                writer.writerow([spss_date])

            with SavReader(tmpfile.name,
                           returnHeader=False,
                           ioUtf8=True,
                           ioLocale="C.UTF-8") as reader:
                rows = list(reader)
                self.assertEqual('2000-01-01', rows[0][0])
Example No. 9
0
 def to_csv(self):
     '''
     Convert every *.SAV file in the local directory to a UTF-8 CSV file.

     Files already carrying the transform prefix are skipped; each output
     is named <transform_prefix><original stem>.csv.
     '''
     for src_name in os.listdir(self.local_path):
         # Skip non-SAV files and files we have already transformed.
         if not src_name.endswith('.SAV'):
             continue
         if src_name.startswith(self.transform_prefix):
             continue
         tgt_name = self.transform_prefix + src_name[:-4] + '.csv'
         src_path = os.path.join(self.local_path, src_name)
         with SavReader(src_path, ioUtf8=True) as reader:
             with open(os.path.join(self.local_path, tgt_name),
                       'w+',
                       encoding='utf-8') as csvfile:
                 writer = csv.writer(csvfile)
                 writer.writerow(reader.header)
                 writer.writerows(reader)
Example No. 10
0
 def data(self, savFileName):
     """Open *savFileName* with SavReader, falling back to a locale derived
     from the file's own encoding when UTF-8 mode is not compatible.

     Returns the open SavReader; the caller is responsible for closing it.
     Raises ValueError with installation instructions when the derived
     locale is not available on the system.
     """
     options = dict(savFileName=savFileName,
                    ioUtf8=True,
                    recodeSysmisTo=float("nan"))
     reader = SavReader(**options)
     if reader.isCompatibleEncoding():
         return reader

     # Build a locale string such as "en_US.1252" from the file encoding
     # (e.g. "cp1252" -> "1252"), then reopen without UTF-8 mode.
     del options["ioUtf8"]
     enc = reader.fileEncoding.replace("_", "-")
     enc = re.sub(r"cp(\d+)", r"\1", enc)
     locale_ = locale.getlocale()[0] + "." + enc
     options["ioLocale"] = locale_
     reader.close()
     try:
         reader = SavReader(**options)
     except ValueError:
         msg = ("Locale not found --> Linux: sudo localedef -f "
                "%s -i %s /usr/lib/locale/%s")
         raise ValueError(msg % (enc.upper(), locale_.split(".")[0], locale_))
     return reader
Example No. 11
0
# Python 2.7
from savReaderWriter import SavReader
import pandas as pd
# Export a large .sav file to CSV in fixed-size chunks to bound memory use.
# NOTE(review): the comment above says Python 2.7, where range() returns a
# list; under Python 3 range also supports slicing, so the pairing below
# works either way — TODO confirm the target interpreter.
with SavReader('file.sav') as reader:
    chunk = 20000
    N = reader.shape.nrows
    # Chunk boundaries 0, chunk, 2*chunk, ... extending past N so the final
    # partial chunk is included.
    lista = range(0,N+chunk, chunk)
    # Consecutive (start, stop) pairs, e.g. (0, 20000), (20000, 40000), ...
    rangos = [lista[i-1:i+1] for i in range(1,len(lista))]
    # Write the header row once via an empty frame.
    pisa = pd.DataFrame([], columns=reader.header)
    pisa.to_csv('filename.csv', index=False)
    for r in rangos:
        records = []
        # SavReader supports slicing, so only rows r[0]..r[1]-1 are read here.
        for line in reader[r[0]:r[1]]:
            records.append(line)
        # Append this chunk without repeating the header.
        pisa = pd.DataFrame(records)
        pisa.to_csv('filename.csv', index=False, mode='a', header=False)
Example No. 12
0

for name, transform in transform_generator(uuid_map=uuids_to_filter):
    inputSavPath = transform.get('input_path')
    outputCsvPath = transform.get('output_path')

    print('[-] Filtering {} :: {} to {} ...'.format(name, inputSavPath,
                                                    outputCsvPath))
    # Reset the stats collector
    for uuid in uuids_to_filter.keys():
        uuids_to_filter[uuid] = False
    for uuid in dead_uuid_tracker.keys():
        dead_uuid_tracker[uuid] = False

    total_rows = 0
    with SavReader(inputSavPath) as savData:
        with open(outputCsvPath, mode='wt', errors='strict',
                  encoding='utf8') as outFile:
            outFilerWriter = csv.writer(outFile, strict=True)
            header = [
                str(field, 'utf8').casefold() for field in savData.header
            ]

            print(header)
            outFilerWriter.writerow(header)

            for row in savData:
                total_rows += 1
                if all([fn(row) for fn in transform.get('filters')]):
                    parsed_row = [
                        field.decode() if type(field) is bytes else field
"""

import pandas as pd

from savReaderWriter import SavReader

'************************************************'
# Parameters selecting which survey background file to load.
full = 'GPS'
part = 'GPS'
AQ_TQ = 'TQ'
STAA_STAO = 'STAO'

#read spss.sav file
# NOTE(review): the backslashes in this Windows path are unescaped; Python
# keeps unknown escapes like "\{" literally, but a raw string would be safer.
with SavReader('X:\{}\{}\{}\{}_{}_{}_BACKGROUND.sav'.format(
        STAA_STAO, AQ_TQ, full, STAA_STAO, part, AQ_TQ),
               ioUtf8=True,
               returnHeader=True,
               idVar='Q1') as reader:
    # all() materializes every record, header first (returnHeader=True).
    records = reader.all()

#the first item of records list is the columns of the dataframe
columns = [records[0]]
columns = columns[0]
#delete that first item
del records[0]
#convert records to a dataframe
spss = pd.DataFrame(records)
#rename columns
spss.columns = columns

excel = pd.read_excel(