Example #1
def test_writerows_pd_np_issue63():
    """
    issue #63 "ufunc 'isnan' not supported for the input types"
    Caused by strings that contained NaN values
    """
    if skip:
        raise SkipTest
    buff = StringIO(u"""n1,n2,s1,s2
    1,1,a,a
    2,2,b,bb
    3,,c,""")
    desired = [[1.0, 1.0, b'a', b'a'], [2.0, 2.0, b'b', b'bb'],
               [3.0, None, b'c', b'']]

    df = pd.read_csv(buff, chunksize=10**6, sep=',').get_chunk()
    arr = df.values
    savFileName = join(gettempdir(), "check.sav")
    kwargs = dict(varNames=list(df),
                  varTypes=dict(n1=0, n2=0, s1=1, s2=2),
                  savFileName=savFileName,
                  ioUtf8=True)

    # numpy
    with srw.SavWriter(**kwargs) as writer:
        writer.writerows(arr)
    with srw.SavReader(savFileName) as reader:
        actual = reader.all(False)
    assert actual == desired, actual

    # pandas
    with srw.SavWriter(**kwargs) as writer:
        writer.writerows(df)
    with srw.SavReader(savFileName) as reader:
        actual = reader.all(False)
    assert actual == desired, actual
Example #2
def main():
    if len(sys.argv) != 4:
        print('Usage: ' + sys.argv[0] + ' headfile datafile outputfile')
        sys.exit(1)

    try:
        with open(sys.argv[1], 'r') as head_file:
            head = json.load(head_file)

        head['valueLabels'] = int_keys(head['valueLabels'], head['varTypes'])
        data_path = replace_last(sys.argv[2], '.json', '_*.json')

        with savReaderWriter.SavWriter(sys.argv[3], head['varNames'],
                                       head['varTypes'], head['valueLabels'],
                                       head['varLabels'], head['varFormats'],
                                       None, head['measureLevels'],
                                       head['columnWidths'], head['alignments'],
                                       ioUtf8=True) as writer:

            for f in sorted(glob.glob(data_path)):
                with open(f, 'r') as data_file:
                    for record in ijson.items(data_file, 'item'):
                        writer.writerow(record)


            os.remove(sys.argv[1])
            os.remove(sys.argv[2])
            for f in glob.glob(data_path):
                os.remove(f)

            print('File ' + sys.argv[3] + ' successfully created.')
    except Exception:
        print('Error:', sys.exc_info())
        traceback.print_exc()
        sys.exit(1)
Example #3
def write_sav(path_sav, data, **kwargs):
    """
    Write the given records to a SAV file at path_sav.

    Using the various definitions indicated by the packed kwargs, write
    the given set of records to a SAV file at the location indicated by
    path_sav.

    For a full explanation of the kwargs used please see the
    savReaderWriter library documentation here:
    http://pythonhosted.org/savReaderWriter/

    Parameters
    ----------
    path_sav : str
        The full path, including extension, indicating where the output
        file should be saved.
    data : pandas.DataFrame
        A DataFrame holding the row data to be saved in the output SAV
        file; NaN values are replaced with the SPSS system-missing value.
    **kwargs : various
        Remaining keyword arguments passed to
        savReaderWriter.SavWriter().

    Returns
    -------
    None
    """

    with srw.SavWriter(path_sav, ioUtf8=True, **kwargs) as writer:
        records = data.fillna(writer.sysmis).values.tolist()
        for record in records:
            writer.writerow(record)
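A minimal usage sketch for the helper above; the frame, column names, and types are illustrative (not from the source), and pd/srw are assumed to be imported as in the surrounding examples.

import pandas as pd

# Hypothetical frame: one numeric and one 5-character string variable;
# the NaN in 'age' becomes the SPSS system-missing value via fillna above.
df = pd.DataFrame({"age": [30.0, None], "name": ["alice", "bob"]})
write_sav("write_sav_demo.sav", df,
          varNames=["age", "name"],
          varTypes={"age": 0, "name": 5})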
Example #4
    def test_check_segfault_numeric(self):
        """Test that an incorrect specification raises ctypes.ArgumentError,
        not a segfault"""
        valueLabels = {b"a_numeric": {b"1": b"male", b"2": b"female"}}
        with self.assertRaises(ctypes.ArgumentError):
            with rw.SavWriter(*self.args, valueLabels=valueLabels) as writer:
                writer.writerows(self.records)
Example #5
    def test_check_segfault_char(self):
        """Test that an incorrect specification raises ctypes.ArgumentError,
        not a segfault"""
        # c_char_p is wrapped in c_char_p3k in py3k.py, hence a separate test
        valueLabels = {b"a_string": {1: b"male", 2: b"female"}}
        with self.assertRaises(ctypes.ArgumentError):
            with rw.SavWriter(*self.args, valueLabels=valueLabels) as writer:
                writer.writerows(self.records)
Example #6
def test_writerows_tuple():
    records = tuple([tuple(record) for record in desired])
    savFileName = "output_tuple.sav"
    with srw.SavWriter(savFileName, *args) as writer:
        writer.writerows(records)
    with srw.SavReader(savFileName) as reader:
        actual = reader.all(False)
    assert actual == desired, actual
Example #7
def write_test(savFileName, nrows, ncols):
    varNames = ['v_%s' % i for i in range(ncols)]
    record = [b'aaaaa', 0] * (int(ncols) // 2)
    cmd = "5 if i % 2 == 0 else 0"
    varTypes = {varName: eval(cmd) for i, varName in enumerate(varNames)}
    with rw.SavWriter(savFileName, varNames, varTypes) as writer:
        for i in range(nrows):
            writer.writerow(record)
Example #8
    def func(self, savFileName):
        self.outfile = tempfile.mktemp(suffix="_out.sav")
        with rw.SavWriter(self.outfile, [b'v1'], {b'v1': 0}) as writer:
            for i in range(10):
                writer.writerow([i])
        with rw.SavReader(self.outfile) as reader:
            self.assertEqual(reader.all(), [[float(i)] for i in range(10)])
        self.assertTrue(os.path.exists(self.outfile))
Example #9
def test_accented_varSet_codepage_mode():
    with srw.SavWriter(**kwargs) as writer:
        for i in range(10):
            writer.writerow([1, 1])
    with srw.SavHeaderReader(kwargs["savFileName"]) as header:
        actual = header.varSets
    desired = {b'\xc3\xbcberhaupt': [b'salbegin', b'salary']}
    remove(kwargs["savFileName"])
    assert actual == desired, actual
Example #10
def test_writerows_namedtuple():
    Record = namedtuple("Record", args[0])
    records = [Record(*record) for record in desired]
    savFileName = "output_namedtuple.sav"
    with srw.SavWriter(savFileName, *args) as writer:
        writer.writerows(records)
    with srw.SavReader(savFileName) as reader:
        actual = reader.all(False)
    assert actual == desired, actual
Example #11
def test_writerows_pandas():
    if skip:
        raise SkipTest
    df = pd.DataFrame({"a": range(0, 20, 2), "b": range(1, 20, 2)})
    df.loc[0, "a"] = np.nan
    savFileName = "output_pd.sav"
    with srw.SavWriter(savFileName, *args) as writer:
        writer.writerows(df)
    with srw.SavReader(savFileName) as reader:
        actual = reader.all(False)
    assert actual == desired, actual
Example #12
def test_accented_varSet_unicode_mode():
    kwargs["varSets"] = {u'\xfcberhaupt': varNames}
    kwargs["ioUtf8"] = True
    with srw.SavWriter(**kwargs) as writer:
        for i in range(10):
            writer.writerow([1, 1])
    with srw.SavHeaderReader(kwargs["savFileName"], ioUtf8=True) as header:
        actual = header.varSets
    desired = {u'\xfcberhaupt': [u'salbegin', u'salary']}
    remove(kwargs["savFileName"])
    assert actual == desired, actual
Example #13
def test_writerows_numpy():
    if skip:
        raise SkipTest
    data = [range(10), range(10, 20)]
    array = np.array(data, dtype=np.float64).reshape(10, 2)
    array[0, 0] = np.nan
    savFileName = "output_np.sav"
    with srw.SavWriter(savFileName, *args) as writer:
        writer.writerows(array)
    with srw.SavReader(savFileName) as reader:
        actual = reader.all(False)
    assert actual == desired, actual
Example #14
def escrituraspss():
    savFileName = 'someFile.sav'
    records = [[b'Test1', 1, 1], [b'Test2', 2, 1]]
    varNames = ['var1', 'v2', 'v3']
    varTypes = {'var1': 5, 'v2': 0, 'v3': 0}

    with savReaderWriter.SavWriter(savFileName,
                                   varNames,
                                   varTypes,
                                   ioLocale='Spanish_Spain.1252') as writer:
        for record in records:
            writer.writerow(record)
Example #15
def extra_moc_plot(flux_input, lam_input, flux_moc, lam_moc,
                   error_flux_input, chi_sqr):
    # save the input columns to a SAV file (assumes equal-length arrays)
    varNames = ['flux_input', 'lam_input', 'flux_moc', 'lam_moc',
                'error_flux_input']
    varTypes = {name: 0 for name in varNames}
    with srw.SavWriter('extra_moc_plot.sav', varNames, varTypes) as writer:
        writer.writerows(list(zip(flux_input, lam_input, flux_moc, lam_moc,
                                  error_flux_input)))

    # check input
    if min(lam_input) < 1: print('input fail2')
    if max(lam_input) > 45: print('input fail3')

    # restrict the maximum and minimum values to be those of the IR input
    minlam = min(lam_input)
    maxlam = max(lam_input)

    # indices of the mocassin points that fall inside the input range
    index = [i for i in range(len(lam_moc))
             if minlam <= lam_moc[i] <= maxlam]

    flux_moc = flux_moc[index]
    lam_moc = lam_moc[index]

    # interpolate and subtract the mocassin data from the input

    # 'kind' specifies the interpolation as a string ('linear', 'nearest',
    # 'zero', 'slinear', 'quadratic', 'cubic', where 'zero', 'slinear',
    # 'quadratic' and 'cubic' refer to a spline interpolation of zeroth,
    # first, second or third order) or as an integer specifying the order
    # of the spline interpolator to use. Default is 'linear'.
    interpfunc = interpolate.interp1d(lam_moc, flux_moc, kind='linear')
    flux_moc_int = interpfunc(lam_input)  # IDL equivalent: interpol(flux_moc, lam_moc, lam_input)

    # subtract the interpolated mocassin flux from the input, point by point
    flux_subtracted = [flux_input[i] - flux_moc_int[i]
                       for i in range(len(flux_input))]

    # calculate and print useful statistics
    median = statistics.median(flux_subtracted)
    result = [np.mean(flux_subtracted), np.var(flux_subtracted),
              sci.skew(flux_subtracted), sci.kurtosis(flux_subtracted)]
    std_dev = np.std(flux_subtracted)
    chi_sqr = chi_squared(flux_input, flux_moc_int, error_flux_input, 10)


    plt.subplot(221)
    plt.gca().set_prop_cycle(color=['red', 'black', 'green', 'blue',
                                    'green', 'blue', 'black'])
    # plot x, y and a line at y = 0 for reference
    plt.plot(lam_input, flux_subtracted, 'D', [.1, 100], [0, 0], linewidth=1)
    plt.title('Real - Moc')
    plt.errorbar(lam_input, flux_subtracted, yerr=error_flux_input, capsize=3)

    plt.subplot(222)
    plt.plot(lam_input, flux_input, '^', lam_moc, flux_moc, 'D',
             [.1, 100], [0, 0], linewidth=1)

    plt.subplot(223)
    plt.plot(lam_input, flux_input, 's', lam_moc, flux_moc, 'D',
             [.1, 100], [0, 0], linewidth=1)
Example #16
def writeSav(df, fname):
    varNames = list(df.columns)
    varTypes = {}
    for c in df.columns:
        #print("DATA TYPE ", df[c].dtype)
        if is_string_dtype(df[c]):
            # len() falls over on null/float values, so fill nulls first
            df[c].fillna('', inplace=True)
            # Also need to convert everything to bytes
            df[c] = df[c].apply(lambda elt: str(elt).encode())
            d = df[c].map(len).max()
        elif is_numeric_dtype(df[c]):
            d = 0
        else:
            d = 0  # fall back to a numeric type for any other dtype
        varTypes[c] = d
    with spss.SavWriter(fname, varNames, varTypes, ioUtf8=False) as writer:
        writer.writerows(df)
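A usage sketch for the writeSav helper above; the frame and file name are illustrative. String widths are inferred from the longest encoded value, and numeric columns get type 0.

import pandas as pd

df = pd.DataFrame({"city": ["Oslo", "Lima", None],
                   "pop": [0.7, 10.9, None]})
writeSav(df, "cities.sav")  # 'city' becomes a 4-byte string, 'pop' numeric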
Example #17
def mocassin_fail_amiy(j, username, diffuse, directoryname, outfoldername,
                       starname):
    print("RUN FAILED! Writing output.")
    print("Failed on line number" + (j + 1) + "of AMIY_input.txt")

    with srw.SavReader('/Users/' + username +
                       '/mocassin-rw_changes/AMIY_number.sav') as reader:
        AMIY_number = reader.next()
    id = ssi(AMIY_number)
    AMIY_number += 1
    srw.SavWriter(
        '/Users/' + username + '/mocassin-rw_changes/AMIY_number.sav',
        AMIY_number)

    if (diffuse[j]): type = 'SN'
    else: type = 'RSG'

    directoryname = "/Users/" + username + "/mocassin-rw_changes/output/" + type + "/" + id + '_' + starname + '_FAIlED'
    os.system("mkdir " + directoryname)
    outfoldername = type + "/" + id + '_' + starname + '_FAIlED'

    os.chdir('/Users/' + username + '/mocassin-rw_changes/output')

    os.system('cp dustGrid.out ' + directoryname + '/dustGrid_' + id +
              '.out.txt')
    os.system('cp runinfo.txt ' + directoryname + '/runinfo_' + id + '.txt')
    os.system('cp SED.out ' + directoryname + '/SED_' + id + '.out.txt')
    if (diffuse[j]):
        os.system('cp equivalentTau.out ' + directoryname + '/equivalentTau_' +
                  id + '.out.txt')
    else:
        os.system('cp tauNu.out ' + directoryname + '/tauNu_' + id +
                  '.out.txt')
    os.system('cp /Users/' + username +
              '/mocassin-rw_changes/input/input.in ' + directoryname +
              '/input_' + id + '.in.txt')
    os.system('cp /Users/' + username +
              '/mocassin-rw_changes/input/ndust/nDUST ' + directoryname +
              '/nDUST_' + id + '.in.txt')
Example #18
File: scraper.py Project: vonj/scraping
    def generate_reports(self, keywordsfilename, before, after):
        reportbase = 'keywords_aftonbladet_idg_' + after.strftime(
            '%Y-%m-%d') + '_' + before.strftime('%Y-%m-%d')

        with open(keywordsfilename) as conf:
            keywords = json.load(conf)

        try:
            shutil.rmtree(reportbase)
        except OSError:
            pass

        try:
            os.remove(reportbase + '.zip')
        except OSError as e:
            pass

        os.mkdir(reportbase)

        self._reportname = os.path.join(reportbase, reportbase)

        f = open(self._reportname + '.html', 'w')

        self._rownames = [
            'idx',
            'fetched',
            'keywords',
            'publication',
            'date',
            'updated',
            'author',
            'author_email',
            'url',
            'title',
            'fulltext_plain',
        ]
        spss_types = {
            'idx': 0,
            'fetched': 34,
            'keywords': 150,
            'publication': 30,
            'date': 34,
            'updated': 34,
            'author': 50,
            'author_email': 50,
            'url': 100,
            'title': 140,
            'fulltext_plain': 10000,
        }

        with savReaderWriter.SavWriter(
                self._reportname + '.sav',
                self._rownames,
                spss_types,
                ioUtf8=True,
        ) as self._SPSSwriter:
            f.write(self._generate_report(keywords, before, after))
            f.close()
            subprocess.call([
                'wkhtmltopdf', self._reportname + '.html',
                self._reportname + '.pdf'
            ])
            subprocess.call(['cp', keywordsfilename, reportbase])
            subprocess.call([
                'rm',
                self._reportname + '.html',
            ])
            subprocess.call([
                '7z', 'a', '-r', '-mx=9', '-v5m', reportbase + '.zip',
                reportbase
            ])
Example #19
    data[valLabel] = data[valLabel].apply(lambda x: int(x.split(':')[0]))

nonLabels = [y for y in list(data) if y not in valueLabelsData]
str_columns = data[nonLabels].select_dtypes(exclude=['float', 'int'])
int_columns = data[nonLabels].select_dtypes(include=['float', 'int'])

for int_column in int_columns:
    varTypes.update({int_column.encode(): 0})
for str_column in str_columns:
    varTypes.update({str_column.encode(): 5})
    data[str_column] = data[str_column].apply(lambda x: x.encode())

records = data.to_dict('split')['data']
varNames = [y.encode() for y in list(data)]
Labels = {}
for varName in varNames:
    Labels.update({varName: varName})

savFileName = INSTANCE + '_' + FORMID + '.sav'
with spss.SavWriter(savFileName,
                    varNames,
                    varTypes,
                    valueLabels=valueLabels,
                    varLabels=Labels) as writer:
    for record in records:
        writer.writerow(record)
Example #20
    def graph(self, info: str = None, output=("plt", "excel")) -> str:
        """
        Graph the recorded statistics as a plt plot, an Excel spreadsheet,
        and/or an SPSS-compatible file.

        Args:
            output (Tuple[str]): the output formats to use.
            info (str): additional notes for the plt plot. If None is passed,
                the function will ask via input(); if you don't want any
                info, pass an empty string.

        Returns:
            str: folder name for the output
        """
        compatible_out = ["plt", "excel", "spss"]
        e = False
        for ro in output:
            if ro not in compatible_out:
                e = True
                print(
                    "WARNING, output format {} is not supported, it will be skipped"
                    .format(ro))
        if e:
            print("We currently support " + str(compatible_out))

        if info is None:
            info = input("Enter additional information about the sim: ")

        titles = [
            "Number Of Agents", "Average Agent Mass",
            "Amount of Food Consumed", "Average Agent IQ", "Average Agent EQ",
            "Average breeding mass divider", "Average Agent Breed Chance",
            "Fight count relative to population size",
            "Help count relative to population size",
            "Ignore count relative to population size", "Number of groups",
            "Close family ration in group"
        ]

        values = [
            self.number_of_agents_OT, self.mass_OT, self.eat_OT, self.iq_OT,
            self.iq_OT, self.breed_mass_div_OT, self.breed_chance_OT,
            self.fight_OT, self.help_OT, self.nothing_OT,
            self.relative_groups_OT, self.close_family_in_group_OT
        ]
        extention = "png"
        fn = "graphs-0.3/" + self.get_fn()
        os.mkdir(fn)

        try:
            if "plt" in output:
                if len(titles) != len(values):
                    raise Exception(
                        "Error: len(titles) must match len(values)")

                fig, axs = plt.subplots(len(values),
                                        sharex='all',
                                        figsize=(20, 60))
                metadata = dict()
                for i in range(len(values)):
                    axs[i].plot(self.i_OT, values[i], linewidth=0.25)
                    axs[i].axes.set_ylim([0, max(values[i])])
                    axs[i].set_ylabel(titles[i])

                    metadata["Final" + titles[i]] = values[i][-1]

                axs[0].axes.set_xlim([0, self.dataPoints])
                axs[0].set_title(
                    "Simulation with {} initial agents and {} steps\nDate: {}\nNotes: {}\n\nStats:\n{}\n"
                    .format(len(self.agents), self.gcsteps,
                            time.strftime("%D"), info, self.stats()), )

                axs[-1].set_xlabel("Number Of Data Points")

                plt.tight_layout()
                plt.autoscale()

                pltfn = fn + "/plt." + extention
                fig.savefig(pltfn, bbox_inches='tight')  # save graph
                # add metadata:
                im = Image.open(pltfn)
                meta = PngImagePlugin.PngInfo()
                for x in metadata:
                    meta.add_text(x, str(metadata[x]))
                im.save(pltfn, extension, pnginfo=meta)
        except Exception:
            print("error in generating plt file")
        transposed_data = []
        for i in range(self.dataPoints):
            transposed_data.append([j[i] for j in values])
        try:
            if "excel" in output:
                if len(values[0]) > 1048576:
                    print("to manny data points, skipping excel")
                else:
                    wb = openpyxl.Workbook(write_only=True)
                    sheet = wb.create_sheet()
                    sheet.append(titles)
                    for i in transposed_data:
                        sheet.append(i)
                    wb.save(fn + "/excel.xlsx")
        except Exception:
            print("error in generating excel file")

        if "spss" in output:
            savFileName = fn + '/spss.sav'
            varNames = [i.replace(" ", "_") for i in titles]
            varTypes = dict()
            for t in varNames:
                varTypes[t] = 0
            with savReaderWriter.SavWriter(savFileName, varNames,
                                           varTypes) as writer:
                for i in range(self.dataPoints):
                    writer.writerow(transposed_data[i])

        return os.getcwd() + "\\" + fn.replace("/", "\\")
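A sketch of how the method above might be called; `sim` is a hypothetical instance of the surrounding statistics-recording class.

out_dir = sim.graph(info="baseline run", output=("plt", "spss"))
print("graphs written to", out_dir)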
ioLocale = "german" if is_windows else "de_DE.cp1252"
b_settings = dict(ioUtf8=sav.UNICODE_BMODE, ioLocale=ioLocale)

# read SPSS file data
with sav.SavReader(in_savFileName, rawMode=True, **b_settings) as data:
    in_records = data.all(False)

# read SPSS file metadata
with sav.SavHeaderReader(in_savFileName, **b_settings) as header:
    metadata = header.dataDictionary()
    #pprint(metadata)

# write (unmodified) data to SPSS file
out_savFileName = os.path.join(tempfile.gettempdir(), 'out.sav')
metadata.update(b_settings)
with sav.SavWriter(out_savFileName, **metadata) as writer:
    writer.writerows(in_records)


# Now test whether input and output are the same
class Test_MetadataRoundTrip(unittest.TestCase):
    def setUp(self):
        self.maxDiff = None

    def test_data_same(self):
        with sav.SavReader(out_savFileName, rawMode=True,
                           **b_settings) as data:
            out_records = data.all(False)
            out_encoding = data.fileEncoding
        self.assertEqual("utf_8", out_encoding)
        self.assertEqual(in_records, out_records)
Example #22
def pseudonymise(input_file,
                 columns=None,
                 names=None,
                 mapping_file=None,
                 output_file=None):
    """
    Create UUID integer for certain columns of an SAV file. If no columns are selected (either by name  '-n' or by
    number '-c') then automatically the first column is selected for pseudonymisation.

    \b
    :param input_file: points to input SAV file.
    :param columns: columns by number to pseudonymise, can be comma separated list (e.g. 0,5,7).
    :param names: columns by name to pseudonymise. Given as a comma separated
        list (e.g. 'PIDnumber,PIDnumberRelation,OtherPID')
    :param mapping_file: file with a previously created mapping. Reuses the pseudonymisation
        and adds mappings for new numbers.
    :param output_file: path to output file, defaults to same directory as input sav with
        suffix '-pseudonymised.sav'.
    """

    import sys
    import os
    from uuid import uuid4

    # Trick to prevent savReaderWriter from complaining in stdout about
    # missing numpy, which it does not need.
    sys.stdout = open(os.devnull, 'w')
    import savReaderWriter
    sys.stdout = sys.__stdout__

    uuid_map = {}
    if mapping_file:
        click.echo('Using uuid map: {}'.format(mapping_file))
        with open(mapping_file, 'r') as f:
            for line in f:
                if not line.strip():
                    continue
                fis, uuid = line.strip().split('\t')
                uuid_map[fis] = uuid

    if output_file is None:
        output_file = input_file.rsplit('.', 1)[0] + '-pseudonymised.sav'

    output_map = input_file.rsplit('.', 1)[0] + '-mapping.tsv'

    with savReaderWriter.SavReader(input_file) as reader:

        int_columns = []
        header = [c.decode() for c in reader.getHeader(None)]

        if columns is not None:
            int_columns += [int(c) for c in columns.split(',') if c != '']

        if names is not None:
            for name in names.split(','):
                try:
                    idx = header.index(name)
                except ValueError:
                    raise SystemExit(
                        "Column {} not found in file. Aborting.".format(name))
                int_columns.append(idx)

        if not int_columns:
            int_columns = [0]

        click.echo("Pseudonymising columns: {!r}".format(
            [header[c] for c in int_columns]))

        with savReaderWriter.SavWriter(
                savFileName=output_file,
                varNames=reader.varNames,
                varTypes=reader.varTypes,
                valueLabels=reader.valueLabels,
                varLabels=reader.varLabels,
                formats=reader.formats,
                missingValues=reader.missingValues,
                measureLevels=reader.measureLevels,
                columnWidths=reader.columnWidths,
                alignments=reader.alignments,
                varSets=reader.varSets,
                varRoles=reader.varRoles,
                varAttributes=reader.varAttributes,
                fileAttributes=reader.fileAttributes,
                fileLabel=reader.fileLabel,
                multRespDefs=reader.multRespDefs,
                caseWeightVar=reader.caseWeightVar) as writer:

            for record in list(reader):
                for n in int_columns:

                    # Safer to convert to string, as this is how the map would be
                    # read from file if it is used another time.
                    fis = str(record[n])
                    if fis not in uuid_map:
                        uuid_map[fis] = uuid4().int

                    record[n] = uuid_map[fis]

                writer.writerow(record)
            click.echo('Writing pseudonymised sav: {}'.format(output_file))

    # write uuid map to disk
    with open(output_map, 'w') as f:
        click.echo('Writing mapping file: {}'.format(output_map))
        for fis, uuid in uuid_map.items():
            f.write('{}\t{}\n'.format(fis, uuid))
    click.echo('Finished. Good bye.')
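A sketch of calling the function above directly, outside its CLI wrapper; the file and column names are illustrative.

pseudonymise('survey.sav',
             names='PIDnumber',
             output_file='survey-pseudonymised.sav')
# writes survey-pseudonymised.sav plus survey-mapping.tsv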
Example #23
for col in odf.columns:
    if str(odf[col].dtype) == 'object':
        varTypes[col] = 1024
    elif 'date' in str(odf[col].dtype):
        varTypes[col] = 0
    else:
        varTypes[col] = 0
varTypes['date'] = 0
logging.debug(f"varTypes: {varTypes}")

colsSave = list(odf.columns)

# https://pythonhosted.org/savReaderWriter/generated_api_documentation.html#savwriter
with srw.SavWriter(pathSav,
                   varNames=colsSave,
                   varTypes=varTypes,
                   valueLabels=valLabs,
                   ioUtf8=True,
                   formats={'date': 'DATETIME17'}) as writer:
    for record in records:
        record[0] = writer.spssDateTime(record[0].encode(),
                                        '%Y-%m-%d %H:%M:%S')
        writer.writerow(record)

logging.info(f"Save {os.path.abspath(pathSav)}")

timeend = dt.datetime.now()

logging.info(
    f"\nstart time: {timestart}\nfinish time: {timeend}\nduration: {(timeend - timestart)}"
)
print(
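For reference, a minimal standalone sketch of the spssDateTime/DATETIME17 pattern used above; the file and variable names are illustrative.

import savReaderWriter as srw

with srw.SavWriter('dates.sav', ['when'], {'when': 0},
                   formats={'when': 'DATETIME17'}) as writer:
    # convert a timestamp string to the SPSS internal date value
    value = writer.spssDateTime(b'2021-03-01 12:30:00', '%Y-%m-%d %H:%M:%S')
    writer.writerow([value])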
Example #24
    # for i, line in enumerate(reader):
    newrow = []
    for i in range(0, 2):
        newline = []
        newline.append(reader[i][2])   # username
        newline.append(reader[i][12])  # posttext
        newline.append(reader[i][17])  # posttextpolarity
        newline.append(reader[i][18])  # posttextsubjectivity
        newline.append(reader[i][66])  # clustername
        newrow.append(newline)
    # print newline

varNames = ['UserName', 'PostText', 'PostTextPolarity', 'PostTextSubjectivity', 'ClusterName']
varTypes = {'UserName': 1, 'PostText': 1, 'PostTextPolarity': 0, 'PostTextSubjectivity': 0, 'ClusterName': 1}
# varTypes = {'UserName': 5, 'v2': 0, 'v3': 0}

with savReaderWriter.SavWriter(filenametowrite, varNames, varTypes) as writer:
    for x in newrow:
        writer.writerow(x)

"""

"""
"""
# username -> [2]
# posttext -> [12]
# polarity -> [17]
# subjectivity -> [18]
# clustername -> [66]
newline.append(line[2])
newline.append(line[12])
newline.append(line[17])
Example #25
#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys
#sys.setdefaultencoding("utf-8")
#reload(sys)
import savReaderWriter
import locale
import os
import collections
from obdc import *

varLabels = {'var1': 'This is variable 1', 'v2': 'This is v2!', 'bdate': 'dob'}

file = "Agropecuario.sav"
#file="Hogares.sav"

preguntas, dicpreguntas, vartypes, varlabels, medicion, valuelabels = metadata(
    "Agropecuario-ccc.mdb")

with savReaderWriter.SavWriter(file,
                               preguntas,
                               vartypes,
                               valuelabels,
                               varlabels,
                               formats=None,
                               missingValues=None,
                               measureLevels=medicion,
                               ioLocale='Spanish_Spain.1252') as sav:
    pass
Example #26
def test_writerows_erroneous_flat_n():
    records = [0, 1]  # wrong!
    savFileName = "output_error1.sav"
    with srw.SavWriter(savFileName, *args) as writer:
        assert_raises(TypeError, writer.writerows, records)
Example #27
def test_writerows_erroneous_flat_empty():
    records = []  # wrong!
    string_args = ["v1", "v2"], dict(v1=1, v2=1)
    savFileName = "output_error3.sav"
    with srw.SavWriter(savFileName, *string_args) as writer:
        assert_raises(ValueError, writer.writerows, records)
Example #28
                i.encode('UTF-8')
            )  # every cell is converted to bytes, which allows us to insert it into the SPSS file; append it to the inner array

        elif i is None:  # replace a null value with an empty string and append it to the array
            myinner.append(b'')
            #print('i got none')
        else:
            myinner.append(i)
            #print(myinner)
    myouter.append(
        myinner
    )  # after preparing each inner array, append it to the outer array
    #-->19-nov-2018 print(myouter)
YonisavFileName = 'TestSpss.sav'  # the SPSS file the data will be inserted into
with savReaderWriter.SavWriter(
        YonisavFileName, *metadata, mode=b'ab'
) as writer:  # open the SPSS file for insertion in append mode (b'ab')

    for record in myouter:  # iterate over every row
        #-->19-nov-2018 print(record[2])
        #-->19-nov-2018 print(record[25])
        # Date formatting starts here ----------------------
        # The following lines deal with date/time conversion: two SPSS
        # variables have a date data type (at the manually determined
        # array indices 2 and 25); create a date value with the
        # spssDateTime function and put it back in the record.
        record[2] = writer.spssDateTime(record[2], '%d-%m-%Y')
        record[25] = writer.spssDateTime(record[25], '%d-%m-%Y')
        # Date formatting ends here ------------------------------
        writer.writerow(record)  # write the prepared row
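A compact sketch of the append-mode pattern used above (mode=b'ab'); the file must already exist with matching variables, and the names here are illustrative.

import savReaderWriter as srw

with srw.SavWriter('TestSpss.sav', ['v1'], {'v1': 0}, mode=b'ab') as writer:
    writer.writerow([42])  # appended as a new case to the existing file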
Example #29
    formats = [
        "S%d" % data.varTypes[v] if data.varTypes[v] else np.float64
        for v in data.varNames
    ]
    dtype = np.dtype({'names': data.varNames, 'formats': formats})
    structured_array = np.array([tuple(record) for record in records],
                                dtype=dtype)

allDataArray = np.array(
    records
)  # in the most recent version one can directly read to numpy arrays
print(records)

# reading metadata from SPSS file
with sav.SavHeaderReader(spss_file, ioUtf8=True, ioLocale=ioLocale) as header:
    metadata = header.dataDictionary(
        asNamedtuple=False)  # Why does this take so long?

pprint.pprint(metadata)

# writing unmodified data
with sav.SavWriter(spss_file_out,
                   overwrite=True,
                   ioUtf8=True,
                   ioLocale=ioLocale,
                   mode=b'wb',
                   refSavFileName=None,
                   **metadata) as writer:
    for i, record in enumerate(structured_array):
        writer.writerow(record)