Example No. 1
def _example():
    savFileName = 'someFile.sav'
    records = [[b'Test1', 1, 1], [b'Test2', 2, 1]]
    varNames = ['var1', 'v2', 'v3']
    varTypes = {'var1': 5, 'v2': 0, 'v3': 0}

    with SavWriter(savFileName, varNames, varTypes) as writer:
        for record in records:
            writer.writerow(record)
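For completeness, here is a minimal read-back sketch using SavReader from the same savReaderWriter package (not part of the original example) to check what was written:

from savReaderWriter import SavReader

# Read 'someFile.sav' back in; returnHeader=True yields the variable names
# as the first row, followed by the data records.
with SavReader('someFile.sav', returnHeader=True) as reader:
    for line in reader:
        print(line)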
Example No. 2
def single_process_run(chunked_data_iterator):
    """Process and write chunks serially."""
    with SavWriter(args.outfile,
                   save_info['column_names'],
                   save_info['types'],
                   measureLevels=save_info['measure_levels'],
                   alignments=save_info['alignments'],
                   columnWidths=save_info['column_widths'],
                   formats=save_info['formats']) as writer:

        for i, chunk in enumerate(chunked_data_iterator):
            chunk = (i, chunk)
            writer.writerows(process_chunk(chunk)[1])
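The process_chunk helper used here (and in Example No. 5 below) is not shown. A hypothetical sketch, assuming each item is an (index, DataFrame) tuple and the caller wants the index back together with the rows to write, could look like this:

def process_chunk(indexed_chunk):
    # Hypothetical helper: unpack (index, chunk), apply per-chunk cleanup,
    # and return (index, rows) so the caller can pass the rows to writerows().
    i, chunk = indexed_chunk
    chunk = chunk.fillna('')  # illustrative cleanup step, not from the original
    return i, chunk.values.tolist()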
Example No. 3
    def test_sav_special_char_columns(self):
        survey = create_survey_from_xls(
            _logger_fixture_path('grains/grains.xls'))
        export_builder = ExportBuilder()
        export_builder.TRUNCATE_GROUP_TITLE = True
        export_builder.set_survey(survey)
        export_builder.INCLUDE_LABELS = True
        export_builder.set_survey(survey)

        for sec in export_builder.sections:
            sav_options = export_builder._get_sav_options(sec['elements'])
            sav_file = NamedTemporaryFile(suffix=".sav")
            # No exception is raised
            SavWriter(sav_file.name, **sav_options)
Example No. 4
    def test_date_conversion(self):
        with tempfile.NamedTemporaryFile(suffix='.sav') as tmpfile:
            with SavWriter(tmpfile.name, ['date'], {b'date': 0},
                           formats={b'date': b'EDATE40'},
                           ioUtf8=True,
                           ioLocale="C.UTF-8") as writer:
                record = [writer.spssDateTime(b"2000-01-01", "%Y-%m-%d")]
                writer.writerow(record)

            with SavReader(tmpfile.name,
                           returnHeader=False,
                           ioUtf8=True,
                           ioLocale="C.UTF-8") as reader:
                date = list(reader)[0][0]
                self.assertEqual('2000-01-01', date)
Example No. 5
def multi_process_run(chunked_data_iterator):
    """Process chunks in parallel."""
    with SavWriter(args.outfile,
                   save_info['column_names'],
                   save_info['types'],
                   measureLevels=save_info['measure_levels'],
                   alignments=save_info['alignments'],
                   columnWidths=save_info['column_widths'],
                   formats=save_info['formats']) as writer:

        pool = mp.Pool(mp.cpu_count()-1)
        for i, df in pool.imap_unordered(process_chunk, enumerate(chunked_data_iterator), 1):
            print('Writing chunk {}.'.format(i))
            writer.writerows(df)
        pool.close()
        pool.join()
Example No. 6
def get_body(header):
    headers = config['authheader']
    headers.update({'Content-type': 'application/json'})
    r = requests.get(
        config['dburl'] + '/_design/doctype/_view/doctype?startkey="data"&endkey="data"',
        headers=headers)
    ids = []
    for row in yaml.safe_load(r.text)['rows']:
        ids.append(row['id'])
    blocknumber = 0
    blocksize = 200
    with SavWriter(config['outputfile'],
                   header['varNames'],
                   header['varTypes'],
                   valueLabels=header['valueLabels'],
                   varSets=header['varSets'],
                   varAttributes=header['varAttributes'],
                   varRoles=header['varRoles'],
                   measureLevels=header['measureLevels'],
                   caseWeightVar=header['caseWeightVar'],
                   varLabels=header['varLabels'],
                   formats=header['formats'],
                   multRespDefs=header['multRespDefs'],
                   columnWidths=header['columnWidths'],
                   fileAttributes=header['fileAttributes'],
                   alignments=header['alignments'],
                   fileLabel=header['fileLabel'],
                   missingValues=header['missingValues']) as writer:
        while blocknumber * blocksize < len(ids):
            document = dict(
                keys=ids[blocknumber * blocksize:(blocknumber + 1) * blocksize])
            blocknumber += 1
            r = requests.post(
                config['dburl'] + '/_all_docs?include_docs=true',
                headers=headers,
                data=json.dumps(document))
            for row in yaml.safe_load(r.text)['rows']:
                orderedrow = []
                for varName in header['varNames']:
                    orderedrow.append(row['doc'][varName])
                writer.writerow(orderedrow)
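A side note on the snippet above: the CouchDB responses are parsed with yaml.safe_load, which works because JSON is, for practical purposes, a subset of YAML 1.2. requests can also decode the JSON body directly, as in this small equivalent sketch:

rows = r.json()['rows']  # same result as yaml.safe_load(r.text)['rows']
ids = [row['id'] for row in rows]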
Example No. 7
    def test_sav_duplicate_columns(self):
        more_than_64_char = "akjasdlsakjdkjsadlsakjgdlsagdgdgdsajdgkjdsdgsj" \
            "adsasdasgdsahdsahdsadgsdf"
        md = """
        | survey |
        |        | type           | name | label | choice_filter |
        |        | select one fts | fruit| Fruit | active=1      |
        |        | integer        | age  | Age   |               |
        |        | integer        | {}   | Resp2 |               |
        |        | begin group    | {}   | Resp  |               |
        |        | integer        | age  | Resp  |               |
        |        | text           | name | Name  |               |
        |        | begin group    | {}   | Resp2 |               |
        |        | integer        | age  | Resp2 |               |
        |        | integer        | {}   | Resp2 |               |
        |        | end group      |      |       |               |
        |        | end group      |      |       |               |


        | choices |
        |         | list name | name   | label  | active |
        |         | fts       | orange | Orange | 1      |
        |         | fts       | mango  | Mango  | 1      |
        """
        md = md.format(more_than_64_char, more_than_64_char, more_than_64_char,
                       more_than_64_char)
        survey = self.md_to_pyxform_survey(md)
        export_builder = ExportBuilder()
        export_builder.TRUNCATE_GROUP_TITLE = True
        export_builder.set_survey(survey)
        export_builder.INCLUDE_LABELS = True
        export_builder.set_survey(survey)

        for sec in export_builder.sections:
            sav_options = export_builder._get_sav_options(sec['elements'])
            sav_file = NamedTemporaryFile(suffix=".sav")
            # No exception is raised
            SavWriter(sav_file.name, **sav_options)
Example No. 8
    def to_zipped_sav(self, path, data, *args):
        def write_row(row, csv_writer, fields):
            # The csv_writer argument is unused here; the row is written through
            # the sav_writer bound in the enclosing scope at call time.
            sav_writer.writerow(
                [encode_if_str(row, field, True) for field in fields])

        sav_defs = {}

        # write headers
        for section in self.sections:
            fields = [element['title'] for element in section['elements']]\
                + self.EXTRA_FIELDS
            c = 0
            var_labels = {}
            var_names = []
            tmp_k = {}
            for field in fields:
                c += 1
                var_name = 'var%d' % c
                var_labels[var_name] = field
                var_names.append(var_name)
                tmp_k[field] = var_name

            var_types = dict(
                [(tmp_k[element['title']],
                  0 if element['type'] in ['decimal', 'int'] else 255)
                 for element in section['elements']] +
                [(tmp_k[item],
                  0 if item in ['_id', '_index', '_parent_index'] else 255)
                 for item in self.EXTRA_FIELDS])
            sav_file = NamedTemporaryFile(suffix=".sav")
            sav_writer = SavWriter(sav_file.name,
                                   varNames=var_names,
                                   varTypes=var_types,
                                   varLabels=var_labels,
                                   ioUtf8=True)
            sav_defs[section['name']] = {
                'sav_file': sav_file,
                'sav_writer': sav_writer
            }

        index = 1
        indices = {}
        survey_name = self.survey.name
        for d in data:
            # decode mongo section names
            joined_export = dict_to_joined_export(d, index, indices,
                                                  survey_name)
            output = ExportBuilder.decode_mongo_encoded_section_names(
                joined_export)
            # attach meta fields (index, parent_index, parent_table)
            # output has keys for every section
            if survey_name not in output:
                output[survey_name] = {}
            output[survey_name][INDEX] = index
            output[survey_name][PARENT_INDEX] = -1
            for section in self.sections:
                # get data for this section and write to csv
                section_name = section['name']
                sav_def = sav_defs[section_name]
                fields = [element['xpath'] for element in section['elements']
                          ] + self.EXTRA_FIELDS
                sav_writer = sav_def['sav_writer']
                row = output.get(section_name, None)
                if type(row) == dict:
                    write_row(self.pre_process_row(row, section), sav_writer,
                              fields)
                elif type(row) == list:
                    for child_row in row:
                        write_row(self.pre_process_row(child_row, section),
                                  sav_writer, fields)
            index += 1

        for section_name, sav_def in sav_defs.iteritems():
            sav_def['sav_writer'].closeSavFile(sav_def['sav_writer'].fh,
                                               mode='wb')

        # write zipfile
        with ZipFile(path, 'w') as zip_file:
            for section_name, sav_def in sav_defs.iteritems():
                sav_file = sav_def['sav_file']
                sav_file.seek(0)
                zip_file.write(sav_file.name,
                               "_".join(section_name.split("/")) + ".sav")

        # close files when we are done
        for section_name, sav_def in sav_defs.iteritems():
            sav_def['sav_file'].close()
Example No. 9
    def to_zipped_sav(self, path, data, *args, **kwargs):
        total_records = kwargs.get('total_records')

        def write_row(row, csv_writer, fields):
            # As in Example No. 8, csv_writer is unused; the row goes through
            # the sav_writer from the enclosing scope.
            sav_writer.writerow(
                [encode_if_str(row, field, True) for field in fields])

        sav_defs = {}

        # write headers
        for section in self.sections:
            sav_options = self._get_sav_options(section['elements'])
            sav_file = NamedTemporaryFile(suffix=".sav")
            sav_writer = SavWriter(sav_file.name,
                                   ioLocale="en_US.UTF-8",
                                   **sav_options)
            sav_defs[section['name']] = {
                'sav_file': sav_file,
                'sav_writer': sav_writer
            }

        media_xpaths = [] if not self.INCLUDE_IMAGES \
            else self.dd.get_media_survey_xpaths()

        index = 1
        indices = {}
        survey_name = self.survey.name
        for i, d in enumerate(data, start=1):
            # decode mongo section names
            joined_export = dict_to_joined_export(d, index, indices,
                                                  survey_name, self.survey, d,
                                                  media_xpaths)
            output = ExportBuilder.decode_mongo_encoded_section_names(
                joined_export)
            # attach meta fields (index, parent_index, parent_table)
            # output has keys for every section
            if survey_name not in output:
                output[survey_name] = {}
            output[survey_name][INDEX] = index
            output[survey_name][PARENT_INDEX] = -1
            for section in self.sections:
                # get data for this section and write to csv
                section_name = section['name']
                sav_def = sav_defs[section_name]
                fields = [element['xpath'] for element in section['elements']]
                sav_writer = sav_def['sav_writer']
                row = output.get(section_name, None)
                if type(row) == dict:
                    write_row(self.pre_process_row(row, section), sav_writer,
                              fields)
                elif type(row) == list:
                    for child_row in row:
                        write_row(self.pre_process_row(child_row, section),
                                  sav_writer, fields)
            index += 1
            track_task_progress(i, total_records)

        for section_name, sav_def in sav_defs.iteritems():
            sav_def['sav_writer'].closeSavFile(sav_def['sav_writer'].fh,
                                               mode='wb')

        # write zipfile
        with ZipFile(path, 'w', ZIP_DEFLATED, allowZip64=True) as zip_file:
            for section_name, sav_def in sav_defs.iteritems():
                sav_file = sav_def['sav_file']
                sav_file.seek(0)
                zip_file.write(sav_file.name,
                               "_".join(section_name.split("/")) + ".sav")

        # close files when we are done
        for section_name, sav_def in sav_defs.iteritems():
            sav_def['sav_file'].close()
Example No. 10
def write_to_SPSS(savFileName, records, varNames, varTypes):
    with SavWriter(savFileName, varNames, varTypes) as writer:
        for record in records:
            writer.writerow(record)
        res2 = i["valuelabels"]
        if res2 == "0":
            valueLabels[name] = {}
        else:
            res3 = pickle.loads(res2)
            valueLabels[name] = res3

    else:
        valueLabels[name] = {}

    formats[name] = i["formats"]


savFileName = '/opt/someFile.sav'
with SavWriter(savFileName=savFileName, varNames=varNames, varTypes=varTypes,
               formats=formats, varLabels=varLabels, valueLabels=valueLabels,
               ioUtf8=True, columnWidths={}) as writer:
    for row_data in query_data:
        sub_li = []
        for i in range(len(my_columns_types)):
            sub_data = row_data[varNames[i]]
            if my_columns_types[i] == "VARCHAR":
                sub_li.append(json.loads(sub_data))
            elif my_columns_types[i] == "DATETIME":
                sub_li.append(writer.spssDateTime(b'%s' % sub_data, '%Y-%m-%d %H:%M:%S'))
            elif my_columns_types[i] == "DATE":
                sub_li.append(writer.spssDateTime(b'%s' % sub_data, '%Y-%m-%d'))
            else:
                sub_li.append(sub_data)
        data.append(sub_li)
def create_sav_writer(filename, settings):
    from savReaderWriter import SavWriter
    return SavWriter(filename, ioUtf8=True, **settings)
Example No. 13
    def genreate_spss(self):
        self.adjust_data()
        mdt = my_datetime()
        nowtime = datetime.datetime.now().strftime("%Y%m%d")
        new_time1 = "%.6f" % float(time.time())
        new_time3 = new_time1.split(".")[0] + new_time1.split(".")[1]
        filename = "u" + str(
            self.user_id) + "_" + str(nowtime) + "_" + str(new_time3)
        filepath = Config().get_content("filepath")["download_path"]
        if filepath:
            user_file_path = os.path.join(filepath, str(self.user_id))
            time_now = datetime.datetime.now().strftime("%Y-%m-%d")
            user_subfilepath = os.path.join(user_file_path, time_now)

            if not os.path.exists(user_file_path):
                os.makedirs(user_file_path)

            if not os.path.exists(user_subfilepath):
                os.makedirs(user_subfilepath)

        else:

            filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                                    "download")
            user_file_path = os.path.join(filepath, str(self.user_id))
            time_now = datetime.datetime.now().strftime("%Y-%m-%d")
            user_subfilepath = os.path.join(user_file_path, time_now)

            if not os.path.exists(user_file_path):
                os.makedirs(user_file_path)

            if not os.path.exists(user_subfilepath):
                os.makedirs(user_subfilepath)

        savFileName = os.path.join(user_subfilepath, filename + ".sav")
        print(self.varLabels)
        with SavWriter(savFileName=savFileName,
                       varNames=self.varNames,
                       varTypes=self.varTypes,
                       formats=self.formats,
                       varLabels=self.varLabels,
                       valueLabels=self.valueLabels,
                       ioUtf8=True,
                       columnWidths={}) as writer:
            for row_data in self.my_data:
                sub_li = []
                for i in range(len(self.my_columns_types)):

                    sub_data = row_data[self.varNames[i]]

                    if self.my_columns_types[i] == "VARCHAR":
                        sub_li.append(sub_data)
                    elif self.my_columns_types[i] == "DATETIME":
                        aaa = mdt.become_str(sub_data)
                        sub_li.append(
                            writer.spssDateTime(bytes(aaa, 'utf-8'),
                                                '%Y-%m-%d %H:%M:%S'))
                    elif self.my_columns_types[i] == "DATE":
                        sub_li.append(
                            writer.spssDateTime('%s' % sub_data, '%Y-%m-%d'))
                    else:
                        sub_li.append(sub_data)
                self.data.append(sub_li)

            writer.writerows(self.data)

        return savFileName
Example No. 14
    def test_date_encoding(self):
        with SavWriter(self.savFileName, [b'date'], {b'date': 0}) as writer:
            seconds1 = writer.spssDateTime(b"2000-01-01", "%Y-%m-%d")
            seconds2 = writer.spssDateTime("2000-01-01", "%Y-%m-%d")
            self.assertEqual(seconds1, 13166064000.0)
            self.assertEqual(seconds1, seconds2)
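The constant asserted above follows from how SPSS stores date/time values: as seconds since October 14, 1582, the start of the Gregorian calendar. A small sketch checking the arithmetic independently of savReaderWriter:

from datetime import datetime

# SPSS datetimes count seconds from the Gregorian epoch, 1582-10-14.
SPSS_EPOCH = datetime(1582, 10, 14)
seconds = (datetime(2000, 1, 1) - SPSS_EPOCH).total_seconds()
print(seconds)  # 13166064000.0, the value asserted in the test above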
Example No. 15
    # column_names and column_specifications are used by pandas.read_fwf to read the Medi-Cal file.
    with open(config.county_load_info) as fp:
        column_names, column_specifications = zip(*json.load(fp))

    # All columns should be brought in as strings.
    converters = {name: str for name in column_names}

    df = pd.read_fwf(medical_file,
                     colspecs=column_specifications,
                     header=None,
                     names=column_names,
                     converters=converters)

    df = common.drop_summary_row(df)
    df = common.drop_cinless_rows(df)

    with open(config.county_save_info) as fp:
        save_info = json.load(fp)

    with SavWriter(save_file_name,
                   save_info['column_names'],
                   save_info['types'],
                   measureLevels=save_info['measure_levels'],
                   alignments=save_info['alignments'],
                   columnWidths=save_info['column_widths']) as writer:

        writer.writerows(df[save_info['column_names']].values)

    print('Program finished in: {}.'.format(
        str(datetime.now() - program_start_time)))
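The county_save_info JSON consumed above is not included in the example. A hypothetical structure consistent with the keyword arguments it feeds (column names and every value below are illustrative only) might be:

# Illustrative sketch only; the real county_save_info file is not part of the example.
save_info_example = {
    'column_names': ['cin', 'aid_code'],
    'types': {'cin': 9, 'aid_code': 3},  # 0 would mean numeric, N > 0 a string of width N
    'measure_levels': {'cin': 'nominal', 'aid_code': 'nominal'},
    'alignments': {'cin': 'left', 'aid_code': 'left'},
    'column_widths': {'cin': 10, 'aid_code': 5},
}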