Exemplo n.º 1
0
def delimited_from_dichotomous(meta, df, name):
    """ Condense a dichotomous set (0/1 columns in df) into one series.

    A delimited set is produced only when it is actually needed: when
    df has a single column, or when the indicated responses are
    mutually exclusive, a plain 'single' series is returned instead and
    meta['columns'][name]['type'] is updated accordingly.

    Parameters
    ----------
    meta : dict
        The meta document paired to the data being converted

    df : pandas.DataFrame
        The column/s in the dichotomous set. This may be a single-column
        DataFrame, in which case a non-delimited set will be returned.

    name : str
        The relevant key name for the resulting column in meta['columns']

    Returns
    -------
    meta : dict
        The meta document paired to the data being converted

    series: pandas.series
        The converted series
    """

    if df.shape[1] == 1:
        # Only one possible response in the set: keep it as a 'single'
        # (0 means "not selected", so it becomes missing).
        meta['columns'][name]['type'] = 'single'
        return meta, df.replace(0, np.nan)

    row_totals = df.sum(axis=1)
    if (row_totals <= 1).all():
        # No respondent picked more than one option, so the responses
        # are mutually exclusive: recode to categorical codes 1..n.
        recoded = df.copy()
        for code, original in enumerate(recoded.columns, start=1):
            recoded[code] = recoded[original].replace(1, code)
            del recoded[original]
        # At most one code per row, so the row sum is that code
        # (or 0, which maps to missing).
        series = recoded.sum(axis=1).replace(0, np.nan)
        meta['columns'][name]['type'] = 'single'
        return meta, series

    # Genuinely multi-response: build a delimited set.
    series = condense_dichotomous_set(df, values_from_labels=False)
    return meta, series
Exemplo n.º 2
0
def delimited_from_dichotomous(meta, df, name):
    """ Turn dichotomous 0/1 columns (df) into a single series.

    The result is a delimited set only when more than one response can
    co-occur. With a single column, or with mutually exclusive
    responses, a 'single' series is returned and the meta type for
    `name` is switched to 'single'.

    Parameters
    ----------
    meta : dict
        The meta document paired to the data being converted

    df : pandas.DataFrame
        The column/s in the dichotomous set. This may be a single-column
        DataFrame, in which case a non-delimited set will be returned.

    name : str
        The relevant key name for the resulting column in meta['columns']

    Returns
    -------
    meta : dict
        The meta document paired to the data being converted

    series: pandas.series
        The converted series
    """

    if len(df.columns) == 1:
        # Single possible response -> treat as 'single';
        # 0 ("not selected") becomes missing.
        meta['columns'][name]['type'] = 'single'
        return meta, df.replace(0, np.nan)

    if (df.sum(axis=1) <= 1).all():
        # Responses never overlap -> recode each column to its
        # 1-based category code and collapse to one 'single' series.
        converted = df.copy()
        code = 0
        for col in list(converted.columns):
            code += 1
            converted[code] = converted[col].replace(1, code)
            del converted[col]
        meta['columns'][name]['type'] = 'single'
        return meta, converted.sum(axis=1).replace(0, np.nan)

    # Overlapping responses: a real delimited set is required.
    return meta, condense_dichotomous_set(df, values_from_labels=False)
Exemplo n.º 3
0
def extract_sav_meta(sav_file,
                     name="",
                     data=None,
                     ioLocale='en_US.UTF-8',
                     ioUtf8=True,
                     dichot=None,
                     dates_as_strings=False,
                     text_key="main"):
    """Read an SPSS .sav header and build a Quantipy meta document.

    Column types come from the SAV value labels and formats, or are
    inferred from ``data`` when neither is present. 'D'-type multiple
    response sets are condensed into delimited-set columns inside
    ``data`` and their source columns are dropped.

    Parameters
    ----------
    sav_file : str
        Path to the .sav file whose header is read.
    name : str
        Label used in the generated meta's info text.
    data : pandas.DataFrame or None
        Case data already read from the same file; modified in place
        where applicable (date parsing, condensed sets).
    ioLocale : str
        Locale handed to savReaderWriter.
    ioUtf8 : bool
        Whether savReaderWriter should decode text as UTF-8.
    dichot : dict or None
        'yes'/'no' codes used by dichotomous sets; defaults to
        {'yes': 1, 'no': 0}.
    dates_as_strings : bool
        If True, DATETIME columns are typed 'string' instead of being
        parsed to datetime64.
    text_key : str
        Key under which label texts are stored in the meta.

    Returns
    -------
    meta : dict
    data : pandas.DataFrame
    """

    # Resolved here (not in the signature) to avoid a mutable default.
    if dichot is None: dichot = {'yes': 1, 'no': 0}
    """ see parse_sav_file doc """
    with sr.SavHeaderReader(sav_file, ioLocale=ioLocale,
                            ioUtf8=ioUtf8) as header:
        # Metadata Attributes
        # ['valueLabels', 'varTypes', 'varSets', 'varAttributes', 'varRoles',
        #  'measureLevels', 'caseWeightVar', 'varNames', 'varLabels', 'formats',
        #  'multRespDefs', 'columnWidths', 'fileAttributes', 'alignments',
        #  'fileLabel', 'missingValues']
        metadata = header.dataDictionary(True)

    meta = start_meta(text_key=text_key)
    meta['info']['text'] = 'Converted from SAV file {}.'.format(name)
    meta['info']['from_source'] = {'pandas_reader': 'sav'}
    # The 'data file' set lists every column, in file order.
    meta['sets']['data file']['items'] = [
        'columns@{}'.format(varName) for varName in metadata.varNames
    ]

    # This should probably be somewhere in the metadata
    # weight_variable_name = metadata.caseWeightVar

    # Descriptions of attributes in metadata are are located here :
    # http://pythonhosted.org/savReaderWriter/#savwriter-write-spss-system-files
    for column in metadata.varNames:
        meta['columns'][column] = {}
        meta['columns'][column]['name'] = column
        meta['columns'][column]['parent'] = {}
        if column in metadata.valueLabels:
            # ValueLabels is type = 'single' (possibry 1-1 map)
            meta['columns'][column]['values'] = []
            meta['columns'][column]['type'] = "single"
            # NOTE(review): iteritems/unicode are Python-2-only; this
            # function will not run under Python 3 as written.
            for value, text in metadata.valueLabels[column].iteritems():
                values = {
                    'text': {
                        text_key: unicode(text)
                    },
                    'value': int(value)
                }
                meta['columns'][column]['values'].append(values)
        else:
            if column in metadata.formats:
                # Derive the Quantipy type from the SPSS format string.
                f = metadata.formats[column]
                if 'DATETIME' in f:
                    if dates_as_strings:
                        # DATETIME fields from SPSS are currently
                        # being read in as strings because there's an
                        # as-yet undetermined discrepancy between the
                        # input and output dates if datetime64 is used
                        meta['columns'][column]['type'] = 'string'
                    else:
                        meta['columns'][column]['type'] = 'date'
                        data[column] = pd.to_datetime(data[column])
                elif f.startswith('A'):
                    # 'A...' formats are SPSS string fields.
                    meta['columns'][column]['type'] = 'string'
                elif '.' in f:
                    meta['columns'][column]['type'] = "float"
                else:
                    meta['columns'][column]['type'] = "int"
            else:
                # Infer meta from data
                if data is not None:
                    # print "VAR '{}' NOT IN value_labels".format(column)
                    column_values = data[column].dropna()
                    if len(column_values) > 0:
                        # Get the first "not nan" value from the column
                        value = column_values.values[0]
                        # NOTE(review): pd.np was removed in pandas 2.0.
                        if isinstance(value, pd.np.float64):
                            # Float AND Int because savReaderWriter loads them both as float64
                            meta['columns'][column]['text'] = {
                                text_key: [column]
                            }
                            meta['columns'][column]['type'] = "float"
                            # All values whole -> try to downcast to int.
                            if (data[column].dropna() % 1).sum() == 0:
                                if (data[column].dropna() % 1).unique() == [0]:
                                    try:
                                        data[column] = data[column].astype(
                                            'int')
                                    except:
                                        pass
                                    # NOTE(review): the meta type becomes
                                    # "int" even when the cast above fails
                                    # (e.g. NaN present) — confirm intended.
                                    meta['columns'][column]['type'] = "int"

                        elif isinstance(value, unicode) or isinstance(
                                value, str):
                            # Strings
                            meta['columns'][column]['text'] = {
                                text_key: [column]
                            }
                            meta['columns'][column]['type'] = "string"

        # The following attributes are recognised but intentionally
        # not converted (placeholders for future handling).
        if column in metadata.varTypes:
            pass

        if column in metadata.varSets:
            pass

        if column in metadata.varAttributes:
            pass

        if column in metadata.varRoles:
            pass

        if column in metadata.measureLevels:
            pass

        # Some labels are empty strings.
        if column in metadata.varLabels:
            meta['columns'][column]['text'] = {
                text_key: metadata.varLabels[column]
            }

    for mrset in metadata.multRespDefs:
        # meta['masks'][mrset] = {}
        # 'D' is "multiple dichotomy sets" in SPSS
        # 'C' is "multiple category sets" in SPSS
        if metadata.multRespDefs[mrset]['setType'] == 'C':
            # NOTE(review): bare string below is a no-op; 'C' sets are
            # currently not converted.
            'C'
#             meta['masks'][mrset]['type'] = "categorical set"
        elif metadata.multRespDefs[mrset]['setType'] == 'D':
            # NOTE(review): bare string below is a no-op.
            'D'
            varNames = metadata.multRespDefs[mrset]['varNames']
            # Find the index where the delimited set should be inserted
            # into data, which is immediately prior to the start of the
            # dichotomous set columns
            dls_idx = data.columns.tolist().index(varNames[0])
            # Generate the delimited set from the dichotomous set
            dls = condense_dichotomous_set(data[varNames],
                                           values_from_labels=False,
                                           **dichot)
            # Insert the delimited set into data
            data.insert(dls_idx, mrset, dls)
            # Generate the column meta for the new delimited set
            meta['columns'][mrset] = {
                'name':
                mrset,
                'type':
                'delimited set',
                'text': {
                    text_key: metadata.multRespDefs[mrset]['label']
                },
                'parent': {},
                'values': [{
                    'text': {
                        text_key: metadata.varLabels[varName]
                    },
                    'value': int(v)
                } for v, varName in enumerate(varNames, start=1)]
            }
            # Add the new delimited set to the 'data file' set
            df_items = meta['sets']['data file']['items']
            df_items.insert(df_items.index('columns@{}'.format(varNames[0])),
                            'columns@{}'.format(mrset))

            # Drop the source dichotomous columns from both the data
            # and the meta, now that the condensed column replaces them.
            data = data.drop(varNames, axis=1)
            for varName in varNames:
                df_items.remove('columns@{}'.format(varName))
                del meta['columns'][varName]

    return meta, data
Exemplo n.º 4
0
def extract_sav_meta(sav_file, name="", data=None, ioLocale='en_US.UTF-8', ioUtf8=True):
    """Read an SPSS .sav header and build a Quantipy meta document.

    Older variant: types are derived from value labels / formats (or
    inferred from ``data``), and 'D'-type multiple response sets are
    condensed into delimited-set columns added to ``data``.

    Parameters
    ----------
    sav_file : str
        Path to the .sav file whose header is read.
    name : str
        Label used in the generated meta's info text.
    data : pandas.DataFrame or None
        Case data already read from the same file; new condensed
        columns are added to it in place.
    ioLocale : str
        Locale handed to savReaderWriter.
    ioUtf8 : bool
        Whether savReaderWriter should decode text as UTF-8.

    Returns
    -------
    meta : dict
    data : pandas.DataFrame

    (see parse_sav_file doc)
    """
    with sr.SavHeaderReader(sav_file, ioLocale=ioLocale, ioUtf8=ioUtf8) as header:
        # Metadata Attributes
        # ['valueLabels', 'varTypes', 'varSets', 'varAttributes', 'varRoles',
        #  'measureLevels', 'caseWeightVar', 'varNames', 'varLabels', 'formats',
        #  'multRespDefs', 'columnWidths', 'fileAttributes', 'alignments',
        #  'fileLabel', 'missingValues']
        metadata = header.dataDictionary(True)

    meta = start_meta(name=name)
    meta['info']['text'] = 'Converted from SAV file %s.' % (name)
    meta['info']['from_source'] = {'pandas_reader':'sav'}
    # The 'data file' set lists every column, in file order.
    meta['sets']['data file']['items'] = [
        'columns@%s' % (varName)
        for varName in metadata.varNames
    ]

    # This should probably be somewhere in the metadata
    # weight_variable_name = metadata.caseWeightVar

    # Descriptions of attributes in metadata are are located here :
    # http://pythonhosted.org/savReaderWriter/#savwriter-write-spss-system-files
    for column in metadata.varNames:
        meta['columns'][column] = {}

        if column in metadata.valueLabels:
            # ValueLabels is type = 'single' (possibry 1-1 map)
            meta['columns'][column]['values'] = []
            meta['columns'][column]['type'] = "single"
            # NOTE(review): iteritems/unicode are Python-2-only; this
            # function will not run under Python 3 as written.
            for value, text in metadata.valueLabels[column].iteritems():
                values = {'text': {'main': unicode(text)},
                          'value': unicode(int(value))}
                meta['columns'][column]['values'].append(values)
        else:
            if column in metadata.formats:
                # Derive numeric type from the SPSS format string:
                # a '.' indicates decimals -> float, otherwise int.
                f = metadata.formats[column]
                if '.' in f:
                    meta['columns'][column]['type'] = "float"
                else:
                    meta['columns'][column]['type'] = "int"
            else:
                # Infer meta from data
                if data is not None:
                    # print "VAR '{}' NOT IN value_labels".format(column)
                    column_values = data[column].dropna()
                    if len(column_values) > 0:
                        # Get the first "not nan" value from the column
                        value = column_values.values[0]
                        # NOTE(review): pd.np was removed in pandas 2.0.
                        if isinstance(value, pd.np.float64):
                            # Float AND Int because savReaderWriter loads them both as float64
                            meta['columns'][column]['text'] = {'main': [column]}
                            meta['columns'][column]['type'] = "float"
                            # All values whole -> try to downcast to int.
                            if (data[column].dropna() % 1).sum() == 0:
                                if (data[column].dropna() % 1).unique() == [0]:
                                    try:
                                        data[column] = data[column].astype('int')
                                    except:
                                        pass
                                    # NOTE(review): the meta type becomes
                                    # "int" even when the cast fails —
                                    # confirm intended.
                                    meta['columns'][column]['type'] = "int"

                        elif isinstance(value, unicode) or isinstance(value, str):
                            # Strings
                            meta['columns'][column]['text'] = {'main': [column]}
                            meta['columns'][column]['type'] = "string"

        # The following attributes are recognised but intentionally
        # not converted (placeholders for future handling).
        if column in metadata.varTypes:
            pass

        if column in metadata.varSets:
            pass

        if column in metadata.varAttributes:
            pass

        if column in metadata.varRoles:
            pass

        if column in metadata.measureLevels:
            pass

        # Some labels are empty strings.
        if column in metadata.varLabels:
            meta['columns'][column]['text'] = {'main': metadata.varLabels[column]}

    for mrset in metadata.multRespDefs:
        # meta['masks'][mrset] = {}
        # 'D' is "multiple dichotomy sets" in SPSS
        # 'C' is "multiple category sets" in SPSS
        if metadata.multRespDefs[mrset]['setType'] == 'C':
            # NOTE(review): bare string below is a no-op; 'C' sets are
            # currently not converted.
            'C'
#             meta['masks'][mrset]['type'] = "categorical set"
        elif metadata.multRespDefs[mrset]['setType'] == 'D':
            # NOTE(review): bare string below is a no-op.
            'D'
#             meta['masks'][mrset]['type'] = "dichotomous set"
#             meta['masks'][mrset]['countedValue'] = metadata.multRespDefs[mrset]['countedValue']
            varNames = metadata.multRespDefs[mrset]['varNames']
#             meta, data[mrset] = delimited_from_dichotomous(meta, data[varNames], mrset)
            # Condense the dichotomous columns into one delimited set.
            data[mrset] = condense_dichotomous_set(data[varNames], values_from_labels=False)
            meta['columns'][mrset] = {
                'type': 'delimited set',
                'text': {'main': metadata.multRespDefs[mrset]['label']},
                'values': [
                    {
                        'text': {'main': metadata.varLabels[varName]},
                        'value': v
                    }
                    for v, varName in enumerate(varNames, start=1)
                ]
            }
            # Replace the dichotomous columns with the new set in the
            # 'data file' item list, keeping the original position.
            idx = meta['sets']['data file']['items'].index('columns@%s' % (varNames[0]))
            items = meta['sets']['data file']['items']
            meta['sets']['data file']['items'] = items[:idx] + ['columns@%s' % (mrset)] + items[idx+len(varNames):]
            
#         meta['masks'][mrset]['text'] = [metadata.multRespDefs[mrset]['label']]
#         meta['masks'][mrset]['items'] = []
#         for var_name in metadata.multRespDefs[mrset]['varNames']:
#             meta['masks'][mrset]['items'].append({'source':"columns@{0}".format(var_name)})

        # df = make_delimited_from_dichotmous(data[common_vars[var]])

    return meta, data
Exemplo n.º 5
0
def quantipy_from_ascribe(path_xml, path_txt, text_key='main'):
    """Convert an AScribe coded-questions export into Quantipy meta/data.

    Parameters
    ----------
    path_xml : str
        Path to the AScribe XML file describing the coded questions.
    path_txt : str
        Path to the tab-delimited (UTF-16) data export.
    text_key : str
        Key under which label texts are stored in the meta.

    Returns
    -------
    meta : dict
        Quantipy meta document for the converted columns.
    data : pandas.DataFrame
        Only the converted (condensed) columns.
    """
    # Read the AScribe metadata; use a context manager so the XML file
    # handle is closed (the original leaked it).
    with open(path_xml) as xml_file:
        meta_ascribe = xmltodict.parse(xml_file)
    # Read the AScribe data (tab-delimited). pandas.DataFrame.from_csv
    # was deprecated and removed; read_csv with index_col=0 and
    # parse_dates=True reproduces its former defaults.
    data_ascribe = pd.read_csv(
        path_txt,
        sep='\t',
        header=0,
        encoding='utf-16',
        index_col=0,
        parse_dates=True
    )

    # Start a Quantipy meta document
    meta = start_meta(text_key=text_key)
    meta['columns']['responseid'] = {
        'type': 'int',
        'text': {text_key: 'responseid'}
    }

    # Container to record the names, in order, of the resulting
    # coded columns
    coded_names = []

    # NOTE(review): assumes 'MultiForm' is a list of variables — verify
    # against single-question exports, where xmltodict may yield a dict.
    for var in meta_ascribe['CodedQuestions']['MultiForm']:
        name = var['Name']
        coded_names.append(name)
        var_text = var['FormTexts']['FormText']['Text']
        if var_text is None: var_text = 'Label not provided'
        var_text = {text_key: var_text}
        columns = []
        values = []
        for val in var['Answers']['Answer']:
            value = int(val['@Precode'])
            if value == 0:
                # 0 is reserved (it means "no response" once condensed)
                msg = (
                    "The value 0 has been assigned to a code for the "
                    "variable '%s'."
                ) % (name)
                warnings.warn(msg)
            val_text = val['Texts']['Text']['#text']
            if val_text is None: val_text = 'Label not provided'
            val_text = {text_key: val_text}
            values.append({'value': value, 'text': val_text})
            columns.append('%s_%s' % (name, value))

        # Create a single series from the dichotomous set
        data_ascribe[name] = condense_dichotomous_set(
            data_ascribe[columns],
            sniff_single=True
        )

        # Determine the Quantipy type of the returned series: 'single'
        # when responses are mutually exclusive (see 'sniff_single' in
        # condense_dichotomous_set())
        if data_ascribe[columns].sum(axis=1).max() == 1:
            col_type = 'single'
        else:
            col_type = 'delimited set'

        # Add the newly defined column to the Quantipy meta
        meta['columns'][name] = {
            'type': col_type,
            'text': var_text,
            'values': values
        }

    # Record the converted columns in the 'data file' set once, after
    # the loop (the original rebuilt this list on every iteration).
    if coded_names:
        meta['sets']['data file']['items'] = [
            'columns@%s' % (col_name)
            for col_name in coded_names
        ]

    # Keep only the slice that has been converted.
    data = data_ascribe[coded_names]

    return meta, data
Exemplo n.º 6
0
def quantipy_from_ascribe(path_xml, path_txt, text_key='main'):
    """Build a Quantipy (meta, data) pair from an AScribe export.

    Parameters
    ----------
    path_xml : str
        AScribe XML file describing the coded questions.
    path_txt : str
        Tab-delimited (UTF-16) data export.
    text_key : str
        Key under which label texts are stored in the meta.

    Returns
    -------
    meta : dict
    data : pandas.DataFrame
        Only the converted (condensed) columns.
    """
    # Read the AScribe metadata, closing the XML file handle (the
    # original left it open).
    with open(path_xml) as xml_file:
        meta_ascribe = xmltodict.parse(xml_file)
    # pandas.DataFrame.from_csv was deprecated and removed; read_csv
    # with index_col=0 / parse_dates=True matches its old defaults.
    data_ascribe = pd.read_csv(path_txt,
                               sep='\t',
                               header=0,
                               encoding='utf-16',
                               index_col=0,
                               parse_dates=True)

    # Start a Quantipy meta document
    meta = start_meta(text_key=text_key)
    meta['columns']['responseid'] = {
        'type': 'int',
        'text': {
            text_key: 'responseid'
        }
    }

    # Names, in order, of the resulting coded columns
    coded_names = []

    # NOTE(review): assumes 'MultiForm' is a list — verify for
    # single-question exports where xmltodict may yield a dict.
    for var in meta_ascribe['CodedQuestions']['MultiForm']:
        name = var['Name']
        coded_names.append(name)
        var_text = var['FormTexts']['FormText']['Text']
        if var_text is None: var_text = 'Label not provided'
        var_text = {text_key: var_text}
        columns = []
        values = []
        for val in var['Answers']['Answer']:
            value = int(val['@Precode'])
            if value == 0:
                # 0 is reserved: it means "no response" after condensing
                msg = ("The value 0 has been assigned to a code for the "
                       "variable '%s'.") % (name)
                warnings.warn(msg)
            val_text = val['Texts']['Text']['#text']
            if val_text is None: val_text = 'Label not provided'
            val_text = {text_key: val_text}
            values.append({'value': value, 'text': val_text})
            columns.append('%s_%s' % (name, value))

        # Create a single series from the dichotomous set
        data_ascribe[name] = condense_dichotomous_set(data_ascribe[columns],
                                                      sniff_single=True)

        # 'single' when responses are mutually exclusive, otherwise a
        # delimited set (see 'sniff_single' in condense_dichotomous_set)
        if data_ascribe[columns].sum(axis=1).max() == 1:
            col_type = 'single'
        else:
            col_type = 'delimited set'

        # Add the newly defined column to the Quantipy meta
        meta['columns'][name] = {'type': col_type, 'text': var_text, 'values': values}

    # Build the 'data file' item list once, after the loop (the
    # original recomputed it on every iteration).
    if coded_names:
        meta['sets']['data file']['items'] = [
            'columns@%s' % (col_name) for col_name in coded_names
        ]

    # Keep only the slice that has been converted.
    data = data_ascribe[coded_names]

    return meta, data
Exemplo n.º 7
0
def extract_sav_meta(sav_file,
                     name="",
                     data=None,
                     ioLocale='en_US.UTF-8',
                     ioUtf8=True,
                     dichot=None,
                     dates_as_strings=False,
                     text_key="en-GB",
                     engine='savReaderWriter'):
    """Read an SPSS .sav header and build a Quantipy meta document.

    Column types are derived from the SAV value labels and formats (or
    inferred from ``data``), dates are parsed in place, and multiple
    response sets ('C' and 'D') are condensed into delimited-set
    columns inside ``data``.

    Parameters
    ----------
    sav_file : str
        Path to the .sav file.
    name : str
        Label used in the generated meta's info text.
    data : pandas.DataFrame or None
        Case data already read from the same file; modified in place
        where applicable.
    ioLocale : str
        Reader locale; its encoding suffix is used by the 'readstat'
        engine.
    ioUtf8 : bool
        Whether savReaderWriter should decode text as UTF-8.
    dichot : dict or None
        'yes'/'no' codes used by dichotomous sets; defaults to
        {'yes': 1, 'no': 0}.
    dates_as_strings : bool
        If True, DATETIME columns are typed 'string' instead of being
        parsed to datetime64.
    text_key : str
        Key under which label texts are stored in the meta.
    engine : str
        'readstat' (pyreadstat) or 'savReaderWriter'.

    Returns
    -------
    meta : dict
    data : pandas.DataFrame
    """

    if engine == 'readstat':
        # Metadata-only read: the returned frame is empty, so discard it.
        _, metadata = pyreadstat.read_sav(sav_file,
                                          encoding=ioLocale.split(".")[-1],
                                          metadataonly=True)
        meta = start_meta(text_key=text_key)

        meta['info']['text'] = 'Converted from SAV file {}.'.format(name)
        meta['info']['from_source'] = {'pandas_reader': 'sav'}
        # The 'data file' set lists every column, in file order.
        meta['sets']['data file']['items'] = [
            'columns@{}'.format(varName) for varName in metadata.column_names
        ]

        for index, column in enumerate(metadata.column_names):
            meta['columns'][column] = {}
            meta['columns'][column]['name'] = column
            meta['columns'][column]['parent'] = {}
            if column in metadata.variable_value_labels:
                meta['columns'][column]['values'] = []
                meta['columns'][column]['type'] = "single"
                for value, text in metadata.variable_value_labels[
                        column].items():
                    values = {
                        'text': {
                            text_key: str(text)
                        },
                        'value': int(value)
                    }
                    meta['columns'][column]['values'].append(values)
                    # if user has stored single answer data as a string rather than number
                    # we convert it to floats and store non convertables as nan (with coerce)
                    if column in data.columns and data[column].dtype == 'O':
                        data[column] = pd.to_numeric(data[column],
                                                     errors='coerce',
                                                     downcast='float')
            else:
                if column in metadata.original_variable_types:
                    # Derive the Quantipy type from the SPSS format string.
                    f = metadata.original_variable_types[column]
                    if 'DATETIME' in f:
                        if dates_as_strings:
                            # DATETIME fields from SPSS are currently
                            # being read in as strings because there's an
                            # as-yet undetermined discrepancy between the
                            # input and output dates if datetime64 is used
                            meta['columns'][column]['type'] = 'string'
                        else:
                            meta['columns'][column]['type'] = 'date'
                            data[column] = pd.to_datetime(data[column])
                    elif f.startswith('A'):
                        # 'A...' formats are SPSS string fields.
                        meta['columns'][column]['type'] = 'string'
                    elif '.' in f:
                        meta['columns'][column]['type'] = "float"
                    else:
                        meta['columns'][column]['type'] = "int"

            # add the variable label to the meta
            meta['columns'][column]['text'] = {
                text_key: metadata.column_labels[index]
            }
        return meta, data

    elif engine == 'savReaderWriter':
        # Resolved here (not in the signature) to avoid a mutable default.
        if dichot is None: dichot = {'yes': 1, 'no': 0}
        # see parse_sav_file doc
        with sr.SavHeaderReader(sav_file, ioLocale=ioLocale,
                                ioUtf8=ioUtf8) as header:
            # Metadata Attributes
            # ['valueLabels', 'varTypes', 'varSets', 'varAttributes', 'varRoles',
            #  'measureLevels', 'caseWeightVar', 'varNames', 'varLabels', 'formats',
            #  'multRespDefs', 'columnWidths', 'fileAttributes', 'alignments',
            #  'fileLabel', 'missingValues']
            metadata = header.dataDictionary(True)

        meta = start_meta(text_key=text_key)
        meta['info']['text'] = 'Converted from SAV file {}.'.format(name)
        meta['info']['from_source'] = {'pandas_reader': 'sav'}
        meta['sets']['data file']['items'] = [
            'columns@{}'.format(varName) for varName in metadata.varNames
        ]

        # This should probably be somewhere in the metadata
        # weight_variable_name = metadata.caseWeightVar

        # Descriptions of attributes in metadata are are located here :
        # http://pythonhosted.org/savReaderWriter/#savwriter-write-spss-system-files
        for column in metadata.varNames:
            meta['columns'][column] = {}
            meta['columns'][column]['name'] = column
            meta['columns'][column]['parent'] = {}
            if column in metadata.valueLabels:
                # ValueLabels is type = 'single' (possibry 1-1 map)
                meta['columns'][column]['values'] = []
                meta['columns'][column]['type'] = "single"
                for value, text in metadata.valueLabels[column].items():
                    values = {
                        'text': {
                            text_key: str(text)
                        },
                        'value': int(value)
                    }
                    meta['columns'][column]['values'].append(values)
            else:
                if column in metadata.formats:
                    f = metadata.formats[column]
                    if 'DATETIME' in f:
                        if dates_as_strings:
                            # DATETIME fields from SPSS are currently
                            # being read in as strings because there's an
                            # as-yet undetermined discrepancy between the
                            # input and output dates if datetime64 is used
                            meta['columns'][column]['type'] = 'string'
                        else:
                            meta['columns'][column]['type'] = 'date'
                            data[column] = pd.to_datetime(data[column])
                    elif f.startswith('A'):
                        meta['columns'][column]['type'] = 'string'
                    elif '.' in f:
                        meta['columns'][column]['type'] = "float"
                    else:
                        meta['columns'][column]['type'] = "int"
                else:
                    # Infer meta from data
                    if data is not None:
                        column_values = data[column].dropna()
                        if len(column_values) > 0:
                            # Get the first "not nan" value from the column
                            value = column_values.values[0]
                            # np.float64 directly: pd.np was removed in
                            # pandas 2.0.
                            if isinstance(value, np.float64):
                                # Float AND Int because savReaderWriter loads them both as float64
                                meta['columns'][column]['text'] = {
                                    text_key: [column]
                                }
                                meta['columns'][column]['type'] = "float"
                                # All values whole -> try to downcast to int.
                                if (data[column].dropna() % 1).sum() == 0:
                                    if (data[column].dropna() %
                                            1).unique() == [0]:
                                        try:
                                            data[column] = data[column].astype(
                                                'int')
                                        except (ValueError, TypeError):
                                            # e.g. NaN present: cast fails.
                                            pass
                                        # NOTE(review): the meta type is set
                                        # to "int" even when the cast fails,
                                        # matching the original behaviour —
                                        # confirm intended.
                                        meta['columns'][column]['type'] = "int"

                            elif isinstance(value, str):
                                # Strings (the original tested str twice)
                                meta['columns'][column]['text'] = {
                                    text_key: [column]
                                }
                                meta['columns'][column]['type'] = "string"

            # The following attributes are recognised but intentionally
            # not converted (placeholders for future handling).
            if column in metadata.varTypes:
                pass

            if column in metadata.varSets:
                pass

            if column in metadata.varAttributes:
                pass

            if column in metadata.varRoles:
                pass

            if column in metadata.measureLevels:
                pass

            # Some labels are empty strings.note
            if column in metadata.varLabels:
                meta['columns'][column]['text'] = {
                    text_key: metadata.varLabels[column]
                }

        for mrset in metadata.multRespDefs:
            # 'D' is "multiple dichotomy sets" in SPSS
            # 'C' is "multiple category sets" in SPSS
            varNames = list(metadata.multRespDefs[mrset]['varNames'])
            # Find the index where the delimited set should be inserted
            # into data, which is immediately prior to the start of the
            # dichotomous set columns
            dls_idx = data.columns.tolist().index(varNames[0])
            if metadata.multRespDefs[mrset]['setType'] == 'C':
                # Raise if value object of columns is not equal
                if not all(meta['columns'][v]['values'] == meta['columns'][
                        varNames[0]]['values'] for v in varNames):
                    msg = 'Columns must have equal values to be combined in a set: {}'
                    raise ValueError(msg.format(varNames))
                # Concatenate columns to set
                df_str = data[varNames].astype('str')
                dls = df_str.apply(lambda x: ';'.join([
                    v.replace('.0', '')
                    for v in x.tolist() if not v in ['nan', 'None']
                ]),
                                   axis=1) + ';'
                # np.nan: np.NaN was removed in NumPy 2.0.
                dls.replace({';': np.nan}, inplace=True)
                # Get value object
                values = meta['columns'][varNames[0]]['values']

            elif metadata.multRespDefs[mrset]['setType'] == 'D':
                # Generate the delimited set from the dichotomous set
                dls = condense_dichotomous_set(data[varNames],
                                               values_from_labels=False,
                                               **dichot)
                # Get value object
                values = [{
                    'text': {
                        text_key: metadata.varLabels[varName]
                    },
                    'value': int(v)
                } for v, varName in enumerate(varNames, start=1)]
            else:
                # Unknown set type: leave it untouched.
                continue
            # Insert the delimited set into data
            data.insert(dls_idx, mrset, dls)
            # Generate the column meta for the new delimited set
            meta['columns'][mrset] = {
                'name': mrset,
                'type': 'delimited set',
                'text': {
                    text_key: metadata.multRespDefs[mrset]['label']
                },
                'parent': {},
                'values': values
            }
            # Add the new delimited set to the 'data file' set
            df_items = meta['sets']['data file']['items']
            df_items.insert(df_items.index('columns@{}'.format(varNames[0])),
                            'columns@{}'.format(mrset))

            # Drop the source columns from both the data and the meta,
            # now that the condensed column replaces them.
            data = data.drop(varNames, axis=1)
            for varName in varNames:
                df_items.remove('columns@{}'.format(varName))
                del meta['columns'][varName]

        return meta, data