Пример #1
0
def main():
    """Convert a data set into a file of ``individual(...)`` facts.

    Command-line arguments (read from ``sys.argv``):
        1. Path of the data set, loaded with ``ExampleTable``.
        2. Path of the output file, which is created or truncated.

    For every example one line of the form
    ``individual(<class>, [<v1>, <v2>, ...]).`` is written, where ``<class>``
    is the lower-cased class value and the list holds the lower-cased values
    of all attributes other than the class variable.

    Raises:
        SystemExit: With status 1 when an argument is missing, the data set
            cannot be loaded, or the output file cannot be opened.
    """
    # Both positional arguments are mandatory, so argv needs three entries.
    if len(sys.argv) < 3:
        logger.error("Too few arguments. Usage: python ex2rsd.py <dataset>.tab <outFile>.pl")
        sys.exit(1)

    dataset_path, out_path = sys.argv[1], sys.argv[2]

    # Load the data set.
    logger.debug("Data set: % s" % dataset_path)
    try:
        data = ExampleTable(dataset_path)
    except Exception:
        # The loader's exception types are not visible from here, hence the
        # broad catch; stop instead of failing later on an undefined `data`.
        logger.error("Couldn't read the data set!")
        sys.exit(1)

    # Open the output file.
    logger.debug("Out file: % s" % out_path)
    try:
        out = open(out_path, 'w')
    except IOError:
        logger.error("Couldn't create the out file: %s" % out_path)
        sys.exit(1)

    clVar = data.domain.classVar.name
    # The context manager closes the file even if writing fails midway.
    with out:
        for ex in data:
            # Every value except the class value, comma separated.
            terms = ", ".join(
                attrVal.value.lower()
                for attrVal in ex
                if attrVal.variable.name != clVar
            )
            out.write("individual(%s, [%s]).\n" % (ex.getclass().value.lower(), terms))
Пример #2
0
def main():
    """Convert a data set into a SEGS input file plus a companion CSV file.

    Command-line arguments (read from ``sys.argv``):
        1. Path of the data set, loaded with ``ExampleTable``.
        2. Path of the output file, which is created or truncated.
        3. Name of the positive class; must be one of the class variable's
           values.
        4. ``yes`` (case-insensitive) if a map is available, anything else
           otherwise; stored in the module-level ``map_available``.

    Examples of the positive class are written first, followed by all other
    examples, each through ``writeExample``. Besides the output file the
    function writes ``<basename>.csv`` next to it (``<basename>`` is the data
    set's file name up to its first dot), copies the output file to ``g2ont``
    in the current directory, and appends a ``posClass`` line to
    ``scripts/dataInfo.py``.

    Raises:
        SystemExit: With status 1 when an argument is missing, the data set
            cannot be loaded, the positive class is unknown, or the output
            file cannot be opened.
    """
    global map_available

    # Four positional arguments are mandatory, so argv needs five entries.
    if len(sys.argv) < 5:
        logger.error("Too few arguments. Usage: python ex2segs.py <dataset>.tab <outFile> <positiveClass> <map, yes or no>")
        sys.exit(1)

    dataset_path, out_path, posClass, map_flag = sys.argv[1:5]

    # Load the data set.
    logger.debug("Data set: % s" % dataset_path)
    try:
        data = ExampleTable(dataset_path)
    except Exception:
        # The loader's exception types are not visible from here, hence the
        # broad catch; stop instead of failing later on an undefined `data`.
        logger.error("Couldn't read the data set!")
        sys.exit(1)

    logger.debug("Out file: % s" % out_path)

    # Validate the positive class before any output file is truncated.
    logger.debug("Positive class: % s" % posClass)
    if posClass not in data.domain.classVar.values:
        logger.error("The specified value for the positive class is undefined.")
        sys.exit(1)

    map_available = map_flag.lower() == "yes"

    # Everything before the first dot of the data set's file name.
    basename = os.path.basename(dataset_path).split('.')[0]
    # os.path.join copes with an empty directory part; formatting
    # '%s/%s.csv' would then point at the filesystem root.
    csv_path = os.path.join(os.path.dirname(out_path), '%s.csv' % basename)
    logger.debug('csv path: %s' % csv_path)

    # Sort by class values.
    data.sort(data.domain.classVar)

    pos = data.filter({data.domain.classVar: posClass})
    neg = data.filter({data.domain.classVar: posClass}, negate=1)
    ratio = 0.5

    try:
        out = open(out_path, 'w')
    except IOError:
        logger.error("Couldn't create the out file: %s" % out_path)
        sys.exit(1)

    # Convert to SEGS file; both files are closed even if a write fails.
    with out:
        with open(csv_path, 'w') as csv_file:
            # First write the positive examples.
            for ex in pos:
                writeExample(data, out, csv_file, ex, ratio)
            # Then write all other examples.
            for ex in neg:
                writeExample(data, out, csv_file, ex, ratio)

    # Mirror the finished output file to 'g2ont' in the working directory.
    with open(out_path, 'r') as segs_file:
        segs_text = segs_file.read()
    with open('g2ont', 'w') as g2ont_file:
        g2ont_file.write(segs_text)

    with open(csv_path, 'r') as csv_file:
        logger.debug(csv_file.read())

    # Remember some info about the data set. This can be later used for processing the SEGS results.
    # Deprecated! This isn't used anymore.
    with open('scripts/dataInfo.py', 'a') as dataInfo:
        dataInfo.write('posClass = "%s=%s"\n' % (data.domain.classVar.name, posClass))