print "Second argument expected: path to a labels' file."
     sys.exit(-1)
 try:
     out_path = sys.argv[3]
 except:
     print "Third argument expected: output-report file."
     sys.exit(-1)        
 #try:
 #    training_size = int(sys.argv[3])
 #except:
 #    print "Third argument expected: how many samples should be used for training."
 #    sys.exit(-1) 
     
     
 print "Loading labels' file:",  labels_path
 labels = jrs_io.load_labels(open(labels_path))
 single_labels = list(set(reduce(lambda l1,l2: l1+l2, (ll for ll in labels))))
 #training_labels = labels[:training_size]
 n = len(labels)    
 print n," multi-labels sets loaded (",len(single_labels),"single labels:",single_labels,")..."
 print "Sample five labels:", labels[:5]
 print "------------------------------------------"
 
 print "Extracting label occurrence vectors"
 label2occurrences = dict( (label,label_ocur(labels, label)) for label in single_labels )
 #print label2occurrences    
 print "------------------------------------------"
 
 print "Loading features from file:",  features_matrix_path
 f = open(features_matrix_path)
 features = jrs_io.load_data(f, cast_method = float, numrows = LOAD_MAX_ROWS)
    except:
        print "Argument expected: path to a labels' file."
        sys.exit(-1)
    try:
        labels2_path = sys.argv[2]
    except:
        print "Argument expected: path to a second labels' file."
        sys.exit(-1)
    try:
        out_path = sys.argv[3]
    except:
        print "Argument expected: output labels' file."
        sys.exit(-1)

    print "Loading labels' 1 file:", labels1_path
    labels1 = jrs_io.load_labels(open(labels1_path))

    try:
        print "Loading labels' 2 file:", labels2_path
        labels2 = jrs_io.load_labels(open(labels2_path))
    except:
        print "Failed loading file", labels2_path
        print "Using empty file"
        labels2 = [[] for i in xrange(len(labels1))]

    labels12 = []
    for i in xrange(len(labels1)):
        labels12.append(sorted(set(labels1[i] + labels2[i])))

    print "Writing to 1+2 file:", out_path
    jrs_io.store_labels(open(out_path, "w"), labels12)
        print "Next argument expected: path to a pos-features-ind file."
        sys.exit(-1)
    try:
        negfeaturesind_path = sys.argv[4]
    except:
        print "Next argument expected: path to a neg-features-ind file."
        sys.exit(-1)            
    try:
        out_path = sys.argv[5]
    except:
        print "Next argument expected: output-features file."
        sys.exit(-1)        
        
        
    print "Loading labels' file:",  labels_path
    labels = jrs_io.load_labels(open(labels_path))
    single_labels = list(set(reduce(lambda l1,l2: l1+l2, (ll for ll in labels))))
    n = len(labels)    
    print n," multi-labels sets loaded (",len(single_labels),"single labels:",single_labels,")..."
    print "------------------------------------------"
    
    print "Extracting label occurrence vectors"
    label2occurrences = dict( (label,label_ocur(labels, label)) for label in single_labels )        
    #print label2occurrences    
    print "------------------------------------------"

    print "Loading features from file:",  features_matrix_path
    f = open(features_matrix_path)
    features = jrs_io.load_data(f, cast_method = int, numrows = LOAD_MAX_ROWS)
    print "","loaded", len(features),"x",len(features[0])
    print "------------------------------------------"
 except:
     print "Argument expected: path to a labels' file."
     sys.exit(-1)
 try:
     labels2_path = sys.argv[2]
 except:
     print "Argument expected: path to a second labels' file."
     sys.exit(-1)        
 try:
     out_path = sys.argv[3]
 except:
     print "Argument expected: output labels' file."
     sys.exit(-1)      
                     
 print "Loading labels' 1 file:",  labels1_path
 labels1 = jrs_io.load_labels(open(labels1_path))
 
 try:
     print "Loading labels' 2 file:",  labels2_path
     labels2 = jrs_io.load_labels(open(labels2_path))
 except:
     print "Failed loading file", labels2_path
     print "Using empty file"
     labels2 = [[] for i in xrange(len(labels1))]
 
 labels12 = []
 for i in xrange(len(labels1)):
      labels12.append( sorted(set(labels1[i]+labels2[i])) )
      
      
 print "Writing to 1+2 file:", out_path
Exemplo n.º 5
0
        sys.exit(-1)

    try:
        fmeasure_paths = sys.argv[3]
        fmeasure_paths = fmeasure_paths.split(',')
    except:
        print "Third argument expected: list of fmeasure files separated with <,> (comma)"
        print "Argument not given using default..."
        fmeasure_paths = [DEFAULT_FMEASURE_PATH for path in paths]

    print "LOADING JRS-OUTPUT-FILES:", paths
    files_labels = []  #at position list of lists of labels
    for path in paths:
        print "", "loading:", path
        multilabels = jrs_io.load_labels(open(path),
                                         cast_method=int,
                                         separator=',')
        print "", "sample:", multilabels[:5]
        files_labels.append(multilabels)

    # Parse every f-measure file into a {label_no: fmeasure} dictionary.
    print "LOADING FMEASURE-FILES:", fmeasure_paths
    files_fmeasures = []  # intended to hold, per file, a dictionary {label_no: fmeasure}
    for path in fmeasure_paths:
        print "", "loading:", path
        label2fmeasure = {}
        # NOTE(review): the file object is never closed explicitly.
        for line in open(path).xreadlines():
            #print "","line:",line
            # Parsing stops at the first blank line; anything after it is ignored.
            if len(line.strip()) == 0: break
            # First whitespace-separated field: label number (int).
            label_no = int(line.split()[0])
            # Second whitespace-separated field: f-measure value (float).
            f1 = float(line.split()[1])
            label2fmeasure[label_no] = f1
        sys.exit(-1)

    try:
        out_path = sys.argv[3]
    except:
        print "Third argument expected: output file"
        sys.exit(-1)

    try:
        label_no = int(sys.argv[4])
    except:
        print "Argument expected: label to be overwritten no."
        sys.exit(-1)

    print "Loading labels' 1 file:", main_file
    labels1 = jrs_io.load_labels(open(main_file))
    print "", len(labels1), "rows loaded"

    print "Loading labels' (weka-csv) 2 file:", aux_file
    # inst#,actual,predicted,error,prediction
    lines = open(aux_file).readlines()
    labels2bool = list(line.split(",")[2].find("present") >= 0 for line in lines[1:] if len(line.strip()) > 0)
    print "", len(labels2bool), "rows loaded"
    # print labels2bool

    removed = 0
    added = 0
    for i in xrange(len(labels1)):
        if labels2bool[i]:
            if not label_no in labels1[i]:
                print "adding label no", label_no, "to", i, "sample"
        sys.exit(-1)

    try:
        out_path = sys.argv[3]
    except:
        print "Third argument expected: output file"
        sys.exit(-1)

    try:
        label_no = int(sys.argv[4])
    except:
        print "Argument expected: label to be overwritten no."
        sys.exit(-1)

    print "Loading labels' 1 file:", main_file
    labels1 = jrs_io.load_labels(open(main_file))
    print "", len(labels1), "rows loaded"

    print "Loading labels' (weka-csv) 2 file:", aux_file
    #inst#,actual,predicted,error,prediction
    lines = open(aux_file).readlines()
    labels2bool = list(
        line.split(",")[2].find("present") >= 0 for line in lines[1:]
        if len(line.strip()) > 0)
    print "", len(labels2bool), "rows loaded"
    #print labels2bool

    removed = 0
    added = 0
    for i in xrange(len(labels1)):
        if labels2bool[i]:
     sys.exit(-1)
     
 try:
     fmeasure_paths = sys.argv[3]
     fmeasure_paths = fmeasure_paths.split(',') 
 except:
     print "Third argument expected: list of fmeasure files separated with <,> (comma)"
     print "Argument not given using default..."
     fmeasure_paths = [DEFAULT_FMEASURE_PATH for path in paths]        
  
                
 print "LOADING JRS-OUTPUT-FILES:",paths         
 files_labels = [] #at position list of lists of labels
 for path in paths:
     print "","loading:", path
     multilabels = jrs_io.load_labels(open(path), cast_method = int, separator=',')
     print "","sample:",multilabels[:5]
     files_labels.append( multilabels )
 
 print "LOADING FMEASURE-FILES:",fmeasure_paths         
 files_fmeasures = [] #at position dictionary{label_no: fmeasure}
 for path in fmeasure_paths:
     print "","loading:", path
     label2fmeasure = {}
     for line in open(path).xreadlines():
         #print "","line:",line
         if len(line.strip()) == 0: break
         label_no = int(line.split()[0])
         f1 = float(line.split()[1])    
         label2fmeasure[label_no] = f1
     print "","sample:",list(label2fmeasure.iteritems())[:5]