def main():
    """Command-line entry point: parse the arguments and call ``filesampler``.

    Options:
        -t / --testsetsize   test set size, parsed as ``float`` (default 0)
        -d / --devsetsize    development set size, parsed as ``float`` (default 0)
        -T / --trainsetsize  training set size, parsed as ``float`` (default 0)
        -S / --seed          integer seed passed to ``random.seed``
        files                one or more input files

    The random number generator is seeded whenever ``-S`` is given on the
    command line, including ``-S 0``; it is left untouched when the option is
    omitted.

    Exits with status 2 (after printing a message to stderr) when the test
    set size is 0 or when no files were supplied.
    """
    parser = argparse.ArgumentParser(
        description="Extracts random samples from datasets, supports multiple parallel datasets (such as parallel corpora), provided that corresponding data is on the same line.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('-t', '--testsetsize', help="Test set size (lines)", type=float, action='store', default=0)
    parser.add_argument('-d', '--devsetsize', help="Development set size (lines)", type=float, action='store', default=0)
    parser.add_argument('-T', '--trainsetsize', help="Training set size (lines), leave unassigned (0) to automatically use all of the remaining data", type=float, action='store', default=0)
    # default=None (rather than 0) so that an explicit "-S 0" can be told
    # apart from "no seed requested".
    parser.add_argument('-S', '--seed', help="Seed for random number generator", type=int, action='store', default=None)
    parser.add_argument('files', type=str, nargs='+', help="The data sets to sample from, must be of equal size (i.e., same number of lines)")
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)

    if args.testsetsize == 0:
        print("ERROR: Specify at least a testset size!", file=sys.stderr)
        sys.exit(2)

    # nargs='+' already makes argparse reject an empty file list; this guard
    # is kept only as a defensive check. It deliberately has no surrounding
    # try/except: a bare except here would intercept the SystemExit below.
    if not args.files:
        print("ERROR: Specify at least one file!", file=sys.stderr)
        sys.exit(2)

    filesampler(args.files, args.testsetsize, args.devsetsize, args.trainsetsize)
def main(): try: opts, args = getopt.getopt(sys.argv[1:], "ht:d:S:T:", ["help"]) except getopt.GetoptError as err: # print help information and exit: print(str(err),file=sys.stderr) usage() sys.exit(2) trainsetsize = testsetsize = devsetsize = 0 for o, a in opts: if o == "-t": try: testsetsize = int(a) except: try: testsetsize = float(a) except: print("ERROR: Invalid testsize",file=sys.stderr) sys.exit(2) elif o == "-d": try: devsetsize = int(a) except: try: devsetsize = float(a) except: print("ERROR: Invalid devsetsize",file=sys.stderr) sys.exit(2) elif o == '-T': try: trainsetsize = int(a) except: try: trainsetsize = float(a) except: print("ERROR: Invalid trainsetsize",file=sys.stderr) sys.exit(2) elif o == "-S": random.seed(int(a)) elif o == "-h": usage() sys.exit(0) else: print("ERROR: No such option: ",o,file=sys.stderr) sys.exit(2) if testsetsize == 0: print("ERROR: Specify at least a testset size!",file=sys.stderr) usage() sys.exit(2) elif len(args) == 0: print("ERROR: Specify at least one file!",file=sys.stderr) usage() sys.exit(2) filesampler(args, testsetsize, devsetsize, trainsetsize)
def main():
    """Command-line entry point: read getopt-style options and call ``filesampler``.

    Recognised options:
        -t SIZE    test set size
        -d SIZE    development set size
        -T SIZE    training set size
        -S SEED    integer seed handed to ``random.seed``
        -h, --help show usage and exit with status 0

    Each SIZE is stored as an ``int`` when it parses as one, otherwise as a
    ``float``. Whatever is left after the options is the list of input files.

    Exits with status 2 when the command line cannot be parsed, a size or
    the seed is invalid, the test set size is 0, or no file is given.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ht:d:S:T:", ["help"])
    except getopt.GetoptError as err:
        # Show what getopt objected to, followed by the help text.
        print(str(err), file=sys.stderr)
        usage()
        sys.exit(2)

    # Option flag -> name used in the corresponding error message.
    size_labels = {"-t": "testsize", "-d": "devsetsize", "-T": "trainsetsize"}
    sizes = {"-t": 0, "-d": 0, "-T": 0}

    for option, value in opts:
        if option in size_labels:
            try:
                sizes[option] = int(value)
            except ValueError:
                # Not an integer literal; accept a float as a fallback.
                try:
                    sizes[option] = float(value)
                except ValueError:
                    print("ERROR: Invalid " + size_labels[option], file=sys.stderr)
                    sys.exit(2)
        elif option == "-S":
            # A malformed seed gets an error message rather than an
            # unhandled ValueError.
            try:
                seed = int(value)
            except ValueError:
                print("ERROR: Invalid seed", file=sys.stderr)
                sys.exit(2)
            random.seed(seed)
        elif option in ("-h", "--help"):
            # "--help" is registered with getopt above, so it has to be
            # accepted here as well as "-h".
            usage()
            sys.exit(0)
        else:
            print("ERROR: No such option: ", option, file=sys.stderr)
            sys.exit(2)

    testsetsize = sizes["-t"]
    devsetsize = sizes["-d"]
    trainsetsize = sizes["-T"]

    if testsetsize == 0:
        print("ERROR: Specify at least a testset size!", file=sys.stderr)
        usage()
        sys.exit(2)
    elif not args:
        print("ERROR: Specify at least one file!", file=sys.stderr)
        usage()
        sys.exit(2)

    filesampler(args, testsetsize, devsetsize, trainsetsize)
def main(): parser = argparse.ArgumentParser( description= "Extracts random samples from datasets, supports multiple parallel datasets (such as parallel corpora), provided that corresponding data is on the same line.", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-t', '--testsetsize', help="Test set size (lines)", type=float, action='store', default=0) parser.add_argument('-d', '--devsetsize', help="Development set size (lines)", type=float, action='store', default=0) parser.add_argument( '-T', '--trainsetsize', help= "Training set size (lines), leave unassigned (0) to automatically use all of the remaining data", type=float, action='store', default=0) parser.add_argument('-S', '--seed', help="Seed for random number generator", type=int, action='store', default=0) parser.add_argument( 'files', type=str, nargs='+', help= "The data sets to sample from, must be of equal size (i.e., same number of lines)" ) args = parser.parse_args() if args.seed: random.seed(args.seed) if args.testsetsize == 0: print("ERROR: Specify at least a testset size!", file=sys.stderr) sys.exit(2) try: if not args.files: print("ERROR: Specify at least one file!", file=sys.stderr) sys.exit(2) except: print("ERROR: Specify at least one file!", file=sys.stderr) sys.exit(2) filesampler(args.files, args.testsetsize, args.devsetsize, args.trainsetsize)
print("ERROR: Invalid testsize",file=sys.stderr) sys.exit(2) elif o == "-d": try: devsetsize = int(a) except: try: devsetsize = float(a) except: print("ERROR: Invalid devsetsize",file=sys.stderr) sys.exit(2) elif o == "-h": usage() sys.exit(0) else: print("ERROR: No such option: ",o,file=sys.stderr) sys.exit(2) if testsetsize == 0: print("ERROR: Specify at least a testset size!",file=sys.stderr) usage() sys.exit(2) elif len(args) == 0: print("ERROR: Specify at least one file!",file=sys.stderr) usage() sys.exit(2) filesampler(args, testsetsize, devsetsize)