示例#1
0
def main():
    """Command-line entry point: parse the options and run ``filesampler``.

    Reads the test, development and training set sizes, an optional random
    seed and one or more input files from ``sys.argv``.

    Exits with status 2 when no test set size is given. A missing file
    argument is rejected by argparse itself (``files`` uses ``nargs='+'``),
    so no separate check is needed for it.
    """
    parser = argparse.ArgumentParser(description="Extracts random samples from datasets, supports multiple parallel datasets (such as parallel corpora), provided that corresponding data is on the same line.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-t', '--testsetsize', help="Test set size (lines)", type=float, action='store', default=0)
    parser.add_argument('-d', '--devsetsize', help="Development set size (lines)", type=float, action='store', default=0)
    parser.add_argument('-T', '--trainsetsize', help="Training set size (lines), leave unassigned (0) to automatically use all of the remaining data", type=float, action='store', default=0)
    # default=None rather than 0, so an explicit "-S 0" can be told apart
    # from "no seed given" and is honoured as a real seed.
    parser.add_argument('-S', '--seed', help="Seed for random number generator", type=int, action='store', default=None)
    parser.add_argument('files', type=str, nargs='+', help="The data sets to sample from, must be of equal size (i.e., same number of lines)")

    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)

    if args.testsetsize == 0:
        print("ERROR: Specify at least a testset size!", file=sys.stderr)
        sys.exit(2)

    filesampler(args.files, args.testsetsize, args.devsetsize, args.trainsetsize)
示例#2
0
def _parse_size(value, label):
    """Return *value* as an ``int`` if possible, otherwise as a ``float``.

    Prints ``ERROR: Invalid <label>`` to stderr and exits with status 2 when
    *value* is neither a valid integer nor a valid float.
    """
    for convert in (int, float):
        try:
            return convert(value)
        except ValueError:
            continue
    print("ERROR: Invalid " + label, file=sys.stderr)
    sys.exit(2)


def main():
    """Command-line entry point: parse ``sys.argv`` and run ``filesampler``.

    Options: ``-t`` test set size, ``-d`` development set size, ``-T``
    training set size, ``-S`` random seed, ``-h``/``--help`` usage text.
    The remaining positional arguments are passed on as the input files.

    Exits with status 2 on an invalid option or value, when no test set size
    is given, or when no file is given; exits with status 0 after printing
    the usage text on request.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ht:d:S:T:", ["help"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err), file=sys.stderr)
        usage()
        sys.exit(2)

    trainsetsize = testsetsize = devsetsize = 0

    for o, a in opts:
        if o == "-t":
            testsetsize = _parse_size(a, "testsize")
        elif o == "-d":
            devsetsize = _parse_size(a, "devsetsize")
        elif o == '-T':
            trainsetsize = _parse_size(a, "trainsetsize")
        elif o == "-S":
            # Report a malformed seed like any other bad value instead of
            # letting the ValueError escape as a traceback.
            try:
                seed = int(a)
            except ValueError:
                print("ERROR: Invalid seed", file=sys.stderr)
                sys.exit(2)
            random.seed(seed)
        elif o in ("-h", "--help"):
            # "--help" is registered with getopt above, so it must be
            # handled here as well as "-h".
            usage()
            sys.exit(0)
        else:
            print("ERROR: No such option: ", o, file=sys.stderr)
            sys.exit(2)

    if testsetsize == 0:
        print("ERROR: Specify at least a testset size!", file=sys.stderr)
        usage()
        sys.exit(2)
    elif not args:
        print("ERROR: Specify at least one file!", file=sys.stderr)
        usage()
        sys.exit(2)

    filesampler(args, testsetsize, devsetsize, trainsetsize)
示例#3
0
文件: sampler.py 项目: zzmjohn/pynlpl
def main():
    """Parse the command line with getopt and hand over to ``filesampler``.

    Recognised options are ``-t`` (test set size), ``-d`` (development set
    size), ``-T`` (training set size), ``-S`` (random seed) and
    ``-h``/``--help`` (print usage and exit with status 0). Sizes are kept as
    ``int`` when they parse as one and as ``float`` otherwise. Everything
    after the options is treated as the list of input files.

    Exits with status 2 for unknown options, malformed values, a missing
    test set size or a missing file list.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ht:d:S:T:", ["help"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err), file=sys.stderr)
        usage()
        sys.exit(2)

    # Each size option maps to the label used in its error message.
    size_labels = {"-t": "testsize", "-d": "devsetsize", "-T": "trainsetsize"}
    sizes = {"-t": 0, "-d": 0, "-T": 0}

    for o, a in opts:
        if o in size_labels:
            # Prefer an integer; fall back to a float only when the value
            # is not a valid integer literal.
            try:
                sizes[o] = int(a)
            except ValueError:
                try:
                    sizes[o] = float(a)
                except ValueError:
                    print("ERROR: Invalid " + size_labels[o], file=sys.stderr)
                    sys.exit(2)
        elif o == "-S":
            # A non-integer seed is a usage error, not a crash.
            try:
                random.seed(int(a))
            except ValueError:
                print("ERROR: Invalid seed", file=sys.stderr)
                sys.exit(2)
        elif o in ("-h", "--help"):
            # getopt accepts the long form "--help" too, so treat both alike.
            usage()
            sys.exit(0)
        else:
            print("ERROR: No such option: ", o, file=sys.stderr)
            sys.exit(2)

    testsetsize = sizes["-t"]
    devsetsize = sizes["-d"]
    trainsetsize = sizes["-T"]

    if testsetsize == 0:
        print("ERROR: Specify at least a testset size!", file=sys.stderr)
        usage()
        sys.exit(2)
    elif not args:
        print("ERROR: Specify at least one file!", file=sys.stderr)
        usage()
        sys.exit(2)

    filesampler(args, testsetsize, devsetsize, trainsetsize)
示例#4
0
def _build_parser():
    """Create the argparse parser describing the sampler's command line."""
    parser = argparse.ArgumentParser(
        description=
        "Extracts random samples from datasets, supports multiple parallel datasets (such as parallel corpora), provided that corresponding data is on the same line.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-t',
                        '--testsetsize',
                        help="Test set size (lines)",
                        type=float,
                        action='store',
                        default=0)
    parser.add_argument('-d',
                        '--devsetsize',
                        help="Development set size (lines)",
                        type=float,
                        action='store',
                        default=0)
    parser.add_argument(
        '-T',
        '--trainsetsize',
        help=
        "Training set size (lines), leave unassigned (0) to automatically use all of the remaining data",
        type=float,
        action='store',
        default=0)
    # The seed defaults to None so that "--seed 0" is distinguishable from
    # the option being absent.
    parser.add_argument('-S',
                        '--seed',
                        help="Seed for random number generator",
                        type=int,
                        action='store',
                        default=None)
    # nargs='+' makes argparse itself fail (exit status 2) when no file is
    # supplied, so callers never see an empty list here.
    parser.add_argument(
        'files',
        type=str,
        nargs='+',
        help=
        "The data sets to sample from, must be of equal size (i.e., same number of lines)"
    )
    return parser


def main():
    """Run the sampler from the command line.

    Parses ``sys.argv``, seeds the random number generator when a seed was
    supplied (including 0), and calls ``filesampler`` with the files and the
    requested test, development and training set sizes.

    Exits with status 2 if the test set size is left at 0.
    """
    args = _build_parser().parse_args()

    if args.seed is not None:
        random.seed(args.seed)

    if args.testsetsize == 0:
        print("ERROR: Specify at least a testset size!", file=sys.stderr)
        sys.exit(2)

    filesampler(args.files, args.testsetsize, args.devsetsize,
                args.trainsetsize)
示例#5
0
                print("ERROR: Invalid testsize",file=sys.stderr)
                sys.exit(2)
    elif o == "-d":
        try:
            devsetsize = int(a)
        except:
            try:
                devsetsize = float(a)
            except:
                print("ERROR: Invalid devsetsize",file=sys.stderr)
                sys.exit(2)
    elif o == "-h":
        usage()
        sys.exit(0)
    else:
        print("ERROR: No such option: ",o,file=sys.stderr)
        sys.exit(2)

# Final validation before sampling: a test set size of 0 or an empty list of
# positional arguments is a usage error; report it on stderr, show the usage
# text and exit with status 2.
if testsetsize == 0:
    print("ERROR: Specify at least a testset size!",file=sys.stderr)
    usage()
    sys.exit(2)
elif len(args) == 0:
    print("ERROR: Specify at least one file!",file=sys.stderr)
    usage()
    sys.exit(2)

# Hand the remaining positional arguments and the test/development set sizes
# to filesampler (this variant passes no training set size).
filesampler(args, testsetsize, devsetsize)