# Argument parser
parser = argparse.ArgumentParser(description="RCNLP - Authorship attribution with Echo State Network")

# Argument
parser.add_argument("--dataset", type=str, help="Dataset's directory.")
parser.add_argument("--author1", type=str, help="First author.", default="1")
parser.add_argument("--author2", type=str, help="Second author.", default="2")
parser.add_argument("--training-size", type=int, help="Training size.", default=4)
parser.add_argument("--test-size", type=int, help="Test size.", default=40)
parser.add_argument("--samples", type=int, help="Number of samples to use to assess accuracy.", default=20)
parser.add_argument("--lang", type=str, help="Language (ar, en, es, pt)", default='en')
args = parser.parse_args()

# Logging
# The experiment value is derived from the variables in scope, so this must
# run after the arguments have been parsed.
logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance,
                       exp_value=RCNLPLogging.generate_experience_name(locals()))
logging.save_globals()
logging.save_variables(locals())

# >> 4. Generate W
# Draw a 0/1 mask where each entry is 1 with probability rc_w_sparsity, then
# replace every 1 with a fresh random value.
w = mdp.numx.random.choice([0.0, 1.0], (rc_size, rc_size), p=[1.0 - rc_w_sparsity, rc_w_sparsity])
w[w == 1] = mdp.numx.random.rand(len(w[w == 1]))

# Inputs
# One list of sizes per representation name.
# NOTE(review): -1 presumably means "no reduction" - confirm against the consumer.
reps = dict()
reps['pos'] = [-1]
reps['tag'] = [-1, 20]
reps['fw'] = [-1, 60, 40, 20]
reps['wv'] = [-1, 60, 40, 20]
reps['letter'] = [-1]
r_data = [t_in, zip(t_in, t_out)] # Train r_flow.train(r_data) return r_flow # end create_reservoir #################################################### # Main function #################################################### if __name__ == "__main__": # Logging logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value="LR=0.9vs0.05_slen=5000_mlen=1000_size=50") logging.save_globals() # Symbols switching_symbols = [1] other_symbols = [[0]] # Save locals logging.save_variables(locals()) # Generate the data set generator = RCNLPSwitchingAttractorLanguage(tag_symbol=switching_symbols, other_symbols=other_symbols, memory_length=ds_memory_length, sparsity=ds_sparsity) inputs, outputs = generator.generate_data_set(sample_length=ds_sample_length, n_samples=ds_data_set_size) # Training and test
help="PCA model to load", default=None) parser.add_argument( "--in-components", type=int, help="Number of principal component to reduce inputs to.", default=-1) parser.add_argument("--sentence", action='store_true', help="Test sentence classification rate?", default=False) args = parser.parse_args() # Logging logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value=RCNLPLogging.generate_experience_name( locals())) logging.save_globals() logging.save_variables(locals()) # PCA model pca_model = None if args.pca_model != "": pca_model = pickle.load(open(args.pca_model, 'r')) # end if # >> 1. Choose a text to symbol converter. if args.converter == "pos": converter = RCNLPPosConverter(resize=args.in_components, pca_model=pca_model) elif args.converter == "tag":
default=None) parser.add_argument( "--in-components", type=int, help="Number of principal component to reduce inputs to.", default=-1) parser.add_argument("--samples", type=int, help="Samples", default=20) parser.add_argument("--step", type=int, help="Step for training size value", default=5) args = parser.parse_args() # Logging logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value=RCNLPLogging.generate_experience_name( locals())) logging.save_globals() logging.save_variables(locals()) # PCA model pca_model = None if args.pca_model != "": pca_model = pickle.load(open(args.pca_model, 'r')) # end if # >> 1. Choose a text to symbol converter. if args.converter == "pos": converter = RCNLPPosConverter(resize=args.in_components, pca_model=pca_model) elif args.converter == "tag":
# Argument parser.add_argument("--dataset", type=str, help="Dataset's directory.") parser.add_argument("--author1", type=int, help="Author 1' ID.") parser.add_argument("--author2", type=int, help="Author 2's ID.") parser.add_argument("--samples", type=int, help="Number of samples to use to assess accuracy.", default=20) parser.add_argument("--lang", type=str, help="Language (ar, en, es, pt)", default='en') parser.add_argument("--converter", type=str, help="The text converter to use (fw, pos, tag, wv).", default='pos') parser.add_argument("--pca-model", type=str, help="PCA model to load", default=None) parser.add_argument("--in-components", type=int, help="Number of principal component to reduce inputs to.", default=-1) parser.add_argument("--k", type=int, help="n-Fold Cross Validation.", default=10) args = parser.parse_args() # Logging logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value=RCNLPLogging.generate_experience_name(locals())) logging.save_globals() logging.save_variables(locals()) # PCA model pca_model = None if args.pca_model != "": pca_model = pickle.load(open(args.pca_model, 'r')) # end if # >> 1. Choose a text to symbol converter. if args.converter == "pos": converter = RCNLPPosConverter(resize=args.in_components, pca_model=pca_model) elif args.converter == "tag": converter = RCNLPTagConverter(resize=args.in_components, pca_model=pca_model) elif args.converter == "fw":
# Argument
parser.add_argument("--dataset", type=str, help="Dataset's directory.")
parser.add_argument("--author1", type=str, help="Author 1' ID.")
parser.add_argument("--author2", type=str, help="Author 2's ID.")
parser.add_argument("--lang", type=str, help="Language (ar, en, es, pt)", default='en')
parser.add_argument("--converter", type=str, help="The text converter to use (fw, pos, tag, wv).", default='pos')
parser.add_argument("--pca-model", type=str, help="PCA model to load", default=None)
parser.add_argument("--in-components", type=int, help="Number of principal component to reduce inputs to.",
                    default=-1)
# "--K" and "--k" differ only by case and are stored as args.K and args.k.
parser.add_argument("--K", type=int, help="n-Fold Cross Validation", default=10)
parser.add_argument("--k", type=int, help="Fold position to use", default=0)
args = parser.parse_args()

# Logging
logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance,
                       exp_value=RCNLPLogging.generate_experience_name(locals()))
logging.save_globals()
logging.save_variables(locals())

# PCA model
pca_model = None
if args.pca_model is not None:
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    # Pickle data is binary, and the context manager closes the handle.
    with open(args.pca_model, 'rb') as pca_file:
        pca_model = pickle.load(pca_file)
    # end with
# end if

# Base converter
base_converter = ReverseConverter()

# Reverse WV converter
# The word-vector converter is stacked on the base converter via upper_level.
reverse_wv_converter = WVConverter(pca_model=pca_model, upper_level=base_converter)
type=int, help="Number of reservoir to generate.", default=20) parser.add_argument("--pca-model", type=str, help="PCA model to load", default='') parser.add_argument("--output", type=str, help="Where to save the reservoir.", default="reservoir.p") args = parser.parse_args() # Logging logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value=RCNLPLogging.generate_experience_name( locals())) logging.save_globals() logging.save_variables(locals()) # PCA model pca_model = None if args.pca_model != "": pca_model = pickle.load(open(args.pca_model, 'r')) # end if # Results state_results = np.array([]) doc_results = np.array([]) # >> 1. Convert the text to symbolic or continuous representations
# Argument parser.add_argument("--dataset", type=str, help="Dataset's directory.") parser.add_argument("--author", type=int, help="Author's ID.") parser.add_argument("--training-size", type=int, help="How many texts from the author to use in the training") parser.add_argument("--negative-samples", type=int, help="How many texts from other authors to use as negative " "examples") parser.add_argument("--test-size", type=int, help="How many texts from each authors to test the model?") parser.add_argument("--lang", type=str, help="Language (ar, en, es, pt)", default='en') parser.add_argument("--converter", type=str, help="The text converter to use (fw, pos, tag, wv).", default='pos') parser.add_argument("--pca-model", type=str, help="PCA model to load", default=None) parser.add_argument("--in-components", type=int, help="Number of principal component to reduce inputs to.", default=-1) args = parser.parse_args() # Logging logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value=RCNLPLogging.generate_experience_name(locals())) logging.save_globals() logging.save_variables(locals()) # PCA model pca_model = None if args.pca_model != "": pca_model = pickle.load(open(args.pca_model, 'r')) # end if # >> 1. Choose a text to symbol converter. if args.converter == "pos": converter = PosConverter(resize=args.in_components, pca_model=pca_model) elif args.converter == "tag": converter = TagConverter(resize=args.in_components, pca_model=pca_model) elif args.converter == "fw":
help="Training size.", default=4) parser.add_argument("--test-size", type=int, help="Test size.", default=40) parser.add_argument("--samples", type=int, help="Number of samples to use to assess accuracy.", default=20) parser.add_argument("--lang", type=str, help="Language (ar, en, es, pt)", default='en') args = parser.parse_args() # Logging logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value=RCNLPLogging.generate_experience_name( locals())) logging.save_globals() logging.save_variables(locals()) # Generate W w = mdp.numx.random.choice([0.0, 1.0], (rc_size, rc_size), p=[1.0 - rc_w_sparsity, rc_w_sparsity]) w[w == 1] = mdp.numx.random.rand(len(w[w == 1])) # Init original_size_perf = np.array([]) none_size_perf = np.array([]) # Inputs reps = dict()
type=int, help="Number of principal component to reduce inputs to.", default=-1) parser.add_argument("--model", type=str, help="ESN model to load", default='') parser.add_argument("--output", type=str, help="Output filename where to save the model.", default=None) args = parser.parse_args() # Logging logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value=RCNLPLogging.generate_experience_name( locals())) logging.save_globals() logging.save_variables(locals()) if args.model == '': # PCA model pca_model = None if args.pca_model is not None: pca_model = pickle.load(open(args.pca_model, 'r')) # end if # >> 1. Choose a text to symbol converter. if args.converter == "pos": converter = RCNLPPosConverter(resize=args.in_components, pca_model=pca_model)
type=int, help="n-Fold Cross Validation", default=10) parser.add_argument("--samples", type=int, help="Number of reservoir to sample", default=50) parser.add_argument("--verbose", action='store_true', help="Verbose mode", default=False) args = parser.parse_args() # Logging logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value=RCNLPLogging.generate_experience_name( locals())) logging.save_globals() logging.save_variables(locals()) # PCA model pca_model = None if args.pca_model != "": pca_model = pickle.load(open(args.pca_model, 'r')) # end if # Choose a text to symbol converter if args.converter == "pos": converter = PosConverter(lang=args.lang, resize=args.in_components, pca_model=pca_model)
type=int, help="Step for reservoir size value", default=50) parser.add_argument("--min", type=int, help="Minimum reservoir size value", default=10) parser.add_argument("--max", type=int, help="Maximum reservoir size value", default=1000) args = parser.parse_args() # Logging logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value=RCNLPLogging.generate_experience_name( locals())) logging.save_globals() logging.save_variables(locals()) # PCA model pca_model = None if args.pca_model != "": pca_model = pickle.load(open(args.pca_model, 'r')) # end if # >> 1. Choose a text to symbol converter. if args.converter == "pos": converter = RCNLPPosConverter(resize=args.in_components, pca_model=pca_model) elif args.converter == "tag":
ds_memory_length = 140  # How long time to remember the entry
ds_training_length = 30  # Training set length (number of samples)
# Whatever is not used for training is used for testing.
ds_test_length = ds_data_set_size - ds_training_length
ds_sample_length = 3000  # Length of a sample
ds_slopping_memory = False  # Is the memory slowly fading away?
ds_sparsity = 0  # Number of samples with no switching

####################################################
# Main function
####################################################

if __name__ == "__main__":

    # Logging
    logging = RCNLPLogging(
        exp_name=ex_name,
        exp_inst=ex_instance,
        exp_value="LR=0.5_size=100_slen=3000_mlen=140_IS=0.1_sparsity=0.05to1.0_2dim")
    logging.save_globals()

    # Symbols
    switching_symbol = [1, 0]
    #switch_back_symbol = [-1]
    other_symbols = [[0, 0], [0, 1]]

    # Save locals
    logging.save_variables(locals())

    # Parameter average results
    parameter_remembering_rates = []
    parameter_lucidity = []
type=int, help="Number of states to show", default=500) parser.add_argument("--samples", type=int, help="Samples to estimate performances", default=20) parser.add_argument("--pca-model", type=str, help="PCA model to load", default='') args = parser.parse_args() # Logging logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value=RCNLPLogging.generate_experience_name( locals())) logging.save_globals() logging.save_variables(locals()) # PCA model pca_model = None if args.pca_model != "": pca_model = pickle.load(open(args.pca_model, 'r')) # end if # Results to analyze explore_results = np.array([]) explore_deviation = np.array([]) explore_t_test = np.array([])
parser.add_argument("--dataset", type=str, help="Dataset's directory.") parser.add_argument("--training-size", type=int, help="Number of texts from the author", default=1) parser.add_argument("--test-size", type=int, help="Number of texts to assess the model.", default=20) parser.add_argument("--negatives", type=int, help="Number of negative texts to use", default=1) parser.add_argument("--samples", type=int, help="Number of samples to use to assess accuracy.", default=20) parser.add_argument("--lang", type=str, help="Language (ar, en, es, pt)", default='en') parser.add_argument("--converter", type=str, help="The text converter to use (fw, pos, tag, wv).", default='pos') parser.add_argument("--pca-model", type=str, help="PCA model to load", default=None) parser.add_argument("--in-components", type=int, help="Number of principal component to reduce inputs to.", default=-1) parser.add_argument("--threshold", type=float, help="Confidence threshold", default=0.5) parser.add_argument("--sentence", action='store_true', help="Test sentence classification rate?", default=False) args = parser.parse_args() # Logging logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value=RCNLPLogging.generate_experience_name(locals())) logging.save_globals() logging.save_variables(locals()) # PCA model pca_model = None if args.pca_model != "": pca_model = pickle.load(open(args.pca_model, 'r')) # end if # >> 1. Choose a text to symbol converter. if args.converter == "pos": converter = RCNLPPosConverter(resize=args.in_components, pca_model=pca_model) elif args.converter == "tag": converter = RCNLPTagConverter(resize=args.in_components, pca_model=pca_model) elif args.converter == "fw":