def __init__(self, *args, **kwargs):
    """Initialize the job and preload entity representations.

    Downloads the TREC KBA 2012 entity URL-name list from S3 and passes it
    to toy_kba_algorithm.prepare_entities so the parsed representations are
    available on the instance as self.entity_representations.
    """
    MRJob.__init__(self, *args, **kwargs)
    from contextlib import closing  # local import: only this method needs it
    log("loading entity list")
    # closing() guarantees the HTTP response is released even if json.load
    # raises (the original never closed it; urllib's py2 response object is
    # not a context manager, so a plain `with` would not work).
    with closing(urllib.urlopen(
            "https://s3.amazonaws.com/trec-kba-2012/entity-urlnames.json")) as resp:
        entities = json.load(resp)
    self.entity_representations = toy_kba_algorithm.prepare_entities(entities)
def __init__(self, *args, **kwargs):
    """Initialize the job and cache the config file's lines in self.lines.

    Scans sys.argv for '--config-file PATH' pairs; for each one, strips any
    directory components from PATH (only the basename is expected to exist
    in the task's working directory) and reads that file's lines. As in the
    original, if the flag appears more than once the LAST occurrence wins.
    """
    MRJob.__init__(self, *args, **kwargs)
    for index, ar in enumerate(sys.argv):
        if ar == '--config-file':
            # NOTE(review): assumes a value follows the flag — an IndexError
            # is raised if '--config-file' is the final argument.
            path = sys.argv[index + 1]
            # Keep only the basename; '/'-splitting matches the original.
            path = path[path.rfind('/') + 1:]
            # `with` closes the handle (the original comprehension leaked it).
            with open(path, "r", encoding="utf-8") as fh:
                self.lines = list(fh)
def __init__(self, *args, **kwargs):
    """Initialize the job and cache the raw cluster file.

    self.clusters / self.names start empty (presumably filled by later
    steps), and the lines of the first --clusters file are stored verbatim
    in self.cluster_from_file.
    """
    MRJob.__init__(self, *args, **kwargs)
    self.clusters = dict()
    self.names = dict()
    # `with` ensures the file handle is closed (the original comprehension
    # left it open for the life of the process).
    with open(self.options.clusters[0], "r", encoding="utf-8") as fh:
        self.cluster_from_file = list(fh)
def __init__(self, args):
    """Build the MRJob argument list from stdin.

    Reads all of stdin, escapes double quotes, and appends each adjacent
    pair (i, i+1) in turn — so every interior line appears twice, matching
    the original's pairing behavior. The resulting list replaces `args`.

    Bug fixed: the original ended with `yield self, args`, which turned
    __init__ into a generator function; instantiating the class then raises
    TypeError because __init__ returns a generator instead of None. The
    yield is removed — __init__ must not be a generator.
    """
    lines = [raw.strip() for raw in sys.stdin]
    paired = []
    for i in range(len(lines) - 1):
        paired.append(lines[i].replace('"', '\\"').strip('\n'))
        paired.append(lines[i + 1].replace('"', '\\"').strip('\n'))
    args = paired
    MRJob.__init__(self, args)
def __init__(self, *args, **kwargs):
    """Initialize the job and preload the cluster and data files.

    Caches the lines of 'clusteronly.txt' and 'dataonly.txt' (expected in
    the task's working directory) and starts self.clusters / self.names
    empty for later population.
    """
    MRJob.__init__(self, *args, **kwargs)
    # NOTE(review): the return value was unused in the original too; the
    # call is kept in case it has side effects — confirm and drop if not.
    cluster_path = self.get_cluster_file_path()
    self.clusters = dict()
    self.names = dict()
    # Context managers close both handles (the originals leaked them).
    with open('clusteronly.txt', "r", encoding="utf-8") as fh:
        self.cluster_from_file = list(fh)
    with open('dataonly.txt', "r", encoding="utf-8") as fh:
        self.data_lines = list(fh)
def __init__(self, *args, **kwargs):
    """Set up randomly-initialized HMM parameter tables.

    Emission and transition matrices are drawn uniformly and row-normalized;
    the initial and final probability vectors are normalized to sum to 1.
    initial_counts starts at zero.
    """
    MRJob.__init__(self, *args, **kwargs)
    self.initial_counts = np.zeros(num_states)
    # The np.random.random call order below matches the original exactly,
    # so identical seeds produce identical parameters.
    emission = np.random.random((num_observations, num_states))
    self.emission_probabilities = emission / emission.sum(axis=1, keepdims=True)
    initial = np.random.random(num_states)
    self.initial_probabilities = initial / initial.sum()
    final = np.random.random(num_states)
    self.final_probabilities = final / final.sum()
    transition = np.random.random((num_states, num_states))
    self.transition_probabilities = transition / transition.sum(axis=1, keepdims=True)
def __init__(self, *args, **kwargs):
    """Load a pickled HMM from 'hmm.pkl' if present, else init randomly.

    Also zeroes the accumulators (log-likelihood plus the four count
    fields) that later steps update.
    """
    MRJob.__init__(self, *args, **kwargs)
    from os import path
    filename = 'hmm.pkl'
    if path.exists(filename):
        # `with` closes the handle (the original leaked it).
        # SECURITY: pickle.loads executes arbitrary code from the file —
        # acceptable only if hmm.pkl is produced by this job itself.
        # NOTE(review): 'string-escape' is a Python 2-only codec, so this
        # block assumes Python 2 — confirm the target interpreter.
        with open(filename) as fh:
            self.hmm = pickle.loads(fh.read().decode('string-escape'))
    else:
        # Initialize the HMM parameters randomly.
        self.hmm = HMM(word_dict, tag_dict)
        self.hmm.initialize_random()
    self.log_likelihood = 0
    self.initial_counts = 0
    self.emission_counts = 0
    self.transition_counts = 0
    self.final_counts = 0
def __init__(self, *args, **kwargs):
    """Randomly initialize all HMM probability tables.

    Matrices are row-normalized; vectors are normalized to unit sum.
    initial_counts begins as a zero vector.
    """
    MRJob.__init__(self, *args, **kwargs)
    self.initial_counts = np.zeros(num_states)

    def row_stochastic(shape):
        # Uniform draw, then normalize each row to sum to 1.
        m = np.random.random(shape)
        return m / m.sum(1)[:, None]

    def unit_sum(size):
        # Uniform draw, then normalize the whole vector.
        v = np.random.random(size)
        return v / v.sum()

    # Draw order matches the original so seeded runs are reproducible.
    self.emission_probabilities = row_stochastic((num_observations, num_states))
    self.initial_probabilities = unit_sum(num_states)
    self.final_probabilities = unit_sum(num_states)
    self.transition_probabilities = row_stochastic((num_states, num_states))
def __init__(self, *args, **kwargs):
    """Construct the HMM, restoring saved parameters when available.

    If 'hmm.txt' exists its parameters are loaded (with smoothing 0.1);
    otherwise the model is initialized randomly. All per-iteration
    accumulators start at zero.
    """
    MRJob.__init__(self, *args, **kwargs)
    self.hmm = HMM(word_dict, tag_dict)
    from os import path
    filename = 'hmm.txt'
    if not path.exists(filename):
        # No checkpoint yet — start from random parameters.
        self.hmm.initialize_random()
    else:
        # Restore parameters from the saved text file.
        load_parameters(filename, self.hmm, smoothing=0.1)
    self.log_likelihood = 0
    self.initial_counts = 0
    self.emission_counts = 0
    self.transition_counts = 0
    self.final_counts = 0
def __init__(self, args):
    # Pure delegation: forward the argument list to MRJob's constructor;
    # this subclass adds no state of its own here.
    MRJob.__init__(self, args)
def __init__(self, args):
    # Pure delegation, passing the argument list by keyword for clarity;
    # no extra state is set up in this subclass.
    MRJob.__init__(self, args=args)
def __init__(self, *args, **kwargs):
    # Standard MRJob setup, then bind the externally-defined `hmm` object
    # so job steps can reach it via self.hmm.
    MRJob.__init__(self, *args, **kwargs)
    # NOTE(review): `hmm` comes from an enclosing/module scope not visible
    # here — confirm it is defined before this class is instantiated.
    self.hmm = hmm