def handle(self, *args, **options):
    """Populate Sentence and Question records from the files in ``input_texts``.

    Steps, in order:

    1. Read every file under ``BASE_DIR/input_texts`` and collect all of
       their sentences into a single list.
    2. Parse each sentence. Each parse result is indexed as
       ``[0]`` sentence, ``[1]`` list of chunks, ``[2]`` parse tree string.
    3. For each parsed sentence, get-or-create a ``Sentence`` row; then for
       each chunk made of 4 to 8 space-separated tokens (inclusive),
       get-or-create a ``Question`` row, set its ``question_tree_string``
       from ``TreeComparer.remove_chunk_from_parse_tree`` and save it.

    ``args`` and ``options`` are accepted but not used.
    """
    texts_dir = os.path.join(BASE_DIR, 'input_texts')
    comp = TreeComparer()

    # Step 1: flatten the sentences of every input file into one list.
    reader = SentenceReader()
    sentences = []
    for entry in os.listdir(texts_dir):
        sentences.extend(reader.get_sentences(os.path.join(texts_dir, entry)))

    # Step 2: one parse result per sentence.
    print("*** Parsing sentences: ")
    parser = Parser()
    parsed_objects = []
    for sentence in tqdm(sentences):
        parsed_objects.append(parser.parse(sentence))

    # Step 3: one Question per suitable chunk, attached to its Sentence.
    print("*** Creating Questions: ")
    translator = Translator()
    for parsed in tqdm(parsed_objects):
        sentence_object, _ = Sentence.objects.get_or_create(
            sentence=parsed[0],
            sentence_tree_string=parsed[2],
        )
        for chunk in parsed[1]:
            # Only chunks of 4..8 space-separated tokens are turned into questions.
            if not 4 <= len(chunk.split(' ')) <= 8:
                continue
            chunk_tree = parser.parse(chunk)[2]
            chunk_translation = translator.get_translation(chunk)
            question, _ = Question.objects.get_or_create(
                sentence=sentence_object,
                chunk=chunk,
                chunk_translation=chunk_translation,
                chunk_tree_string=chunk_tree,
            )
            question.question_tree_string = comp.remove_chunk_from_parse_tree(question)
            question.save()
def _get_obj_to_save(self):
    """Construct a ``StoresDB`` object from this instance's attributes.

    ``url_prefix`` is passed through ``Parser.parse_url_prefix_from_url``
    first; every other field is copied as-is. This method only builds and
    returns the object.
    """
    # Dict literal keeps the same attribute-evaluation order as the fields below.
    fields = {
        'store_id': self.store_id,
        'store_name': self.store_name,
        'url_prefix': Parser.parse_url_prefix_from_url(self.url_prefix),
        'tag_name': self.tag_name,
        'query': self.query,
        'description': self.description,
    }
    return StoresDB(**fields)
def get_features_pos(pk):
    """Return tag-occurrence counts for the parse tree of a card's sentence.

    The card with primary key ``pk`` is loaded, its sentence text is parsed,
    and every run of capital letters matched by the regex below is collected
    from the parse tree string. For each entry of ``tags`` the number of
    exact matches is counted.

    :param pk: primary key of the ``Card`` to look up.
    :return: list of counts, one per distinct entry of ``tags``.
    """
    parser = Parser()

    # Fetch the card and the parse tree string of its sentence
    # (index 2 of the parse result).
    card = Card.objects.get(pk=pk)
    tree = parser.parse(card.sentence.sentence)[2]

    # NOTE(review): the character class skips the letter "I", so a tag such as
    # "IN" would be matched as "N" -- confirm this is intended.
    found_tags = re.findall(r'[A-HJ-Z]+', tree)

    # `tags` is not defined in this function; presumably a module-level
    # collection of tag names -- verify against the rest of the file.
    tag_counts = {tag: found_tags.count(tag) for tag in tags}

    return list(tag_counts.values())
def __init__(self):
    """Set up the logger, parser, environment loader and command-line parser.

    The topology is left as ``None`` here. Two required command-line options
    are registered: ``-c/--config-file`` and ``-t/--topology``.
    """
    # Collaborators
    self._log = Logger.get_instance()
    self._parser = Parser()

    # No topology yet
    self._topology = None

    # Factory loader: a new environment is loaded for each alternative,
    # in accord with the alternative itself.
    self._loader = EnvironmentLoader()

    # Command-line interface
    cli = argparse.ArgumentParser(description='Comparison Framework')
    cli.add_argument(
        '-c', '--config-file',
        required=True,
        help='The framework configuration file.',
    )
    cli.add_argument(
        '-t', '--topology',
        required=True,
        help='The topology on which framework runs. It must be a GraphML file.',
    )
    self._arg = cli
def does_item_belong_to_store(cls, store_id, item_url):
    """Tell whether ``item_url`` has the URL prefix of the given store.

    The store is looked up by ``store_id`` (loading only ``url_prefix``), and
    its prefix is compared with the prefix that
    ``Parser.parse_url_prefix_from_url`` extracts from ``item_url``.

    :param store_id: identifier used to filter ``StoresDB``.
    :param item_url: URL of the item to check.
    :return: ``True`` when the prefixes are equal; ``False`` when they differ
        or when no store with ``store_id`` exists.
    """
    store = StoresDB.objects.filter(
        store_id=store_id).only('url_prefix').first()
    if store is None:
        # .first() yields None for an empty result; an item cannot belong to
        # a store that does not exist, so answer False instead of failing
        # with AttributeError on `store.url_prefix`.
        return False
    return Parser.parse_url_prefix_from_url(item_url) == store.url_prefix
class ComparisonFramework(object):
    """Driver that runs one simulation per alternative of every configured service.

    The configuration file and the topology file are taken from the command
    line (``-c/--config-file`` and ``-t/--topology``, both required).
    """

    def __init__(self):
        """Create the logger, parser, environment loader and argument parser."""
        self._log = Logger.get_instance()
        self._parser = Parser()
        # Filled in by _init() once the command line has been read.
        self._topology = None
        # Factory loader: a new environment is loaded for each alternative,
        # in accord with the alternative itself.
        self._loader = EnvironmentLoader()

        cli = argparse.ArgumentParser(description='Comparison Framework')
        cli.add_argument(
            '-c', '--config-file',
            required=True,
            help='The framework configuration file.',
        )
        cli.add_argument(
            '-t', '--topology',
            required=True,
            help='The topology on which framework runs. It must be a GraphML file.',
        )
        self._arg = cli

    def __repr__(self):
        return "Comparison Framework v. 0.1"

    def _init(self):
        """Initialize the framework.

        Reads the command-line arguments, creates the topology from the
        absolute path of the topology file and parses the configuration file.
        """
        tag = self.__class__.__name__
        args = self._arg.parse_args()
        config_file = str(args.config_file)
        topology_path = os.path.abspath(str(args.topology))

        self._log.info(tag, 'Creating the topology.')
        self._topology = Topology(topology_path)

        self._log.info(tag, 'Parsing configuration file.')
        self._parser.parse(config_file)

    def run(self):
        """Run the framework.

        After initialization, every alternative of every service returned by
        the parser is simulated in turn; each simulation is started and
        joined before the next one begins.
        """
        self._init()
        tag = self.__class__.__name__

        # For each service to evaluate, every alternative gets its own
        # simulation; the loader decides which environment to load.
        for service in self._parser.get_services():
            for alternative in service.get_alternatives():
                self._run_alternative(service, alternative)
            self._log.info(
                tag,
                'All alternatives for service %s have been successfully tested.',
                service.get_name())

        self._log.info(tag, 'All services have been successfully tested; framework will stop.')

    def _run_alternative(self, service, alternative):
        """Create the overlay, load the environment and run one simulation to completion."""
        tag = self.__class__.__name__

        # --- Create the overlay and add it to the topology object. ---
        self._log.info(tag, 'Creating overlay for alternative %s.', alternative.get_name())
        overlay = alternative.create_overlay(self._topology.get_topology_from_graphml())
        self._log.info(
            tag,
            'Adding overlay %s for alternative %s to the topology.',
            overlay.get_name(),
            alternative.get_name())
        self._topology.add_overlay(overlay)

        # --- Load the environment, create the simulation and run it. ---
        self._log.info(tag, 'Loading the environment for the alternative %s.', alternative)
        environment = self._loader.load(alternative.get_environment())
        simulation = Simulation(self._topology, service, environment, alternative)
        self._log.info(
            tag,
            'A new simulation has been created for service %s and alternative %s.',
            service.get_name(),
            alternative)

        # Start the simulation and wait for it to finish.
        simulation.start()
        simulation.join()
def _fetch_price(self):
    """Fetch a new price for this item from its website.

    Delegates to ``Parser.parse_price_from_url_page`` with the item's URL and
    the ``tag_name`` and ``query`` of its store, and returns whatever that
    call returns.
    """
    page_url = self.url
    store = self.store
    return Parser.parse_price_from_url_page(page_url, store.tag_name, store.query)