Пример #1
0
    def handle(self, *args, **options):
        """Create Sentence and Question records from the files in ``input_texts``.

        Every file in ``BASE_DIR/input_texts`` is split into sentences, each
        sentence is parsed, and a ``Sentence`` row is fetched or created for
        it. For every chunk of a sentence that is 4 to 8 space-separated
        tokens long, a ``Question`` row is fetched or created and its
        ``question_tree_string`` is set and saved.

        Args:
            *args: Unused.
            **options: Unused.
        """
        texts_dir = os.path.join(BASE_DIR, 'input_texts')
        comp = TreeComparer()

        # Gather the sentences of every input text into a single flat list.
        sr = SentenceReader()
        sentences = []
        for filename in os.listdir(texts_dir):
            sentences.extend(sr.get_sentences(os.path.join(texts_dir, filename)))

        # Parse each sentence. A parsed object is indexed as
        # [0] the sentence, [1] its list of chunks, [2] the parse tree string.
        print("*** Parsing sentences: ")
        parser = Parser()
        parsed_objects = []
        for sentence in tqdm(sentences):
            parsed_objects.append(parser.parse(sentence))

        # One Question per suitable chunk, linked to the Sentence it came from.
        print("*** Creating Questions: ")
        translator = Translator()
        for parsed in tqdm(parsed_objects):
            sentence_object, _ = Sentence.objects.get_or_create(
                sentence=parsed[0],
                sentence_tree_string=parsed[2])

            for chunk in parsed[1]:
                # Only chunks of 4 to 8 space-separated tokens are suitable.
                token_count = len(chunk.split(' '))
                if token_count < 4 or token_count > 8:
                    continue

                chunk_tree = parser.parse(chunk)[2]
                chunk_translation = translator.get_translation(chunk)

                question, _ = Question.objects.get_or_create(
                    sentence=sentence_object,
                    chunk=chunk,
                    chunk_translation=chunk_translation,
                    chunk_tree_string=chunk_tree)
                # The question tree is derived from the stored question itself.
                question.question_tree_string = comp.remove_chunk_from_parse_tree(
                    question)
                question.save()
Пример #2
0
 def _get_obj_to_save(self):
     """Build a ``StoresDB`` object from this instance's store fields.

     ``url_prefix`` is first passed through
     ``Parser.parse_url_prefix_from_url``; every other field is copied
     unchanged. This method only constructs the object and does not call
     ``save`` on it.

     Returns:
         The newly constructed ``StoresDB`` instance.
     """
     # A dict literal keeps the fields in the order they are read.
     field_values = {
         'store_id': self.store_id,
         'store_name': self.store_name,
         'url_prefix': Parser.parse_url_prefix_from_url(self.url_prefix),
         'tag_name': self.tag_name,
         'query': self.query,
         'description': self.description,
     }
     return StoresDB(**field_values)
Пример #3
0
def get_features_pos(pk):
	"""Return tag occurrence counts for the sentence of the card ``pk``.

	The card's sentence is parsed and every run of capital letters other
	than ``I`` in the resulting tree string is taken as one tag occurrence.
	The occurrences are then counted for each entry of ``tags``, a name
	defined outside this function.

	Args:
		pk: Primary key used to look up the ``Card``.

	Returns:
		A list of counts, one per distinct entry of ``tags``, in the
		iteration order of a dict keyed by those entries.
	"""
	from collections import Counter

	parser = Parser()

	# Fetch the card and parse its sentence; index 2 of the result is the tree string.
	card = Card.objects.get(pk=pk)
	tree = parser.parse(card.sentence.sentence)[2]

	# Tally every tag found in the tree in a single pass instead of
	# rescanning the full list once per entry of ``tags``.
	occurrences = Counter(re.findall(r'[A-HJ-Z]+', tree))

	# Keyed by tag so that repeated entries in ``tags`` collapse to one value.
	# A Counter yields 0 for tags that never occur.
	tag_counts = {tag: occurrences[tag] for tag in tags}

	return list(tag_counts.values())
Пример #4
0
    def __init__(self):
        """Create the logger, parser, environment loader and argument parser.

        The topology is left as ``None`` here. Two required command-line
        options are registered: ``-c/--config-file`` and ``-t/--topology``.
        """
        # Logger instance obtained from Logger.get_instance()
        self._log = Logger.get_instance()
        # Parser object
        self._parser = Parser()
        # No topology yet
        self._topology = None
        # Loader used to load an environment for each alternative
        self._loader = EnvironmentLoader()

        # Command-line options: (short flag, long flag, help text); all are required.
        self._arg = argparse.ArgumentParser(description='Comparison Framework')
        required_options = (
            ('-c', '--config-file', 'The framework configuration file.'),
            ('-t', '--topology',
             'The topology on which framework runs. It must be a GraphML file.'),
        )
        for short_flag, long_flag, help_text in required_options:
            self._arg.add_argument(short_flag,
                                   long_flag,
                                   required=True,
                                   help=help_text)
Пример #5
0
 def does_item_belong_to_store(cls, store_id, item_url):
     """Tell whether ``item_url`` carries the URL prefix of store ``store_id``.

     Args:
         store_id: Identifier used to look up the ``StoresDB`` row.
         item_url: URL of the item to check.

     Returns:
         ``True`` when the prefix parsed from ``item_url`` equals the
         store's ``url_prefix``; ``False`` otherwise, including when no
         store with ``store_id`` exists.
     """
     store = StoresDB.objects.filter(
         store_id=store_id).only('url_prefix').first()
     if store is None:
         # ``first()`` gives ``None`` when nothing matches; an unknown store
         # cannot own the item, so answer False instead of failing on
         # ``None.url_prefix``.
         return False
     return Parser.parse_url_prefix_from_url(item_url) == store.url_prefix
Пример #6
0
class ComparisonFramework(object):
    """Command-line driver that runs one simulation per service alternative."""

    def __init__(self):
        """Create the logger, parser, environment loader and argument parser.

        The topology is left as ``None`` until ``_init`` builds it.
        """
        # Logger instance obtained from Logger.get_instance()
        self._log = Logger.get_instance()
        # Parser object
        self._parser = Parser()
        # No topology yet
        self._topology = None
        # Loader used to load an environment for each alternative
        self._loader = EnvironmentLoader()

        # Command-line options: (short flag, long flag, help text); all are required.
        self._arg = argparse.ArgumentParser(description='Comparison Framework')
        required_options = (
            ('-c', '--config-file', 'The framework configuration file.'),
            ('-t', '--topology',
             'The topology on which framework runs. It must be a GraphML file.'),
        )
        for short_flag, long_flag, help_text in required_options:
            self._arg.add_argument(short_flag,
                                   long_flag,
                                   required=True,
                                   help=help_text)

    def __repr__(self):
        """Return the framework's fixed name and version string."""
        return "Comparison Framework v. 0.1"

    def _init(self):
        """Initialize the framework.

        Reads the command-line arguments, creates the topology from the
        absolute path of the ``--topology`` argument and parses the
        configuration file.
        """
        tag = self.__class__.__name__

        args = self._arg.parse_args()
        config_file = str(args.config_file)
        topology_path = os.path.abspath(str(args.topology))

        self._log.info(tag, 'Creating the topology.')
        self._topology = Topology(topology_path)

        # Parse the configuration file
        self._log.info(tag, 'Parsing configuration file.')
        self._parser.parse(config_file)

    def run(self):
        """Run the framework.

        After initialization, every alternative of every service returned by
        the parser gets its own overlay, environment and simulation. Each
        simulation is started and joined before the next one is created.
        """
        self._init()
        tag = self.__class__.__name__

        for service in self._parser.get_services():
            for alternative in service.get_alternatives():
                # Step 1: create the overlay for this alternative and add it
                # to the topology object.
                self._log.info(tag, 'Creating overlay for alternative %s.', alternative.get_name())
                base_topology = self._topology.get_topology_from_graphml()
                overlay = alternative.create_overlay(base_topology)
                self._log.info(tag, 'Adding overlay %s for alternative %s to the topology.',
                               overlay.get_name(), alternative.get_name())
                self._topology.add_overlay(overlay)

                # Step 2: load the environment, create the simulation and run it.
                self._log.info(tag, 'Loading the environment for the alternative %s.', alternative)
                environment = self._loader.load(alternative.get_environment())
                simulation = Simulation(self._topology, service, environment, alternative)
                self._log.info(
                    tag,
                    'A new simulation has been created for service %s and alternative %s.',
                    service.get_name(), alternative)
                # Start the simulation and wait for it before moving on.
                simulation.start()
                simulation.join()

            self._log.info(tag, 'All alternatives for service %s have been successfully tested.',
                           service.get_name())

        self._log.info(tag, 'All services have been successfully tested; framework will stop.')
Пример #7
0
 def _fetch_price(self):
     """Fetch a price for this item through ``Parser.parse_price_from_url_page``.

     The item's ``url`` is passed together with the ``tag_name`` and
     ``query`` of its store.

     Returns:
         Whatever ``Parser.parse_price_from_url_page`` returns.
     """
     page_url = self.url
     store = self.store
     return Parser.parse_price_from_url_page(page_url, store.tag_name, store.query)