def download(self):
    """
    Fetch the Stanford Sentiment Treebank CSV through ``generic_download``.

    The parent directory of ``self.filename`` is created first (including any
    missing ancestors); the download itself is handed ``SST_FILENAME`` as its
    target filename.
    """
    # Ensure the destination directory is present before anything is fetched.
    Path(self.filename).parent.mkdir(parents=True, exist_ok=True)

    column_names = {"text_column": "Text", "target_column": "Target"}
    generic_download(
        url="https://s3.amazonaws.com/enso-data/SST-binary.csv",
        filename=SST_FILENAME,
        **column_names
    )
Exemplo n.º 2
0
    def _download_data(cls):
        """
        Fetch the datasets used by this class and build the processed
        sequence-labelling file.

        Three artifacts are involved:

        * the Stanford Sentiment Treebank CSV, fetched with
          ``generic_download`` when ``cls.classifier_dataset_path`` is missing;
        * the Reuters XML, downloaded to ``cls.sequence_dataset_path`` when
          that file is missing;
        * ``cls.processed_path``, a JSON dump of ``(docs, docs_labels)`` built
          from the Reuters XML, where every token is labelled either
          ``"Named Entity"`` or ``"<PAD>"``.

        The method is a no-op only when all three paths already exist. A
        cached sentiment CSV alone no longer short-circuits the Reuters
        download and processing.

        Raises:
            requests.HTTPError: if the Reuters download answers with an HTTP
                error status (nothing is written to disk in that case).
        """
        classifier_path = Path(cls.classifier_dataset_path)
        sequence_path = Path(cls.sequence_dataset_path)
        processed_path = Path(cls.processed_path)

        classifier_ready = classifier_path.exists()
        if classifier_ready and sequence_path.exists() and processed_path.exists():
            return

        # Download Stanford Sentiment Treebank to data directory
        if not classifier_ready:
            classifier_path.parent.mkdir(parents=True, exist_ok=True)
            generic_download(
                url="https://s3.amazonaws.com/enso-data/SST-binary.csv",
                text_column="Text",
                target_column="Target",
                filename=SST_FILENAME
            )

        # Download Reuters Dataset to enso `data` directory
        if not sequence_path.exists():
            sequence_path.parent.mkdir(parents=True, exist_ok=True)
            url = "https://raw.githubusercontent.com/dice-group/n3-collection/master/reuters.xml"
            response = requests.get(url)
            # Fail loudly rather than caching an error page as the dataset.
            response.raise_for_status()
            with open(cls.sequence_dataset_path, "wb") as fp:
                fp.write(response.content)

        with codecs.open(cls.sequence_dataset_path, "r", "utf-8") as infile:
            soup = bs(infile, "html.parser")

        docs = []
        docs_labels = []
        for elem in soup.find_all("document"):
            texts = []
            labels = []

            # Loop through each child of the element under "textwithnamedentities"
            for child in elem.find("textwithnamedentities").children:
                # Only tag nodes contribute tokens; other child nodes are skipped.
                if not isinstance(child, Tag):
                    continue
                if child.name == "namedentityintext":
                    label = "Named Entity"  # part of a named entity
                else:
                    label = "<PAD>"  # irrelevant word
                texts.append(child.text)
                labels.append(label)

            docs.append(texts)
            docs_labels.append(labels)

        # The output directory may differ from the download directories.
        processed_path.parent.mkdir(parents=True, exist_ok=True)
        with open(cls.processed_path, "wt", encoding="utf-8") as fp:
            json.dump((docs, docs_labels), fp)
 def download(self):
     """
     Retrieve the Stanford Sentiment Treebank CSV into the data directory.

     Creates the directory that contains ``self.filename`` when it does not
     exist yet, then delegates the actual fetch to ``generic_download``.
     """
     source_url = "https://s3.amazonaws.com/enso-data/SST-binary.csv"

     target_dir = Path(self.filename).parent
     target_dir.mkdir(parents=True, exist_ok=True)

     generic_download(
         url=source_url,
         text_column="Text",
         target_column="Target",
         filename=SST_FILENAME,
     )
Exemplo n.º 4
0
 def download(self):
     """
     Fetch the McDonalds Yelp sentiment CSV through ``generic_download``.

     The parent directory of ``self.filename`` is created first. Reviews are
     read from the ``review`` column and targets from ``policies_violated``,
     with ``target_transform`` applied as the target transformation.

     NOTE(review): the download is still given ``filename=SST_FILENAME`` --
     confirm that reusing the SST filename is intended for this dataset.
     """
     source_url = (
         "https://www.figure-eight.com/wp-content/uploads/2016/03/"
         "McDonalds-Yelp-Sentiment-DFE.csv"
     )

     # Ensure the destination directory is present before anything is fetched.
     Path(self.filename).parent.mkdir(parents=True, exist_ok=True)

     generic_download(
         url=source_url,
         text_column="review",
         target_column="policies_violated",
         filename=SST_FILENAME,
         target_transformation=target_transform,
     )
Exemplo n.º 5
0
 def download(self):
     """
     Retrieve the McDonalds Yelp sentiment CSV into the data directory.

     Creates the directory that contains ``self.filename`` when needed, then
     delegates to ``generic_download`` using the ``review`` text column, the
     ``policies_violated`` target column and ``target_transform``.

     NOTE(review): ``SST_FILENAME`` is passed as the filename even though the
     URL points at the McDonalds Yelp data -- verify this is deliberate.
     """
     target_dir = Path(self.filename).parent
     target_dir.mkdir(parents=True, exist_ok=True)

     column_names = {
         "text_column": "review",
         "target_column": "policies_violated",
     }
     generic_download(
         url="https://www.figure-eight.com/wp-content/uploads/2016/03/McDonalds-Yelp-Sentiment-DFE.csv",
         filename=SST_FILENAME,
         target_transformation=target_transform,
         **column_names
     )
Exemplo n.º 6
0
    def _download_sst(cls):
        """
        Fetch the Stanford Sentiment Treebank CSV unless it is already present.

        When ``cls.dataset_path`` exists nothing is done. Otherwise its parent
        directory is created (with any missing ancestors) and the download is
        delegated to ``generic_download``.
        """
        dataset_file = Path(cls.dataset_path)
        if not dataset_file.exists():
            dataset_file.parent.mkdir(parents=True, exist_ok=True)
            generic_download(
                url="https://s3.amazonaws.com/enso-data/SST-binary.csv",
                text_column="Text",
                target_column="Target",
                filename=SST_FILENAME,
            )