Example #1
def __init__(self,
             runtime_total: int = 10,
             runtime_between_slices: int = 10,
             runtime_forever: bool = False,
             out_directory: str = "./",
             track_keywords: list = None,
             compression: bool = True):
    """ Creates an instance of the GenerateDataset class.
        Requires:
            - 'runtime_total' specifies the total runtime
                of the data gathering.
            - 'runtime_between_slices' specifies how long
                it takes between each save. If this
                parameter equals 'runtime_total', only
                one file will be created.
            - 'runtime_forever' will ignore 'runtime_total'.
            - 'out_directory' is where the data will
                be saved.
            - 'track_keywords' is what the Twitter API
                will send back. If this is left empty,
                only stopwords will be tracked.
            - 'compression'=True will compress all saved files.
    """
    # Resolve the default here rather than in the signature:
    # a call in the default value would run once at definition
    # time and the result would be shared across instances.
    if track_keywords is None:
        track_keywords = custom_stopwords.main()
    self.runtime_total = runtime_total
    self.runtime_between_slices = runtime_between_slices
    self.runtime_forever = runtime_forever
    self.out_directory = out_directory
    self.track_keywords = track_keywords
    self.compression_enabled = compression
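
A minimal usage sketch, assuming the surrounding class is GenerateDataset and that it exposes the run_collector() method seen in the later examples; the keyword list is illustrative:

# Collect for 60 seconds total, writing a compressed slice every
# 20 seconds, so three files end up in ./data/.
gen = GenerateDataset(runtime_total=60,
                      runtime_between_slices=20,
                      runtime_forever=False,
                      out_directory="./data/",
                      track_keywords=["python", "twitter"],
                      compression=True)
gen.run_collector()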
Example #2
def clean_stopwords(content: str) -> str:
    """ Removes stopwords from a string and
        returns the filtered result.
    """
    # Build the stopword set once instead of calling
    # custom_stopwords.main() for every word.
    stopwords = set(custom_stopwords.main())
    filtered = [item for item in content.split()
                if item not in stopwords]
    return ' '.join(filtered)
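
A quick illustration of the expected behaviour, assuming custom_stopwords.main() returns common English stopwords such as "the" and "a":

text = "the quick brown fox jumps over a lazy dog"
print(clean_stopwords(text))
# -> "quick brown fox jumps lazy dog"
#    (exact output depends on the stopword list)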
def test(_path="../DataCollection/",
         _time_total=10,
         _time_between_slices=10,
         _track=None):
    run_forever = False
    if _track is None:
        _track = custom_stopwords.main()

    gen = GenerateDataset(_time_total, _time_between_slices, run_forever,
                          _path, _track)
    gen.run_collector()

    print("terminated")
Example #5
#from packages.graphical.generate_wordcloud import write_csv
from packages.cleaning.custom_stopwords import main

# modules
import csv
import seaborn as sb

# 'gd' is assumed here to be the dataset generator class from the
# earlier examples, imported under an alias, for instance:
# from packages.dataset.generate_dataset import GenerateDataset as gd
# (the exact module path may differ in the repository).

generator = gd(
    _runtime_total=20,
    _runtime_between_slices=20,
    _runtime_forever=False,
    _out_directory='C:/Users/Erlend-PC/Documents/Coding/Noodle/TwitterNoodle-master/packages/',
    _track_keywords=main())


generator.run_collector()


# MERGE

# scale = DatasetScalingHelper(_verbosity = True)
# scale.set_dir_input('C:\\Users\\Joakim\\Desktop\\TwitterNoodle-master\\packages\\zIN')
# scale.set_dir_output('C:\\Users\\Joakim\\Desktop\\TwitterNoodle-master\\packages\\zOUT')
# scale.merge_datasets_by_directory()

# fp = 'C:\\Users\\Joakim\\Desktop\\TwitterNoodle-master\\200116-15_06_06--200116-15_06_11.csv'