def test_GetDueTodayTask(self):
        """Extraction of the 'tasks due today' intent.

        jenisTask must capture the task-type keyword when one is present
        ("tubes", "tucil") and be "" when the query names no type.
        """
        extractor = Extractor()
        # (query, expected jenisTask) pairs — table-driven to avoid the
        # copy-pasted result1..result6 blocks of the original.
        cases = [
            ("Apa saja deadline hari ini?", ""),
            ("Deadline tubes hari ini apa saja, ya?", "tubes"),
            ("yang deadline pada hari ini", ""),
            ("Bot, minta daftar deadline dong pada hari ini. Makasih :)", ""),
            ("Untuk tucil, deadline pada hari ini apa saja?", "tucil"),
            ("Tubes yang deadline pada hari ini apa saja?", "tubes"),
        ]
        for query, expected in cases:
            result = extractor.extract(query, Context.getDueTodayTask)
            # PEP 8 (E711): compare against None with `is not`, not `!=`.
            assert result is not None
            assert result.jenisTask == expected
示例#2
0
    def process_all(self):
        """Run one full ETL pass: pull product rows in fixed-size batches,
        compute term frequencies, and load them through a single pipeline.

        After all batches are inserted, document frequencies and tf-idf are
        recomputed over the whole corpus.
        """
        try:
            print('process_all ... begin')

            extractor = Extractor(self.language)
            transformer = Transformer()
            extractor.connect()
            num_of_products = extractor.get_num_of_products()
            extractor.execute()

            batch_size = 10
            pipeline = self.loader.create_pipeline()
            batch = extractor.get_next_batch(batch_size)

            while batch:
                for product in batch:
                    print('\n {}'.format(product))
                    tf = transformer.get_tf(product)
                    print('len tf: {}'.format(len(tf)))
                    self.loader.insert_tf(product['id'], tf, pipeline)

                pipeline.execute()
                batch = extractor.get_next_batch(batch_size)

            extractor.close()
            # These aggregate over everything inserted above, so they run
            # only once the batch loop has drained the extractor.
            self.loader.count_df()
            self.loader.count_tf_idf(num_of_products)

        except Exception as e:
            print('ETL.process_all(), error: {}'.format(e))
示例#3
0
 def __init__(self, localDownloadQueue="PendingDownloadQueue"):
     # localDownloadQueue: name of the queue polled for pending downloads.
     Base.__init__(self)
     self.download_queue = localDownloadQueue
     self.ftp_sync = FileSyncer()
     # Order matters: files are moved into processing first, then the
     # extraction/cleanup passes run over the local sync directory.
     # NOTE(review): self.local_directory_to_sync is presumably set by
     # Base.__init__ or move_file_into_processing — confirm.
     self.move_file_into_processing()
     # Constructed purely for their side effects; instances are discarded.
     Extractor(self.local_directory_to_sync)
     Cleaner(self.local_directory_to_sync)
示例#4
0
 def test_attributes2(self):
     """Verify that a freshly constructed Extractor exposes its name and a
     correctly-typed JsonRecorder sharing that same name.
     """
     ext = Extractor('testing')
     self.assertEqual(ext.name, 'testing')
     # The recorder must both exist and actually be a JsonRecorder.
     self.assertTrue(ext.JsonRecorder and isinstance(ext.JsonRecorder, JsonRecorder))
     # extractor.JsonRecorder carries the extractor's own name.
     self.assertEqual(ext.JsonRecorder.name, 'testing')
示例#5
0
async def main(args):
    """Stream page URLs (from a file or the clipboard), scrape image links
    matching the configured patterns, and download them into a folder.

    Returns early when the destination folder does not exist.
    """
    extractor = Extractor('patterns.json')

    if args.file:
        source = URLGenerator.source(args.file)
    else:
        source = ClipboardReader.source

    folder = args.destination if args.destination else 'images/'
    if not os.path.isdir(folder):
        return

    visited = set()
    tick = 1
    async for target in source():
        if not extractor.input.match(target):
            continue
        content, url = await Downloader.downloadContent(target)
        # Stop once a URL repeats (presumably the source has wrapped
        # around — confirm against the source generators).
        if url in visited:
            break
        visited.add(url)

        for found in extractor.resource.finditer(content):
            if not found.group():
                continue
            link = found.group()
            name = link.split('/')[-1] + '.png'
            await Downloader.downloadFile(link, os.path.join(folder, name))

            # One-line dots animation on the terminal.
            print('\033[K\033[A')
            print('Downloading ' + '.' * tick, end='\r')
            tick = (tick + 1) % 5 + 1

    await waitTilFinish()
示例#6
0
    def __init__(self, password, image_folder_path):
        """Keep the session configuration and attach an Extractor over the
        given image folder.
        """
        # Public state.
        self.image_folder_path = image_folder_path
        self.mode = None
        self.extractor = Extractor(image_folder_path)

        # Private state; _gcm is presumably initialised later — confirm.
        self._password = password
        self._gcm = None
        self._data_length = 0
示例#7
0
    def __init__(self):
        """Build the processing pipeline end to end."""
        # Init basic objects — construction order matters: each object
        # consumes one built on a previous line.
        self.cropper = Cropper()
        self.extractor = Extractor(self.cropper)
        self.classifier = Classifier(self.extractor.images)
        self.connections = Connections(self.extractor, self.classifier)
        self.visualization = Visualization(self.connections)
示例#8
0
    def __init__(self, histogram):
        """Create the processing helpers bound to `histogram` and the empty
        result accumulators.
        """
        # Helpers. NOTE(review): the attribute names do not match the
        # classes (e.g. self.detector is an Extractor) — confirm intent.
        self.filter = Propagator()
        self.plotter = Estimator()
        self.detector = Extractor()
        self.plotter.setspace(histogram)

        # Accumulators filled during processing.
        self.distribution = []
        self.generalization = []
        self.abstraction = []
示例#9
0
    def OnExtract(self, events):
        """Handler for the Extract action (Python 2 / wxPython).

        Reads the source text, then runs keyword and summary extraction:
        languageType selection 1 is treated as Chinese (TextRank Extractor),
        anything else as English (EnExtractor), and the results are written
        into the abstract/keyword panes.
        """
        text = self.sourcePage.GetValue().strip()
        keyword_result = ''
        result = ''
        if text != '':
            if self.languageType.GetSelection() == 1:
                # Chinese pipeline.
                sentences_percent = self.sentencesPercent.GetValue()
                similarity_function = self.similarityFunction.GetValue()
                print similarity_function
                extractor = Extractor(
                    stop_words_file='./TextRank/trainer/stopword_zh.data')
                keyword, keyphrase = extractor.keyword_train(text=text)
                abstract = extractor.sentence_train(
                    text,
                    sentences_percent=sentences_percent,
                    sim_func=similarity_function)

                keyword_result = '/'.join(keyword)
                keyword_result += '\n关键短语:\n' + '/'.join(keyphrase)
                result += '。'.join(abstract) + r'。'
                self.abstractPage.SetValue(result)
                # Set the text style.
                #f = wx.Font(10, wx.ROMAN, wx.NORMAL, wx.NORMAL, True)  # create a font
                #self.abstractPage.SetStyle(0, len(result), wx.TextAttr('black',wx.NullColor,f))
                self.keywordPage.SetValue(keyword_result)
            else:
                # English pipeline; article type 1 = abstract-only.
                art_type = self.articleType.GetSelection()
                extractor = EnExtractor(
                    stop_words_file='./TextRank/trainer/stopword_en.data')
                if art_type == 1:
                    keyphrase = extractor.keyphrase_train(
                        text, article_type='Abstract')
                    keyword_result = 'Keyphrases:\n' + '/'.join(keyphrase)
                else:
                    sentences_percent = self.sentencesPercent.GetValue()
                    similarity_function = self.similarityFunction.GetValue()
                    keyphrase = extractor.keyphrase_train(
                        text, article_type='Fulltext')
                    summary = extractor.summary_train(
                        text,
                        sentences_percent=sentences_percent,
                        sim_func=similarity_function)
                    keyword_result = '/'.join(keyphrase)
                    result += '   ' + ' '.join(summary)
                self.abstractPage.SetValue(result)
                # Set the text style.
                f = wx.Font(10, wx.ROMAN, wx.NORMAL, wx.NORMAL, True)  # create a font
                self.abstractPage.SetStyle(
                    0, len(result), wx.TextAttr('black', wx.NullColor, f))

                self.keywordPage.SetValue(keyword_result)
        else:
            # Nothing to extract.
            #test
            #sentences_percent = self.sentencesPercent.GetValue()
            #print filter(lambda x:x.isdigit(), sentences_percent)
            print "No article"
示例#10
0
def c_07():
    """Print the number of products reported by the extractor.

    Any failure is caught and reported on stdout; always returns None.
    """
    try:
        extractor = Extractor(language='es')
        extractor.connect()
        n = extractor.get_num_of_products()
        print(n)
    except Exception as e:
        # Bug fix: the message previously named the wrong function ('c_06').
        print('c_07(), error: {}'.format(e))
示例#11
0
 def __init__(self, trainDataPath=None,
              extractorOptions=[True, False, True], loadPath=None):
     """Load a pickled (classifier, extractor) pair from `loadPath`, or
     train a GaussianProcessClassifier from the TSV at `trainDataPath`
     with the given extractor options.

     NOTE(review): the mutable default is kept for interface compatibility
     (it is never mutated); passing options equal to the default is
     deliberately rejected by the elif guard below — confirm that intent.

     Raises:
         Exception: when neither a load path nor a usable
             (trainDataPath + explicit extractorOptions) pair is given.
     """
     if loadPath is not None:
         # Context manager closes the file even if unpickling fails.
         with open(loadPath, "rb") as f:
             self.gpc, self.ext = pickle.load(f)
     elif not (trainDataPath is None or extractorOptions == [True, False, True]):
         self.ext = Extractor(extractorOptions)
         # Bug fix: was `ext.readTsv(...)` — `ext` is undefined here; the
         # reader lives on self.ext.
         data = self.ext.readTsv(trainDataPath)
         features = self.ext.features(data[0])
         labels = data[1]
         self.gpc = GaussianProcessClassifier().fit(features, labels)
     else:
         raise Exception("Either path to saved classifier or (dataset+extractor options) should be given")
 def extraer_rep(argumentos, pipe):
     """Extract the GitLab repository named in argumentos['url'], store the
     result, and report an HTTP-like status code through `pipe`.

     Sends 400 for non-gitlab hosts, 404/401 for the two known extraction
     errors, 200 on success; returns 1 on the early-exit paths.
     """
     partes = argumentos['url'].split('/')
     # Host is the third '/'-separated component of a full URL.
     if partes[2] != 'gitlab.com':
         pipe.send(400)
         return 1

     ruta = partes[3] + '/' + partes[4]
     try:
         if 'token' in argumentos.keys():
             ext = Extractor(link=ruta, token=argumentos['token'])
         else:
             ext = Extractor(ruta)
         proyecto = ext.extraer()
         Almacen.guardar(proyecto)
         pipe.send(200)
     except Exception as e:
         ServidorLogica.log(str(e))
         if str(e) == 'Proyecto no encontrado':
             pipe.send(404)
             return 1
         if str(e) == 'Permisos insuficientes':
             pipe.send(401)
             return 1
         # Unrecognised failure: forward the exception object itself.
         pipe.send(e)
示例#13
0
def run(file_path):
    """Process one sample file — or every file in a directory — through the
    dissect -> parse -> extract pipeline, printing each sample's info.
    """
    dissector = Dissector()
    parser = Parser()
    extractor = Extractor()  # fix: was misspelled `extrator`

    def _process(path):
        # Shared per-file pipeline (was duplicated in both branches).
        sample = Sample(path)
        dissector.extract_file(sample)
        parser.parse(sample)
        extractor.extract(sample)
        sample.print_info()

    if os.path.isdir(file_path):
        for entry in os.listdir(file_path):
            _process(os.path.join(file_path, entry))
    else:
        _process(file_path)
示例#14
0
def hello2():
    """Flask view: classify the chat form input, run the matching command,
    and re-render the page with (at most) the last 5 exchanges.
    """
    user_input = request.form["user-input"]
    print("\"{}\"".format(user_input))
    context_identifier = ContextIdentifier()
    context = context_identifier.getContext(user_input)
    bot_response = ""
    suggested_word = []

    if context == Context.unknown:
        print("Unknown?")
        suggested_word = SpellChecker().getWordSuggestion(user_input)

    elif context == Context.help:
        bot_response = "Terdapat beberapa hal yang dapat dilakukan:\n"
        bot_response += "- Menambah tugas (coba \"Tolong ingatkan kalau ada kuis IF3110 Bab 2 pada 22/04/21\")\n"
        bot_response += "- Melihat semua tugas (coba \"bot tugas apa saja sejauh ini ya?\")\n"
        bot_response += "-. Melihat tugas pada periode tertentu (coba \"Apa saja deadline antara 03/04/2021 sampai 15/04/2021\")\n"
        bot_response += "- Melihat tugas beberapa hari/minggu ke depan (coba \"Ada tugas apa saja 2 hari ke depan\")\n"
        bot_response += "- Melihat tugas yang deadline-nya hari ini (coba \"Deadline tucil hari ini apa saja, ya?\")\n"
        bot_response += "- Menampilkan deadline dari suatu tugas tertentu (coba \"Deadline tucil IF2230 itu kapan?\")\n"
        bot_response += "- Memperbarui tugas (coba \"Deadline tucil IF2230 diundur menjadi 02/02/2021\")\n"
        bot_response += "- Menghapus/menyelesaikan tugas (coba \"bot ujian IF2230 sudah selesai ya jadi gausah disimpan lagi\")\n"
        bot_response += "Kata kunci:\n" + "\n".join("- " + x for x in ["kuis", "tubes", "tucil", "ujian"])

    else:
        extractor = Extractor()
        print("\"{}\"".format(user_input))
        command = extractor.extract(user_input, context)

        # PEP 8 (E711): identity comparison with None, not ==.
        if command is None:
            suggested_word = SpellChecker().getWordSuggestion(user_input)
        else:
            command.execute()
            bot_response = command.getResult()

    if bot_response == "":
        if len(suggested_word) > 0:
            bot_response = "Mungkin maksud kata kunci Anda: " + ", ".join(suggested_word)
        else:
            bot_response = "Saya tidak paham .-."

    chat_data.append((user_input, bot_response.split("\n")))
    # A negative slice already clamps at the start of the list, so [-5:]
    # replaces the original length-conditional slice exactly.
    return render_template("index.html", message_data=chat_data[-5:])
示例#15
0
def process_sample(file_path):
    print file_path
    config = Config()
    load_sucess = config.load_config()
    if load_sucess:
        dis = Dissector()
        parser = Parser()
        extrator = Extractor()
        operator = Operator(config)
        r_generator = Report_Generator()
        sample = Sample(file_path)
        rlt = dis.extract_file(sample, config.get_output_dir())
        bin_time_list = list()
        if rlt:
            parser.parse(sample)
            extrator.extract(sample)
            # config.print_info()
            operator.operate(sample, config)
            r_generator.write_report(sample)

    return sample
示例#16
0
    def process_all(self):
        '''Extract all the rows SELECTed from the table (mySQL
        product_translation), Transform, and Load. Used for Kill and Fill:
        each row is registered with the courier and written out as CSV.'''

        try:
            extractor = Extractor()
            extractor.connect()
            extractor.execute()

            num_of_rows = 20
            rows = extractor.get_next_batch(num_of_rows)
            transformer = Transformer()
            courier = CourierClicoh()
            loader = LoaderCsv()

            while len(rows) > 0:
                products = []
                for row in rows:
                    print('id : {}'.format(row['id']))
                    # Register with the courier, then build the CSV row from
                    # the courier response plus the source row.
                    j = courier.add_product(row)
                    product = transformer.get_csv_row(j, row)
                    products.append(product)

                loader.write_rows(products)
                rows = extractor.get_next_batch(num_of_rows)

            extractor.close()
            # Fix: message previously read 'procell_all'.
            print('\n ETL.process_all() ... end')

        except Exception as e:
            print('ETL.process_all(), error: {}'.format(e))
            raise
示例#17
0
def c_01():
    '''get batches of rows from mySQL database. '''
    try:
        # Fix: the extractor was also named `e`, which the `except ... as e`
        # below shadows (and Python 3 deletes after the handler).
        extractor = Extractor()
        extractor.connect()
        extractor.execute()
        rows = extractor.get_next_batch(num_of_rows=2)

        while len(rows) > 0:

            print('\n ut_01.c_01(), looping BATCH of rows')

            for r in rows:
                print('\n {}'.format(r))

            rows = extractor.get_next_batch(num_of_rows=2)

        extractor.close()

    except Exception as e:
        print('ut_01.c_01(), error: {}'.format(e))

    print('\n end of case 1.')
    def test_AddTask(self):
        """Extraction of the add-task intent: matkul, jenis, deskripsi and
        the (tanggal, bulan, tahun) due date must all be recognised; the
        result is None when any mandatory part is missing.

        Uses `is`/`is not None` throughout (PEP 8 E711 fix).
        """
        extractor = Extractor()
        # Normal case.
        result1 = extractor.extract(
            "Halo bot, tolong ingetin kalau ada kuis IF3110 Bab 2 sampai 3 pada 22/04/21",
            Context.addTask)
        assert result1 is not None
        assert result1.matkul == "IF3110"
        assert result1.jenis == "kuis"
        assert result1.deskripsi == "Bab 2 sampai 3"
        assert result1.tahun == 2021
        assert result1.bulan == 4
        assert result1.tanggal == 22

        # Different date format; UAS counts as an exam ("ujian").
        result2 = extractor.extract(
            "Ingatkan saya ada UAS IF2230 pada 20 Mei 2021. Saya sedang chaos nih. :(",
            Context.addTask)
        assert result2 is not None
        assert result2.matkul == "IF2230"
        assert result2.jenis == "ujian"
        assert result2.deskripsi == "UAS"
        assert result2.tahun == 2021
        assert result2.bulan == 5
        assert result2.tanggal == 20

        # Year left implicit (defaults to the current year); UTS is an exam.
        result3 = extractor.extract(
            "Beritahukan saya tentang UTS IF2250 pada 1 Januari",
            Context.addTask)
        assert result3 is not None
        assert result3.matkul == "IF2250"
        assert result3.jenis == "ujian"
        assert result3.deskripsi == "UTS"
        assert result3.tahun == datetime.now().year
        assert result3.bulan == 1
        assert result3.tanggal == 1

        # Date not introduced by the word "pada".
        result4 = extractor.extract(
            "saya ingin menambahkan tucil IF2220 tentang String Matching yang deadline-nya sudah dekat: 28 April",
            Context.addTask)
        assert result4 is not None
        assert result4.matkul == "IF2220"
        assert result4.jenis == "tucil"
        assert result4.deskripsi == "String Matching"
        assert result4.tahun == datetime.now().year
        assert result4.bulan == 4
        assert result4.tanggal == 28

        # No date given (invalid).
        result7 = extractor.extract(
            "Ada tubes IF2210 tentang Worms. Ingatkan.", Context.addTask)
        assert result7 is None

        # No course code (invalid).
        result8 = extractor.extract(
            "Ada tucil tentang objek. Deadline 7 September. Ingatkan.",
            Context.addTask)
        assert result8 is None

        # No task type (invalid).
        result9 = extractor.extract(
            "Ingatkan tentang IF2211 tentang BFS dan DFS. Deadline 5 Desember.",
            Context.addTask)
        assert result9 is None
示例#19
0
def extract(item):
    """Build word statistics for one (date, text) pair.

    Relies on module-level `max_len` and `thresh`; returns the words table
    (tagged with its date) together with the date itself.
    """
    date, text = item
    words = Extractor(text=text, max_len=max_len).extract_words(thresh=thresh)
    words['date'] = date
    return words, date
示例#20
0
        print("confusion matrix:\nP\\R\tY\tN\nY\t{0}\t{1}\nN\t{2}\t{3}".format(tp,fp,fn,tn))

    def showFeatures(self, dataPath=None, data=None):
        """Print the verbose feature rows for either a TSV file at
        `dataPath` or an already-loaded `data` sequence.

        Raises:
            Exception: when neither source is provided.
        """
        # Fix (PEP 8 E711): identity comparison with None. `not data==None`
        # would also blow up on array-likes with element-wise equality.
        if dataPath is not None:
            data = self.ext.readTsv(dataPath)
            features = self.ext.features(data[0], True)
        elif data is not None:
            features = self.ext.features(data, True)
        else:
            raise Exception("no data available")
        for row in features:
            print(row)
    
if __name__=="__main__":
    ext=Extractor()
    if(input("Are you here to evaluate?\n>>> ").lower()=="y"):
        itemsList=[f for f in listdir("data/") if f[-4:]==".pkl"]
        for item in range(len(itemsList)):
            print("{0}: {1}".format(item,itemsList[item]))
        clfPath=int(input("Select your classifier PICKLE file\n>>> "))
        clf=Classifier(loadPath="data/"+itemsList[clfPath])
        dataList=[f for f in listdir("data/") if f[-4:]==".tsv"]
        for datum in range(len(dataList)):
            print("{0}: {1}".format(datum,dataList[datum]))
        dataPath=int(input("Select your evaluation data\n>>> "))
        targetLabels=[l for l in input("target labels\n>>> ")]
        clf.evaluate("data/"+dataList[dataPath],targetLabels)
        _exit(0)
    opStr=input("Input your option string\nex)'fttt'\n>>> ")
    options=[]
示例#21
0
    options['baseUrl'] = 'http://supervisor/core/api/states/'
except:
    logging.warning("Couldn't get token from Enviroment assuming this is dev")
    optionsFile = 'local.json'

with open(optionsFile) as json_file:
    options.update(loadJson(json_file))

logging.info("Got {} for database".format(options['db_name']))

client = InfluxDBClient(host=options['db_ip'],
                        port=options['db_port'],
                        username=options['db_user'],
                        password=options['db_pass'])
extractor = Extractor(options['baseUrl'], options['sensorPrefix'],
                      options[TOKENKEY], options['Timezone'],
                      options['db_measurement_name'])
Einf = Einfluxer(client, options['db_name'])

message = ""

try:
    data = extractor.GetMeasurements()
except ValueError:
    message = "Got ValueError when fetching data from Home assistant, The sensor probably haven't fetched data yet."

if message == "":
    if not data[0]['tags']['Metering date'] == Einf.GetLatestMeterDate():
        message = "Inserted data for: {}".format(
            data[0]['tags']['Metering date'])
        try:
示例#22
0
 def __init__(self, prop):
     """Wire the three ETL stages (extract / transform / load) from one
     shared property bag, which is also kept on the instance.
     """
     self.prop = prop
     # Each stage receives the same configuration object.
     self.extractor = Extractor(prop)
     self.loader = Loader(prop)
     self.transformer = Transformation(prop)
示例#23
0
                    X_test,
                    y_test,
                    file,
                    generation=20,
                    scale=20,
                    conjunction=False,
                    maxsat_on=True,
                    tailor=False,
                    fitness_func='Pro')
    param = m.pso()
    phi = param[0]
    theta = param[1]
    psi = param[2]
    k = param[3]

    ex = Extractor(clf, phi, theta, psi)
    ex.extract_forest_paths()
    ex.rule_filter()
    print('max_rule', ex.max_rule, 'max_node', ex.max_node)
    print("original path number: ", ex.n_original_leaves_num)
    print('original scale: ', ex.scale)
    print("original path number after rule filter: ", len(ex._forest_values))

    sat = Z3Process(ex, k)
    sat.leaves_partition()
    sat.maxsat()
    sat.run_filter()

    print("original path number after maxsat: ", sat.n_rules_after_max,
          " after filter: ", sat.n_rules_after_filter, '\n')
    print('classes:', clf.classes_)
示例#24
0
from Extractor import Extractor

# RTF files to run diagnosis extraction over.
inputfiles = [
    "sample1.rtf", "sample2.rtf", "sample3.rtf", "sample4.rtf", "sample5.rtf",
    "sample6.rtf"
]
result = Extractor(inputfiles)
ans = result.getalldiagnosis()

# Flatten the per-file diagnosis mapping and print every entry.
new = []
for diagnoses in ans.values():
    new += diagnoses
for entry in new:
    print(entry)
from InfoToTextFile import RecordEntry

# Source workbook and the CSV the combined mailing list is written to.
excelFile = "FinalResult.xlsx"
mailingListFile = "Mailing/MailingList.csv"

# list of emails, to be written to a text file
mail_list = []

# now to action!

# create record writer instance

recWriter = RecordEntry()

# extract mail
extractor = Extractor(workBookPath=excelFile)
mail_list = extractor.extractAllEmailAddress()

# write to file
recWriter.writeRecords(text_file=mailingListFile, records=mail_list)

# get team records by name and write mail lists or full info
# writing mailing lists here
teamNames = [
    "Corporate", "Operations", "Publications", "Promotions", "Logistics"
]
# Per-team accumulator — presumably filled in the loop below; confirm.
stats = {}
for teamName in teamNames:
    recordsFile = "Mailing/{}.csv".format(teamName)
    records = extractor.extractRecordByTeam(teamName=teamName)
示例#26
0
def main(args: str) -> None:
    """Validate the CLI argument, scrape the subjects from the resulting
    URL, and dump them to ./Dati/subjects.csv."""
    url: str = __check_error_input(args)
    print("Extracting subjects...")
    # Same pipeline as before, unchained for readability.
    subjects = WebScraping(url).extract_subjects()
    frame = Extractor(subjects).extract_data_frame()
    frame.to_csv("./Dati/subjects.csv", index=False)
    print("DONE!\n")
示例#27
0
from Extractor import Extractor
from Granulator import Granulator
from Agent import Agent
from Metric import Metric
from Representative import Representative
from Clustering_MBSAS import Clustering_MBSAS
from Clustering_K_Means import Clustering_K_Means

# Both agents share one extractor and differ only in clustering strategy.
extractor1 = Extractor()

# MBSAS agent — args: Lambda, theta_start, theta_step, theta_stop.
obj_clustering_MBSAS = Clustering_MBSAS(3, 0.2, 0.1, 1.1)
agent1 = Agent(Granulator, Metric, extractor1, Representative, obj_clustering_MBSAS)
agent1.execute(3.1, 0.5)  # S_T, eta

# K-Means agent — args: k, k_max.
obj_clustering_K_Means = Clustering_K_Means(1, 3)
agent2 = Agent(Granulator, Metric, extractor1, Representative, obj_clustering_K_Means)
agent2.execute(3.1, 0.5)  # S_T, eta
示例#28
0
    def __init__(self, path, viewer=None, green_screen=False):
        """ Extract informations of pieces in the img at `path` and start computation of the solution """
        # NOTE(review): the parameter is unconditionally overridden here, so
        # callers passing green_screen=False still get green-screen
        # behaviour — looks like leftover debug; confirm before changing.
        green_screen = True
        self.pieces_ = None
        factor = 0.40
        # Retry extraction with a slowly growing factor until pieces are
        # actually found.
        while self.pieces_ is None:
            factor += 0.01
            self.extract = Extractor(path, viewer, green_screen, factor)
            self.pieces_ = self.extract.extract()

        self.viewer = viewer
        self.green_ = green_screen
        self.connected_directions = []
        self.diff = {}
        # Reverse index: edge -> owning piece.
        self.edge_to_piece = {}

        for p in self.pieces_:
            for e in p.edges_:
                self.edge_to_piece[e] = p

        self.extremum = (-1, -1, 1, 1)
        self.log('>>> START solving puzzle')

        border_pieces = []
        non_border_pieces = []
        connected_pieces = []
        # Separate border pieces from the other
        for piece in self.pieces_:
            if piece.number_of_border():
                border_pieces.append(piece)
            else:
                non_border_pieces.append(piece)

        # Candidate puzzle dimensions consistent with the piece counts.
        self.possible_dim = self.compute_possible_size(len(self.pieces_),
                                                       len(border_pieces))

        # Start by a corner piece
        for piece in border_pieces:
            if piece.number_of_border() > 1:
                connected_pieces = [piece]
                border_pieces.remove(piece)
                break
        self.log("Number of border pieces: ", len(border_pieces) + 1)

        # NOTE(review): .format(1) on the two title strings below is a no-op
        # ('Border types' has no placeholder) — presumably copy-paste.
        self.export_pieces('/tmp/stick{0:03d}'.format(1) + ".png",
                           '/tmp/colored{0:03d}'.format(1) + ".png",
                           'Border types'.format(1),
                           'Step {0:03d}'.format(1),
                           display_border=True)

        self.log('>>> START solve border')
        start_piece = connected_pieces[0]
        self.corner_pos = [((0, 0), start_piece)]  # we start with a corner

        # Rotate the corner (at most 4 steps) until its S and W edges are
        # the connected ones.
        for i in range(4):
            if start_piece.edge_in_direction(
                    Directions.S).connected and start_piece.edge_in_direction(
                        Directions.W).connected:
                break
            start_piece.rotate_edges(1)

        self.extremum = (0, 0, 1, 1)

        # Solve the frame first, then fill the interior.
        self.strategy = Strategy.BORDER
        connected_pieces = self.solve(connected_pieces, border_pieces)
        self.log('>>> START solve middle')
        self.strategy = Strategy.FILL
        self.solve(connected_pieces, non_border_pieces)

        self.log('>>> SAVING result...')
        self.translate_puzzle()
        self.export_pieces("/tmp/stick.png",
                           "./images/output/solved.png",
                           display=True)
示例#29
0
                    required=False,
                    default=False,
                    type=bool,
                    dest='preprocess')

if __name__ == '__main__':
    # Extract word statistics from the input file and optionally save them.
    tic = time()
    args = parser.parse_args()
    rfpath = join(RFDIR, args.fname)
    print(args.preprocess, args.count)
    if not args.preprocess:
        # Fix: files are now closed via context managers, and the fallback
        # only triggers on decode failures (the original bare `except:`
        # swallowed everything, including KeyboardInterrupt).
        try:
            with open(rfpath, "r") as fh:
                text = fh.readlines()
        except UnicodeDecodeError:
            with open(rfpath, "r", encoding="utf-8") as fh:
                text = fh.readlines()
        text = [line.strip() for line in text]
        extracter = Extractor(text=text, max_len=args.ngram)
    else:
        extracter = Extractor(rfpath=rfpath, max_len=args.ngram)
    words = extracter.extract_words(score_thresh=args.thresh,
                                    cnt_thresh=args.count)
    if args.save:
        # Output name defaults to the input name when none is given
        # (deduplicates the two near-identical branches).
        opath = join(WFDIR, args.oname if args.oname else args.fname)
        words.to_csv(opath, encoding="utf_8_sig", index=False, sep='\t')
    print(words)
    toc = time()
    print("Total time: %.2fs" % (toc - tic))
示例#30
0
from Crawler import Crawler
from Extractor import Extractor
from Loader import Loader
import sys

# CLI: url, output CSV path, target database.
url = sys.argv[1]
csv_file = sys.argv[2]
data_base = sys.argv[3]

cr = Crawler(url)
rates = cr.get_response()

ex = Extractor(rates, csv_file)
ex.extraction()

# Fix: the loader previously read the hard-coded 'rates.csv' regardless of
# the csv_file argument the extractor was given.
ld = Loader(csv_file, data_base)
ld.save_to_db()