Exemplo n.º 1
0
 def __init__(self, response, url):
     """Keep the page and its URL and create the normalisation helpers.

     ``response`` and ``url`` are stored unchanged.  ``Processors``,
     ``Brands``, ``Memory`` and ``Storages`` are defined outside this
     snippet; one instance of each is created per extractor.
     """
     self.response, self.url = response, url
     # Helpers are instantiated in the same order as before, in case their
     # constructors have side effects.
     self.processors = Processors()
     self.brands = Brands()
     self.memory = Memory()
     self.storages = Storages()
Exemplo n.º 2
0
class DataExtractor():
    """Scrape one Casas Bahia product page into a flat attribute dict.

    ``response`` must offer ``findAll(name, attrs)`` and return nodes that
    expose ``get_text()`` / ``find()`` (presumably a BeautifulSoup document --
    confirm against the caller).  ``Processors``, ``Brands``, ``Memory`` and
    ``Storages`` are defined elsewhere; their ``get_*`` methods supply the
    canonical value returned for each recognised attribute.
    """

    # Target schema:
    # { _id, available, brand, color, display_feature, display_size,
    #   graphics_processor_name, graphics_processor, name, operating_system,
    #   price, processor, ram_memory, sku, screen_resolution, storage,
    #   storage_type, url, img_url }
    #
    # TODO: display_feature, graphics_processor_name, graphics_processor,
    #       operating_system, screen_resolution, storage_type

    # Each table is a sequence of (regex, getter name) pairs that is scanned
    # top to bottom; the first pattern found in the text wins, so the order
    # is significant.
    _BRAND_RULES = (
        ('dell', 'get_dell'),
        ('asus', 'get_asus'),
        ('apple', 'get_apple'),
        ('acer', 'get_acer'),
        ('samsung', 'get_samsung'),
        ('positivo', 'get_positivo'),
        ('lenovo', 'get_lenovo'),
        ('lg', 'get_lg'),
        ('hp', 'get_hp'),
        ('sony', 'get_sony'),
    )

    _PROCESSOR_RULES = (
        ('i3', 'get_i3'),
        ('i5', 'get_i5'),
        ('i7', 'get_i7'),
        ('Pentium', 'get_pentium_quad'),
        ('byt|baytrail', 'get_baytrail'),
        ('Intel.+[Dd]ual [Cc]ore', 'get_intel_dual'),
        ('Intel.+[Qq]uad [Cc]ore', 'get_intel_quad'),
        ('amd.+dual core', 'get_amd_dual'),
        ('atom', 'get_atom'),
        ('Intel.+Core.+M', 'get_core_m'),
        ('Celeron', 'get_celeron'),
        ('arm', 'get_arm_a9'),
        ('samsung', 'get_samsung'),
    )

    # (size in GB, getter name on ``self.memory``)
    _MEMORY_RULES = (
        (16, 'get_16GB'),
        (12, 'get_12GB'),
        (14, 'get_14GB'),
        (10, 'get_10GB'),
        (8, 'get_8GB'),
        (6, 'get_6GB'),
        (4, 'get_4GB'),
        (2, 'get_2GB'),
        (1, 'get_1GB'),
    )

    # (amount, unit, getter name on ``self.storages``)
    _STORAGE_RULES = (
        ('2', 'TB', 'get_2tb'),
        ('1', 'TB', 'get_1tb'),
        ('750', 'GB', 'get_750'),
        ('640', 'GB', 'get_640'),
        ('500', 'GB', 'get_500'),
        ('320', 'GB', 'get_320'),
        ('256', 'GB', 'get_256'),
        ('160', 'GB', 'get_160'),
        ('128', 'GB', 'get_128'),
        ('80', 'GB', 'get_80'),
        ('64', 'GB', 'get_64'),
        ('32', 'GB', 'get_32'),
        ('16', 'GB', 'get_16'),
    )

    def __init__(self, response, url):
        """Store the page and its URL and create the lookup helpers."""
        self.response = response
        self.url = url
        self.processors = Processors()
        self.brands = Brands()
        self.memory = Memory()
        self.storages = Storages()

    def parse(self):
        """Return a dict with the attributes extracted from the page.

        Keys: store, name, url, price, available, img_url, processor, brand,
        ram_memory, sku, storage, display_size.
        """
        page = self.response
        data = {}

        # Casas Bahia products.
        data['store'] = "casas_bahia"

        # Product name.
        data['name'] = self._first_text(
            page.findAll("b", {"itemprop": "name"}))

        # URL as given to the constructor.
        data['url'] = self.url

        # Product price.
        data['price'] = self.normalize_price(
            page.findAll("i", {"class": "sale price"}))

        # Availability: on this store a product with a price is available.
        data['available'] = (data['price'] is not None
                             and data['price'] != 0.0)

        data['img_url'] = self.normalize_img_url(
            page.findAll('img', {'itemprop': 'image'}))

        # Processor.
        data['processor'] = self.normalize_processor(self._first_text(
            page.findAll("", {"class": "Processador"})))

        # Brand, derived from the product name.
        data['brand'] = self.normalize_brand(data['name'])

        # RAM.
        data['ram_memory'] = self.normalize_memory(self._first_text(
            page.findAll("dl", {"class": "Memoria-RAM"})))

        # SKU: last dash-separated chunk of the URL path, without extension.
        data['sku'] = self.url.split('?')[0].split('-')[-1].split('.')[0]

        # Storage (SSD/HD).
        data['storage'] = self.normalize_storage(page.findAll(
            "dl", {"class": ["Disco-rigido--HD-", "Memoria-Flash--SSD-"]}))

        # Screen size.
        data['display_size'] = self._dd_text(
            page.findAll("dl", {"class": "Tamanho-da-tela"})).strip()

        return data

    def _first_text(self, nodes):
        """Return the stripped text of the first node, or '' if none."""
        if len(nodes) > 0:
            return nodes[0].get_text().strip()
        return ""

    def _dd_text(self, nodes):
        """Return the raw text of the first node's <dd> child, or ''.

        A missing <dd> yields '' instead of raising AttributeError.
        """
        if not nodes:
            return ''
        cell = nodes[0].find('dd')
        return cell.get_text() if cell is not None else ''

    def _first_match(self, rules, text, catalog):
        """Call the getter of the first rule whose regex is found in text.

        Returns None when no rule matches.  Only the matching getter is
        invoked.
        """
        for pattern, getter in rules:
            if re.search(pattern, text, re.IGNORECASE) is not None:
                return getattr(catalog, getter)()
        return None

    def validate_field(self, data, field):
        """Return the stripped text of the first node in data[field], or ''."""
        return self._first_text(data[field])

    def normalize_img_url(self, img_url):
        """Return the ``src`` of the first image node, or None if unavailable."""
        if not img_url:
            return None
        # .get() avoids a KeyError when the tag has no src attribute.
        return img_url[0].get('src')

    def normalize_storage(self, hd):
        """Map the storage description found in ``hd`` to a canonical value.

        ``hd`` is the list of nodes matched for the storage rows.  Returns ''
        when no capacity text can be found; otherwise whatever
        ``get_storage_capacity`` returns (None for an unknown capacity).
        """
        text = self._dd_text(hd)
        # ".*" (not ".+") so that "1TB" with no separator is accepted too.
        found = re.search(r'\d+.*[TG]B', text, re.IGNORECASE)
        if found is None:
            return ''
        return self.get_storage_capacity(found.group())

    def normalize_memory(self, raw_data):
        """Return the canonical RAM value for a "<n>GB" / "<n> GB" text.

        Returns None when no known size is present.
        """
        for size, getter in self._MEMORY_RULES:
            # The lookbehind keeps "2GB" from matching inside "32GB".
            pattern = r'(?<!\d)%d\s*GB' % size
            if re.search(pattern, raw_data, re.IGNORECASE) is not None:
                return getattr(self.memory, getter)()
        return None

    def normalize_price(self, raw_data):
        """Convert the first price node ("1.000,00") to a float (1000.0).

        Returns 0.0 when there is no node or the text is not a number.
        """
        try:
            text = raw_data[0].get_text() if len(raw_data) > 0 else ""
            # Drop thousands separators, then use "." as the decimal mark.
            return float(text.replace('.', '').replace(',', '.'))
        except ValueError:
            return 0.0

    def normalize_brand(self, raw_data):
        """Return the canonical brand found in ``raw_data``, or None."""
        return self._first_match(self._BRAND_RULES, raw_data, self.brands)

    # ['Intel Core i3', 'Intel Core i5', 'Intel Core i7',
    #  'Intel Pentium Quad Core', 'Intel Baytrail', 'AMD Dual Core',
    #  'Intel Atom', 'Intel Core M', 'Intel Celeron']
    def normalize_processor(self, raw_data):
        """Return the canonical processor found in ``raw_data``, or None."""
        # Remove leftover encoding artefacts such as a literal "\u84d2".
        # A raw string is required: '\u' followed by non-hex characters is a
        # SyntaxError in a Python 3 string literal.
        cleaned = re.sub(r'\\u\w\w\w\w', '', raw_data)
        return self._first_match(self._PROCESSOR_RULES, cleaned,
                                 self.processors)

    def get_storage_capacity(self, raw_data):
        """Return the canonical storage value for a capacity text, or None."""
        if not raw_data:
            return None
        for amount, unit, getter in self._STORAGE_RULES:
            # Optional whitespace before the unit; the lookbehind keeps
            # "2TB" from matching inside "12TB".
            pattern = r'(?<!\d)%s\s*%s' % (amount, unit)
            if re.search(pattern, raw_data, re.IGNORECASE) is not None:
                return getattr(self.storages, getter)()
        return None
Exemplo n.º 3
0
class DataExtractor:
    """Scrape one Shoptime product page into a flat attribute dict.

    ``response`` must offer ``find`` / ``findAll`` and return nodes exposing
    ``parent``, ``text`` and ``get_text()`` (presumably a BeautifulSoup
    document -- confirm against the caller).  ``Processors``, ``Brands``,
    ``Memory`` and ``Storages`` are defined elsewhere; their ``get_*``
    methods supply the canonical value for each recognised attribute.
    """

    # Errors treated as "field not present on the page" while parsing.
    _PARSE_ERRORS = (ValueError, TypeError, AttributeError)

    # Each table is a sequence of (regex, getter name) pairs that is scanned
    # top to bottom; the first pattern found in the text wins, so the order
    # is significant.
    _BRAND_RULES = (
        ('dell', 'get_dell'),
        ('asus', 'get_asus'),
        ('apple', 'get_apple'),
        ('acer', 'get_acer'),
        ('samsung', 'get_samsung'),
        ('positivo', 'get_positivo'),
        ('lenovo', 'get_lenovo'),
        ('lg', 'get_lg'),
        ('compaq', 'get_compaq'),
        ('seagate', 'get_seagate'),
        ('gigabyte', 'get_gigabyte'),
        ('hp', 'get_hp'),
        ('sony', 'get_sony'),
    )

    _PROCESSOR_RULES = (
        ('i3', 'get_i3'),
        ('i5', 'get_i5'),
        ('i7', 'get_i7'),
        ('Pentium', 'get_pentium_quad'),
        ('byt|baytrail', 'get_baytrail'),
        ('amd.+dual core', 'get_amd_dual'),
        ('atom', 'get_atom'),
        ('Intel.+Core.+M', 'get_core_m'),
        ('Celeron', 'get_celeron'),
        ('arm', 'get_arm_a9'),
        ('samsung', 'get_samsung'),
    )

    # (size in GB, getter name on ``self.memory``)
    _MEMORY_RULES = (
        (16, 'get_16GB'),
        (12, 'get_12GB'),
        (14, 'get_14GB'),
        (10, 'get_10GB'),
        (8, 'get_8GB'),
        (6, 'get_6GB'),
        (4, 'get_4GB'),
        (2, 'get_2GB'),
        (1, 'get_1GB'),
    )

    # (amount, unit, getter name on ``self.storages``)
    _STORAGE_RULES = (
        ('2', 'TB', 'get_2tb'),
        ('1', 'TB', 'get_1tb'),
        ('750', 'GB', 'get_750'),
        ('640', 'GB', 'get_640'),
        ('500', 'GB', 'get_500'),
        ('320', 'GB', 'get_320'),
        ('256', 'GB', 'get_256'),
        ('160', 'GB', 'get_160'),
        ('128', 'GB', 'get_128'),
        ('80', 'GB', 'get_80'),
        ('64', 'GB', 'get_64'),
        ('32', 'GB', 'get_32'),
        ('16', 'GB', 'get_16'),
    )

    def __init__(self, response, url):
        """Store the page and its URL and create the lookup helpers."""
        self.response = response
        self.url = url
        self.processors = Processors()
        self.brands = Brands()
        self.memory = Memory()
        self.storages = Storages()

    def parse(self):
        """Return a dict with the attributes extracted from the page.

        Every field is extracted on a best-effort basis: when the expected
        markup is missing, the field falls back to '' (0.0 for the price,
        False for availability, None for storage).
        """
        r = self.response
        errors = self._PARSE_ERRORS
        data = {}

        # Shoptime products.
        data['store'] = 'shoptime'

        # Product name, without the trailing " (Cód. 123)" marker.
        try:
            title = self.validate_field(
                {'name': r.find('h1', {'id': 'main-product-name'})}, 'name')
            data['name'] = re.sub(u' \\(Cód\\. ([0-9])+\\)', '', title)
        except errors:
            data['name'] = ''

        # URL as given to the constructor.
        data['url'] = self.url

        # Product price.
        try:
            price_text = r.find(
                'span', {'data-partner-value': True})['data-partner-value']
            data['price'] = self.normalize_price(price_text)
        except errors:
            data['price'] = 0.0

        # Availability.
        try:
            data['available'] = self.set_available(r, data['price'])
        except errors:
            data['available'] = False

        # Processor.
        try:
            data['processor'] = self.normalize_processor(
                self._spec_text(r'Processador'))
        except errors:
            data['processor'] = ''

        # Brand: use the "Marca" row when it names a known brand, otherwise
        # fall back to the product name.
        try:
            brand_text = self._spec_text(r'Marca')
        except errors:
            brand_text = ''
        try:
            brand = (self.normalize_brand(brand_text)
                     or self.normalize_brand(data['name']))
            # An unrecognised brand (None) raises here and becomes ''.
            data['brand'] = brand.strip()
        except errors:
            data['brand'] = ''

        # RAM.
        # NOTE(review): the raw cell text is stored here; normalize_memory()
        # is never applied to it -- confirm whether that is intentional.
        try:
            data['ram_memory'] = self._spec_text(u'Memória RAM')
        except errors:
            data['ram_memory'] = ''

        # SKU used as identifier.
        try:
            data['sku'] = r.find('div', {'data-sku': True})['data-sku']
        except errors:
            data['sku'] = ''

        # Storage (SSD/HD).
        try:
            hd = self._spec_text(r'HD')
        except errors:
            hd = ''
        try:
            ssd = self._spec_text(r'SSD')
        except errors:
            ssd = ''
        try:
            data['storage'] = self.normalize_storage(hd, ssd)
        except errors:
            data['storage'] = self.normalize_storage('', '')

        # Screen size.
        try:
            data['display_size'] = self.normalize_display_size(
                self._spec_text(r'Polegadas da Tela'))
        except errors:
            data['display_size'] = ''

        # Product image; KeyError covers an <img> without a src attribute.
        try:
            image = r.findAll('img', {'class': 'p-image'})[0]
            data['img_url'] = image['src'].strip()
        except errors + (IndexError, KeyError):
            data['img_url'] = ''

        return data

    def _spec_text(self, label):
        """Return the text of the <td> beside the <th> matching ``label``.

        Raises AttributeError when the row (or its <td>) is missing.
        """
        header = self.response.find('th', text=re.compile(label))
        return header.parent.find('td').text

    def _first_match(self, rules, text, catalog):
        """Call the getter of the first rule whose regex is found in text.

        Returns None when no rule matches.  Only the matching getter is
        invoked.
        """
        for pattern, getter in rules:
            if re.search(pattern, text, re.IGNORECASE) is not None:
                return getattr(catalog, getter)()
        return None

    def set_available(self, response, price):
        """Return True when the page has no "product-unavailable" box and the
        price is non-zero."""
        marker = response.find('div', {'class': 'product-unavailable'})
        return marker is None and price != 0.0

    def validate_field(self, data, field):
        """Return the stripped text of the node in data[field].

        Returns '' when the node is missing (None) or has no content.
        """
        node = data[field]
        if node is None or len(node) == 0:
            return ''
        return node.get_text().strip()

    def normalize_display_size(self, text):
        """Format a screen size such as '15,6' or '15,6"' as '15.6"'.

        Returns '' for empty or blank input.
        """
        if not text:
            return ''
        inches = text.strip(' \t\r\n"').replace(',', '.')
        if not inches:
            return ''
        return inches + '"'

    def normalize_storage(self, hd, ssd):
        """Return the canonical capacity described by ``hd`` or else ``ssd``.

        The first text that contains a "<n> GB/TB" amount decides the result
        (which may be None for an unknown capacity).  Returns None when
        neither text has one.
        """
        for text in (hd, ssd):
            if not text:
                continue
            # ".*" (not ".+") so that "1TB" with no separator is accepted.
            found = re.search(r'\d+.*[TG]B', text, re.IGNORECASE)
            if found is not None:
                return self.get_storage_capacity(found.group())
        return None

    def normalize_memory(self, raw_data):
        """Return the canonical RAM value described by ``raw_data``, or None.

        When the text carries a "<n>GB" amount only that amount is
        considered, so "4GB DDR3 1600MHz" is 4 GB rather than 16 GB.  Text
        without a unit keeps the original loose digit matching.
        """
        has_unit = re.search(r'\d\s*GB', raw_data, re.IGNORECASE) is not None
        for size, getter in self._MEMORY_RULES:
            if has_unit:
                pattern = r'(?<!\d)%d\s*GB' % size
            else:
                pattern = str(size)
            if re.search(pattern, raw_data, re.IGNORECASE) is not None:
                return getattr(self.memory, getter)()
        return None

    def normalize_price(self, raw_data):
        """Parse a price given in plain decimal notation ("1000.00").

        Returns the value rounded to two decimals, or 0.0 when it cannot be
        parsed.
        """
        try:
            return round(float(raw_data), 2)
        except (TypeError, ValueError):
            return 0.0

    def normalize_brand(self, raw_data):
        """Return the canonical brand found in ``raw_data``, or None."""
        return self._first_match(self._BRAND_RULES, raw_data, self.brands)

    def normalize_processor(self, raw_data):
        """Return the canonical processor found in ``raw_data``, or None."""
        # Remove leftover encoding artefacts such as a literal "\u84d2".
        # A raw string is required: '\u' followed by non-hex characters is a
        # SyntaxError in a Python 3 string literal.
        cleaned = re.sub(r'\\u\w\w\w\w', '', raw_data)
        return self._first_match(self._PROCESSOR_RULES, cleaned,
                                 self.processors)

    def get_storage_capacity(self, raw_data):
        """Return the canonical storage value for a capacity text, or None."""
        if not raw_data:
            return None
        for amount, unit, getter in self._STORAGE_RULES:
            # Optional whitespace before the unit; the lookbehind keeps
            # "2TB" from matching inside "12TB".
            pattern = r'(?<!\d)%s\s*%s' % (amount, unit)
            if re.search(pattern, raw_data, re.IGNORECASE) is not None:
                return getattr(self.storages, getter)()
        return None
Exemplo n.º 4
0
class DataExtractor():
    """Scrape one Mega Eletronicos product page into a flat attribute dict.

    ``response`` must offer ``find(name, ...)`` and return nodes exposing
    ``parent`` and ``text`` (presumably a BeautifulSoup document -- confirm
    against the caller).  ``Processors``, ``Brands``, ``Memory`` and
    ``Storages`` are defined elsewhere; their ``get_*`` methods supply the
    canonical value for each recognised attribute.
    """

    # Target schema:
    # { _id, available, brand, color, display_feature, display_size,
    #   graphics_processor_name, graphics_processor, name, operating_system,
    #   price, processor, ram_memory, sku, screen_resolution, storage,
    #   storage_type, url, img_url }
    #
    # TODO: display_feature, display_size, graphics_processor_name,
    #       graphics_processor, operating_system, screen_resolution,
    #       storage_type, img_url

    # Errors treated as "field not present on the page" while parsing.
    _PARSE_ERRORS = (ValueError, TypeError, AttributeError)

    # Each table is a sequence of (regex, getter name) pairs that is scanned
    # top to bottom; the first pattern found in the text wins, so the order
    # is significant.
    _BRAND_RULES = (
        ('dell', 'get_dell'),
        ('asus', 'get_asus'),
        ('apple', 'get_apple'),
        ('acer', 'get_acer'),
        ('samsung', 'get_samsung'),
        ('positivo', 'get_positivo'),
        ('lenovo', 'get_lenovo'),
        ('lg', 'get_lg'),
    )

    _PROCESSOR_RULES = (
        ('i3', 'get_i3'),
        ('i5', 'get_i5'),
        ('i7', 'get_i7'),
        ('Pentium', 'get_pentium_quad'),
        ('byt|baytrail', 'get_baytrail'),
        ('amd.+dual core', 'get_amd_dual'),
        ('amd.+quad core', 'get_amd_quad'),
        ('atom', 'get_atom'),
        ('Intel.+Core.+M', 'get_core_m'),
        ('Celeron', 'get_celeron'),
        ('arm', 'get_arm_a9'),
        ('samsung', 'get_samsung'),
    )

    # (size in GB, getter name on ``self.memory``)
    _MEMORY_RULES = (
        (16, 'get_16GB'),
        (12, 'get_12GB'),
        (14, 'get_14GB'),
        (10, 'get_10GB'),
        (8, 'get_8GB'),
        (6, 'get_6GB'),
        (4, 'get_4GB'),
        (2, 'get_2GB'),
        (1, 'get_1GB'),
    )

    # (amount, unit, getter name on ``self.storages``)
    _STORAGE_RULES = (
        ('2', 'TB', 'get_2tb'),
        ('1', 'TB', 'get_1tb'),
        ('750', 'GB', 'get_750'),
        ('640', 'GB', 'get_640'),
        ('500', 'GB', 'get_500'),
        ('320', 'GB', 'get_320'),
        ('256', 'GB', 'get_256'),
        ('160', 'GB', 'get_160'),
        ('128', 'GB', 'get_128'),
        ('80', 'GB', 'get_80'),
        ('64', 'GB', 'get_64'),
        ('32', 'GB', 'get_32'),
        ('16', 'GB', 'get_16'),
    )

    def __init__(self, response, url):
        """Store the page and its URL and create the lookup helpers."""
        self.response = response
        self.url = url
        self.processors = Processors()
        self.brands = Brands()
        self.memory = Memory()
        self.storages = Storages()

    def parse(self):
        """Return a dict with the attributes extracted from the page.

        Every field is extracted on a best-effort basis: when the expected
        markup is missing, the field falls back to '' (0.0 for the price).
        """
        r = self.response
        errors = self._PARSE_ERRORS
        data = {}

        # Mega Eletronicos products.
        data['store'] = "mega_eletronicos"

        # Product title: the <h1> next to the full-width column container.
        # It feeds both the name and the brand detection below.
        try:
            title = r.find('div', {
                'class': 'col-md-12 col-sm-12'
            }).parent.find('h1').text
        except errors:
            title = None

        # Product name.  The title is already a string, so it is stripped
        # directly; running it through validate_field() (which expects a
        # list of nodes) always failed and left the name empty.
        data['name'] = title.strip() if title is not None else ''

        # URL as given to the constructor.
        data['url'] = self.url

        # Product price.
        try:
            data['price'] = self.normalize_price(
                r.find('h3', {'class': 'real'}).parent.find('span').text)
        except errors:
            data['price'] = 0.0

        # Availability: a product with a price is considered available.
        data['available'] = (data['price'] is not None
                             and data['price'] != 0.0)

        # Processor.
        try:
            data['processor'] = self.normalize_processor(
                self._spec_text(r'(Processor|Processador|Intel)'))
        except errors:
            data['processor'] = ''

        # Brand, detected in the product title.
        try:
            data['brand'] = self.normalize_brand(title)
        except errors:
            data['brand'] = ''

        # RAM.
        try:
            data['ram_memory'] = self.normalize_memory(
                self._spec_text(r'(DDR|SDRAM|RAM)'))
        except errors:
            data['ram_memory'] = ''

        # SKU: last dash-separated chunk of the URL path, without extension.
        data['sku'] = self.url.split('?')[0].split('-')[-1].split('.')[0]

        # Storage (SSD/HD).
        try:
            data['storage'] = self.normalize_storage(
                self._spec_text(r'(rpm|HDD|SSD)'))
        except errors:
            data['storage'] = ''

        # Screen size: not extracted yet, the page values could not be
        # normalised reliably.
        data['display_size'] = ''

        return data

    def _spec_text(self, label):
        """Return the text of the 65%-wide <td> in the row whose <td> text
        matches ``label``.

        Raises AttributeError when the row (or the value cell) is missing.
        """
        cell = self.response.find('td', text=re.compile(label))
        return cell.parent.find('td', {'width': '65%'}).text

    def _first_match(self, rules, text, catalog):
        """Call the getter of the first rule whose regex is found in text.

        Returns None when no rule matches.  Only the matching getter is
        invoked.
        """
        for pattern, getter in rules:
            if re.search(pattern, text, re.IGNORECASE) is not None:
                return getattr(catalog, getter)()
        return None

    def validate_field(self, data, field):
        """Return the stripped text of the first node in data[field], or ''.

        data[field] must be a sequence of nodes exposing ``get_text()``.
        """
        nodes = data[field]
        if len(nodes) > 0:
            return nodes[0].get_text().strip()
        return ""

    def normalize_storage(self, hd):
        """Map a storage description to its canonical capacity value.

        Returns '' when ``hd`` is empty or contains no "<n> GB/TB" amount
        (previously a text without an amount raised TypeError); otherwise
        whatever ``get_storage_capacity`` returns, which is None for an
        unknown capacity.
        """
        if not hd:
            return ''
        # ".*" (not ".+") so that "1TB" with no separator is accepted too.
        found = re.search(r'\d+.*[TG]B', hd, re.IGNORECASE)
        if found is None:
            return ''
        return self.get_storage_capacity(found.group())

    def normalize_memory(self, raw_data):
        """Return the canonical RAM value described by ``raw_data``, or None.

        When the text carries a "<n>GB" amount only that amount is
        considered, so "4GB DDR3 1600MHz" is 4 GB rather than 16 GB.  Text
        without a unit keeps the original loose digit matching.
        """
        has_unit = re.search(r'\d\s*GB', raw_data, re.IGNORECASE) is not None
        for size, getter in self._MEMORY_RULES:
            if has_unit:
                pattern = r'(?<!\d)%d\s*GB' % size
            else:
                pattern = str(size)
            if re.search(pattern, raw_data, re.IGNORECASE) is not None:
                return getattr(self.memory, getter)()
        return None

    def normalize_price(self, raw_data):
        """Convert a "1.000,00" price text to a float (1000.0).

        Returns 0.0 when the text is missing or is not a number.
        """
        try:
            # Drop thousands separators, then use "." as the decimal mark.
            return float(raw_data.replace('.', '').replace(',', '.'))
        except (ValueError, TypeError, AttributeError):
            return 0.0

    def normalize_brand(self, raw_data):
        """Return the canonical brand found in ``raw_data``, or None."""
        return self._first_match(self._BRAND_RULES, raw_data, self.brands)

    def normalize_processor(self, raw_data):
        """Return the canonical processor found in ``raw_data``, or None."""
        # Remove leftover encoding artefacts such as a literal "\u84d2".
        # A raw string is required: '\u' followed by non-hex characters is a
        # SyntaxError in a Python 3 string literal.
        cleaned = re.sub(r'\\u\w\w\w\w', '', raw_data)
        return self._first_match(self._PROCESSOR_RULES, cleaned,
                                 self.processors)

    def get_storage_capacity(self, raw_data):
        """Return the canonical storage value for a capacity text, or None."""
        if not raw_data:
            return None
        for amount, unit, getter in self._STORAGE_RULES:
            # Optional whitespace before the unit; the lookbehind keeps
            # "2TB" from matching inside "12TB".
            pattern = r'(?<!\d)%s\s*%s' % (amount, unit)
            if re.search(pattern, raw_data, re.IGNORECASE) is not None:
                return getattr(self.storages, getter)()
        return None