Пример #1
0
    def process_item(self, item, spider):
        """Store scraped data on the matching template ``Investment`` rows.

        Two item layouts are handled:

        * The item carries ``inv_id``: that single template investment is
          looked up and ``item['data']`` is written to the model field named
          by ``item['type']``.
        * Otherwise the item is expected to hold one key per investment name
          plus ``platformname``; every template investment of that platform
          receives the value stored under its own name (matched
          case-insensitively).

        Args:
            item: The scraped item; anything ``ItemAdapter`` can wrap.
            spider: The spider that produced the item (unused).

        Returns:
            The unmodified ``item``, so later pipelines can process it.

        Raises:
            KeyError: If a required key (``type``, ``data``,
                ``platformname``) or the value for one of the platform's
                investments is missing from the item.
        """
        adapter = ItemAdapter(item)

        if 'inv_id' in adapter:
            # The item targets one investment: look it up and save the data
            # directly.
            inv = get_object_or_404(Investment,
                                    id=adapter['inv_id'],
                                    template=True)
            setattr(inv, adapter['type'], adapter['data'])
            inv.save()
            return item

        # Fold the keys with the same function that is used for the lookup
        # below; mixing lower() and casefold() breaks on names such as
        # "Straße" ("straße" vs "strasse").
        folded_values = {k.casefold(): v for k, v in adapter.items()}

        # All investments are in the same item: fetch every template
        # investment of the platform and update them one by one.
        investments = Investment.objects.filter(
            name__platform=adapter['platformname'],
            template=True)

        # The target field is the same for every investment, so read it once.
        field_name = adapter['type']
        for inv in investments:
            setattr(inv, field_name,
                    folded_values[str(inv.name.name).casefold()])
            inv.save()

        return item
Пример #2
0
 def save_items(self, items):
     """Insert every item into its ``sro`` table, updating the row on failure.

     The table name is the item's class name and the columns are the item's
     field names.  When the INSERT fails, the row whose ``url`` equals the
     item's ``url`` is updated with the remaining fields instead.  One
     commit is issued after all items have been processed.

     All values are passed to the driver as bound parameters (``%s``
     placeholders, the style the UPDATE statement already relied on)
     instead of being formatted into the SQL text, so quotes or ``%``
     characters in scraped data can neither break nor alter the statement.

     Args:
         items: Iterable of items that ``ItemAdapter`` can wrap.

     Raises:
         Exception: Whatever the database driver raises when the INSERT
             fails and the item has no ``url`` to fall back on, or when
             the fallback UPDATE fails as well.
     """
     for item in items:
         table_name = item.__class__.__name__
         item_dict = ItemAdapter(item).asdict()
         item_dict = self.__check_size(item_dict)
         item_dict = self.__clean_dict(item_dict)

         # Identifiers (table and column names) cannot be bound as
         # parameters; they come from the item class and its field names,
         # not from scraped content.
         _columns = ', '.join(item_dict)
         placeholders = ', '.join(['%s'] * len(item_dict))
         sql = "INSERT INTO sro.{} ({}) VALUES ({})".format(
             table_name, _columns, placeholders)
         try:
             self._cursor.execute(sql, list(item_dict.values()))
             print(sql)
         except Exception:
             # NOTE(review): the concrete driver is not visible here, so the
             # driver-specific error class cannot be named; presumably this
             # is meant to catch duplicate-key failures - confirm.
             if 'url' not in item_dict:
                 # Without a url there is no row to update; surface the
                 # original database error instead of a misleading KeyError.
                 raise
             url = item_dict.pop('url')
             set_str = ", ".join("{}=%s".format(k) for k in item_dict)
             sql = "UPDATE sro.{} SET {} WHERE url = %s".format(
                 table_name, set_str)
             self._cursor.execute(sql, list(item_dict.values()) + [url])
             print(sql)
     self._connection.commit()
Пример #3
0
    def process_item(self, item, spider):
        """Insert or update the item in the MongoDB collection of its spider.

        Items of the ``novels`` spider are matched on ``title`` and items of
        the ``comments`` spider on ``comment_id``; the collection has the
        same name as the spider.  When no document matches, the item is
        inserted; otherwise the fields of the item that are not ``None``
        overwrite the stored ones.

        Items coming from any other spider are passed through untouched
        (previously this raised ``UnboundLocalError``).

        Args:
            item: The scraped item; anything ``ItemAdapter`` can wrap.
            spider: The spider that produced the item; only ``spider.name``
                is read.

        Returns:
            The unmodified ``item``, so later pipelines can process it.

        Raises:
            KeyError: If the item lacks the field it is matched on.
        """
        # Spider / collection name -> field that identifies a document.
        lookup_fields = {'novels': 'title', 'comments': 'comment_id'}

        search_by = lookup_fields.get(spider.name)
        if search_by is None:
            # This pipeline has nothing to store for other spiders.
            return item

        collection = self.db[spider.name]
        my_item = ItemAdapter(item).asdict()

        # Find one document by its identifying field.
        my_doc = collection.find_one({search_by: my_item[search_by]})

        if not my_doc:
            # No matching document: insert a new one.
            collection.insert_one(my_item)
        else:
            # Update the existing document with the not-None values only.
            updates = {k: v for k, v in my_item.items() if v is not None}
            if updates:
                # An empty $set is rejected by some server versions, and
                # Collection.update() no longer exists in PyMongo 4.
                collection.update_one({'_id': my_doc['_id']},
                                      {'$set': updates})

        return item
Пример #4
0
    def process_item(self, item, spider):
        """Clean the investment name and turn percentage fields into fractions.

        Steps, applied in place through ``ItemAdapter``:

        1. In ``invname``, literal backslash-n sequences are replaced with
           spaces and runs of whitespace are collapsed to single spaces.
        2. Every key outside ``excluded_keys`` whose value is falsy
           (``None``, ``""``, ``0`` ...) is removed.
        3. Every remaining key outside ``excluded_keys`` is converted to a
           ``Decimal`` fraction: text from the first ``%`` onwards is cut
           off, the rest is parsed and divided by 100.  Values that cannot
           be parsed ("Zero", "Nil", ...) become ``0``.

        Args:
            item: The scraped item; anything ``ItemAdapter`` can wrap.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object, modified in place.
        """
        adapter = ItemAdapter(item)

        invname = adapter.get('invname', '')
        if invname:
            # Replacing the escaped "\n" sequences and collapsing whitespace
            # in one go gives the same result as doing it in two passes.
            adapter['invname'] = " ".join(invname.replace("\\n", " ").split())

        # Collect first, delete afterwards: the adapter must not change size
        # while it is being iterated.
        empty_keys = [
            key for key, val in adapter.items()
            if key not in excluded_keys and not val
        ]
        for key in empty_keys:
            del adapter[key]

        for key, val in adapter.items():
            if key in excluded_keys:
                continue

            # Only strings can contain a percent sign; numeric values used
            # to raise TypeError on the membership test.
            if isinstance(val, str) and "%" in val:
                val = val.split("%")[0]

            # Nil items that are not numbers ("Zero", "Nil", etc).
            # decimal.InvalidOperation is a subclass of ArithmeticError.
            try:
                number = Decimal(val)
            except (ArithmeticError, TypeError, ValueError):
                number = Decimal(0)

            # Convert the percentage to its fractional value.
            adapter[key] = number / 100

        return item
Пример #5
0
    def process_item(self, item, spider):
        """Synchronise an investment's stored asset allocations with the item.

        The item keys are matched case-insensitively against the names of
        the ``AssetAllocation`` rows stored for ``item['inv_id']``:

        * stored allocations whose name is absent from the item are deleted;
        * stored allocations whose name is present get the new percentage;
        * item keys that matched no stored allocation and are not in
          ``excluded_keys`` are saved as new allocations.

        Previously the deleted allocations were still saved again in the
        update step, which then failed with ``KeyError`` when popping them
        from the item data.

        Args:
            item: The scraped item; anything ``ItemAdapter`` can wrap.
            spider: The spider that produced the item (unused).

        Returns:
            The unmodified ``item``, so later pipelines can process it.
        """
        adapter = ItemAdapter(item)

        # Lower-case the keys so they can be matched to the model names
        # case-insensitively.
        lowercase_adapter = {k.lower(): v for k, v in adapter.items()}

        # Fetched for every item because new names may have been added.
        existing_aa_names = get_list_or_404(AssetAllocationName)

        try:
            existing_aa = get_list_or_404(AssetAllocation,
                                          investment=adapter['inv_id'])
        except Exception:
            # Best effort kept from the original: any failure here is
            # treated as "no allocations stored yet".
            existing_aa = []

        # Keys that still have to become new allocations; entries are
        # removed as stored allocations are matched.
        unmatched = dict(lowercase_adapter)

        for aa in existing_aa:
            key = str(aa.name).lower()

            if key not in lowercase_adapter:
                # Old allocation that is no longer part of the scraped data:
                # delete it and do not save it again.
                print(f"LOG: Asset class {key!r} no longer exists in scraped data")
                aa.delete()
                continue

            # Update the stored allocation with the scraped percentage.
            aa.percentage = lowercase_adapter[key]
            aa.save()

            # A default avoids KeyError when two stored rows share a name.
            unmatched.pop(key, None)

        # Create new allocations.
        for k, v in unmatched.items():
            if k in excluded_keys:
                continue

            # First AssetAllocationName whose name matches the key, or None
            # when no such name exists.
            aa_name = next(
                (i for i in existing_aa_names if i.name.lower() == k), None)
            allocation = AssetAllocation(name=aa_name,
                                         percentage=v,
                                         investment_id=adapter['inv_id'])
            allocation.save()

        return item
Пример #6
0
 def strip_encoding(adapter: ItemAdapter) -> dict:
     """Trim every string field and delete its line-break characters.

     Each string value first loses its leading and trailing whitespace,
     then every remaining line feed and carriage return is removed.
     Values that are not strings are left untouched.

     The adapter is modified in place and the same object is returned.
     """
     # Translation table that deletes line feeds and carriage returns.
     line_breaks = {ord("\n"): None, ord("\r"): None}

     for field, raw in adapter.items():
         if not isinstance(raw, str):
             continue
         adapter[field] = raw.strip().translate(line_breaks)

     return adapter
Пример #7
0
 def process_item(self, item, spider):
     """Replace every field of the item with the result of ``strip``.

     The item is wrapped in an ``ItemAdapter`` and each value is passed
     through ``strip`` (defined elsewhere), with the result stored back
     under the same key.  The item itself is returned afterwards.
     """
     fields = ItemAdapter(item)
     for name, raw_value in fields.items():
         cleaned = strip(raw_value)
         fields[name] = cleaned
     return item