示例#1
0
class TestApi(unittest.TestCase):
    """Tests for the SpinRewriter wrapper with ``spinrewriter.urllib2`` mocked."""

    def setUp(self):
        """Create the SpinRewriter instance used by the tests as ``self.sr``."""
        self.sr = SpinRewriter('*****@*****.**', 'test_api_key')

    def test_init(self):
        """Test initialization of SpinRewriter.

        SpinRewriter is initialized on every test run and stored as self.sr.
        We need to test for stored values and if underlying Api class was
        also initialized correctly.
        """
        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python versions.
        self.assertEqual(self.sr.email_address, '*****@*****.**')
        self.assertEqual(self.sr.api_key, 'test_api_key')
        self.assertIsInstance(self.sr.api, Api)

    @mock.patch('spinrewriter.urllib2')
    def test_unique_variation_default_call(self, urllib2):
        """Test call of unique_variation() with default values."""
        # mock response from SpinRewriter
        mocked_response = u"""{
            "status":"OK",
            "response":"This is my pet.",
            "api_requests_made":1,
            "api_requests_available":99,
            "protected_terms":"",
            "nested_spintax":"false",
            "confidence_level":"medium"
        }"""
        urllib2.urlopen.return_value.read.return_value = mocked_response

        # test call
        self.assertEqual(
            self.sr.unique_variation('This is my dog.'),
            'This is my pet.',
        )

    @mock.patch('spinrewriter.urllib2')
    def test_text_with_spintax_default_call(self, urllib2):
        """Test call of text_with_spintax() with default values."""
        # mock response from SpinRewriter
        mocked_response = u"""{
            "status":"OK",
            "response":"This is my {dog|pet|animal}.",
            "api_requests_made":2,
            "api_requests_available":98,
            "protected_terms":"",
            "nested_spintax":"false",
            "confidence_level":"medium"
        }"""
        urllib2.urlopen.return_value.read.return_value = mocked_response

        # test call
        self.assertEqual(
            self.sr.text_with_spintax('This is my dog.'),
            'This is my {dog|pet|animal}.',
        )
示例#2
0
 def setUp(self):
     """Create the SpinRewriter instance used by the tests.

     The instance is built with a placeholder e-mail address and a dummy
     API key and stored on ``self.sr``.
     """
     self.sr = SpinRewriter('*****@*****.**', 'test_api_key')
def spinrewriter_spinner(filepath, download_min=99999000):
    '''
    wordai content spinner api
    Parameters
    ----------
    dict
    username
    password

    Returns
    -------

    '''
    reload(sys)
    sys.setdefaultencoding('utf8')

    rewriter = SpinRewriter('', '')

    fieldnames = ['Video_URL',
                  'Author',
                  'Content_rating',
                  'Version',
                  'Filesize',
                  'screenshots',
                  'Updated',
                  'Description',
                  'Review_number',
                  'Downloads',
                  'Link',
                  'Genre',
                  'Developer_badge',
                  'Item_name',
                  'Rating_value',
                  'package_name',
                  'IAP',
                  'Physical_address',
                  'Author_link',
                  'Compatibility',
                  'Developer_ID',
                  'cover_image',
                  'Price']

    # decide what to spin, add to "to_spin"
    with open(filepath) as csvfile:
        reader = csv.DictReader(csvfile, fieldnames=fieldnames, delimiter=' ', quotechar='|', )
        to_spin = ""
        for row in reader:
            row["Description"] = row["Description"].decode('unicode_escape').encode('ascii',
                                                                                    'ignore')  # stardardize string
            if len(row["Description"].split()) > 25 and len(row["Description"].split()) < 4000:
                try:
                    # only spin if download number >10000
                    if row["Downloads"] and int(
                            row["Downloads"].split(" - ", 1)[0].replace(",", "").replace(" ", "")) > download_min:
                        # only spin if it is English. temporary turn off
                        # print row["Description"]
                        # if detect(unicode(row["Description"].split(".", 1)[0], errors='ignore')) == 'en':  # only take the first sentence
                        to_spin += str(row["Description"]).replace("\n", "\|/") + "||||||||"
                        # print countx
                except Exception as e:
                    print e
                    # if str(e) == "No features in text.":
                    #     to_spin += str(row["Description"]).replace("\n", "\|/") + "||||||||"
                    continue

        # print to_spin
        print "spinning...", (len(to_spin.split(" ")) / 3900 + 2), "blocks of 3.8k words are going to be spinned"
        result_spin = ""
        # spin each 3800 words #TODO not working
        count_spinned = 0
        for i in range(1, (len(to_spin.split(" ")) / 3900 + 2)):
            splitted_words = unicode(" ".join(to_spin.split(" ")[3900 * (i - 1):3900 * i]), errors='replace')
            try:
                spinned = str(rewriter.unique_variation(splitted_words))
                if len(spinned) > 20:
                    result_spin += spinned
                else:
                    result_spin += splitted_words
                count_spinned += 1
            except Exception as e:
                print e
                result_spin += splitted_words
                count_spinned += 1
                if str(e) == "Error!!!,  Quota limit for API calls reached.":
                    break
                else:
                    continue

            print count_spinned, "blocks per ", (len(to_spin.split(" ")) / 3800 + 2), "completed"

        # for i in range(0,len(result_spin.split("----------"))):
        #     print result_spin.split("----------")[i]

        print len(result_spin.split("||||||||"))

    result = []
    with open(filepath) as csvfile:
        count = 0
        count2 = 0
        count3 = 0
        reader = csv.DictReader(csvfile, fieldnames=fieldnames, delimiter=' ', quotechar='|', )

        for row in reader:
            # print row
            count3 += 1
            print count3
            if len(row["Description"].split()) > 25 and len(row["Description"].split()) < 4000:
                try:
                    # only spin if download number >10000
                    if row["Downloads"] and int(
                            row["Downloads"].split(" - ", 1)[0].replace(",", "").replace(" ", "")) > download_min:
                        # only spin if it is English. temporary turn off
                        # if detect(unicode(row["Description"].split(".", 1)[0], errors='ignore')) == 'en':  # only take the first sentence
                        print row["Description"]
                        row["Description"] = result_spin.split("||||||||")[count].replace("\|/", "\n")
                        print row["Description"]
                        count += 1
                        print "count1,", count
                        # spinned_content = str(rewriter.unique_variation(row["Description"].replace("\n", "|"))).replace("|", "\n")
                        # if len(spinned_content) > 20: # to debug: if an error return, skip
                        #     row["Description"] = spinned_content + ".."
                except Exception as e:
                    print "Error!!!, ", e
                    continue

            result.append(row)
            count2 += 1
            print "count2,", count2

            # number of spinned 3.8k for testing
            # print "result spin block", result_spin.count("SPINNEDDDDDDDDDDDDD")

            with open(filepath + "_spinned", 'w') as csvfile:
                spamwriter = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter=' ', quotechar='|', )
                # spamwriter.writeheader()
                for row in result:
                    spamwriter.writerow(row)
                csvfile.close()
                spamwriter = None
示例#4
0
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
from keyCombo.keyComboFinder import combify
from keyCombo.comboData import stopwords
from spinrewriter import SpinRewriter
# Module-level SpinRewriter client, created when this module is imported.
# NOTE(review): the account e-mail and API key are hard-coded in source here;
# consider loading them from configuration or the environment instead.
rewriter = SpinRewriter('*****@*****.**',
                        '82109b0#51de401_45b0364?5ada2fd')


def number_check(text):
    """Return True if ``int(text)`` succeeds, otherwise False.

    Only the errors ``int()`` raises for unsuitable input are treated as
    "not a number": ``ValueError`` (e.g. ``"abc"``, ``"1.5"``), ``TypeError``
    (e.g. ``None``) and ``OverflowError`` (e.g. ``float("inf")``). Anything
    else, such as ``KeyboardInterrupt``, is allowed to propagate instead of
    being swallowed by a bare ``except``.
    """
    try:
        int(text)
    except (TypeError, ValueError, OverflowError):
        return False
    return True


def rev_len(x):
    """Return the negated length of ``x``.

    Used as a sort key, this orders longer items before shorter ones.
    """
    size = len(x)
    return -size


def combo_counter(combos, paragraph):
    """Remove every combo from ``paragraph``, processing longest combos first.

    ``combos`` is sorted in place as a side effect.

    NOTE(review): this definition appears to be cut off. ``count``,
    ``original_len`` and the per-combo lengths are computed but never used,
    and nothing is returned. Confirm against the complete original before
    relying on it.
    """
    count = 0
    original_len = len(paragraph)
    # rev_len returns -len(x), so this puts the longest combos first;
    # presumably so a long combo is removed before shorter ones it contains.
    combos.sort(key=rev_len)
    for each in combos:
        como_length = len(each)
        para_len = len(paragraph)  # length before this combo is removed
        paragraph = paragraph.replace(each, "")
        new_len = len(paragraph)  # length after this combo is removed
class TestApi(unittest.TestCase):
    """Tests for the SpinRewriter wrapper with ``spinrewriter.urllib2`` mocked."""

    def setUp(self):
        """Create the SpinRewriter instance used by the tests as ``self.sr``."""
        self.sr = SpinRewriter('*****@*****.**', 'test_api_key')

    def test_init(self):
        """Test initialization of SpinRewriter.

        SpinRewriter is initialized on every test run and stored as self.sr.
        We need to test for stored values and if underlying Api class was
        also initialized correctly.
        """
        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent Python versions.
        self.assertEqual(self.sr.email_address, '*****@*****.**')
        self.assertEqual(self.sr.api_key, 'test_api_key')
        self.assertIsInstance(self.sr.api, Api)

    @mock.patch('spinrewriter.urllib2')
    def test_unique_variation_default_call(self, urllib2):
        """Test call of unique_variation() with default values."""
        # mock response from SpinRewriter
        mocked_response = u"""{
            "status":"OK",
            "response":"This is my pet.",
            "api_requests_made":1,
            "api_requests_available":99,
            "protected_terms":"",
            "nested_spintax":"false",
            "confidence_level":"medium"
        }"""
        urllib2.urlopen.return_value.read.return_value = mocked_response

        # test call
        self.assertEqual(
            self.sr.unique_variation('This is my dog.'),
            'This is my pet.',
        )

    @mock.patch('spinrewriter.urllib2')
    def test_text_with_spintax_default_call(self, urllib2):
        """Test call of text_with_spintax() with default values."""
        # mock response from SpinRewriter
        mocked_response = u"""{
            "status":"OK",
            "response":"This is my {dog|pet|animal}.",
            "api_requests_made":2,
            "api_requests_available":98,
            "protected_terms":"",
            "nested_spintax":"false",
            "confidence_level":"medium"
        }"""
        urllib2.urlopen.return_value.read.return_value = mocked_response

        # test call
        self.assertEqual(
            self.sr.text_with_spintax('This is my dog.'),
            'This is my {dog|pet|animal}.',
        )

    @mock.patch('spinrewriter.urllib2')
    def test_text_with_spintax_error(self, urllib2):
        """Test that text_with_spintax() raises on an authentication error."""
        # mock response from SpinRewriter
        mocked_response = u"""{"status":"ERROR", "response":"Authentication failed. Unique API key is not valid for this user."}"""  # noqa
        urllib2.urlopen.return_value.read.return_value = mocked_response

        # test call
        with self.assertRaises(ex.AuthenticationError):
            self.sr.text_with_spintax('This is my dog.')

    @mock.patch('spinrewriter.urllib2')
    def test_unique_variation_error(self, urllib2):
        """Test that unique_variation() raises on an authentication error."""
        # mock response from SpinRewriter
        mocked_response = u"""{"status":"ERROR", "response":"Authentication failed. Unique API key is not valid for this user."}"""  # noqa
        urllib2.urlopen.return_value.read.return_value = mocked_response

        # test call
        with self.assertRaises(ex.AuthenticationError):
            self.sr.unique_variation('This is my dog.')
 def setUp(self):
     """Create the SpinRewriter instance used by the tests.

     The instance is built with a placeholder e-mail address and a dummy
     API key and stored on ``self.sr``.
     """
     self.sr = SpinRewriter('*****@*****.**', 'test_api_key')