Exemplo n.º 1
0
 def formatDate(self, data, country):
   month = "" #initialize so if they are not used prints empty in template
   day = "" 
   options = ""
   origData = data #save so if we find out it's not really a date
   usDateRegex = re.compile("^(january|february|march|april|may|june|july|august|september|october|november|december).[0-9]{1,2}.[0-9]{4}$", re.I)
   euDateRegex = re.compile("^[0-9]{1,2}.(january|february|march|april|may|june|july|august|september|october|november|december) [0-9]{4}$", re.I)
   shortDateRegex = re.compile("^(january|february|march|april|may|june|july|august|september|october|november|december).[0-9]{4}$", re.I)
   #If the date is 3 different items without a place in parens.  I use the re.sub to replace the place with nothing, removing it from the date. I find the
   #  format by replacing the different items with their format name from datetime. Then compile it all into a film date template.
   data = re.sub("{{flag.?icon.*?}}", "", data, 0, re.I).strip() #remove any flagicons
   #remove the wikilinks if they exist
   if(data.find("[[") != -1):
     data = self.removeWikilink(data)
   #remove the "th" from any number in the date that might have it.
   if(re.search("[0-9]{2}th", data)):
     data = re.sub("[0-9]{2}th", data[data.find(re.search("[0-9]{2}th", data).group(0)):data.find(re.search("[0-9]{2}th", data).group(0))+2], data)
   data = re.sub("<small>", "", re.sub("</small>", "", data)) #remove any small tags
   data = re.sub(",", "", data) # remove any commas in the date format
   justDate = re.sub("\([.A-Za-z ]+\)", "", data).strip()
   #If after the wikilink removal it isn't a proper date just skip it.
   if(not (usDateRegex.search(justDate) or euDateRegex.search(justDate) or shortDateRegex.search(justDate) or justDate.isdigit())):
     return origData
   if(len(justDate.split()) == 3):
     format = re.sub("[0-9]{1,2}", "%d", re.sub("[0-9]{4}", "%Y", re.sub("[A-Za-z]+", "%B", justDate))) #convert what is in the data field to what format it is in datetime.
     date = datetime.strptime(justDate, format) #convert to date
     month = "|" + str(date.month)
     day = "|" + str(date.day)
   #if it's only 2 it's usually a year and a month
   elif(len(justDate.split()) == 2):
     format = re.sub("[0-9]{4}", "%Y", re.sub("[A-Za-z]+", "%B", justDate)) #convert what is in the data field to what format it is in datetime.
     date = datetime.strptime(justDate, format) #convert to date
     month = "|" + str(date.month)
   #only 1 item is usually just the year
   elif(len(justDate.split()) == 1 and justDate.isdigit()):
     format = re.sub("[0-9]{4}", "%Y", justDate) #convert what is in the data field to what format it is in datetime.
     date = datetime.strptime(re.sub("\([A-Za-z. ]+\)", "", data).strip(), format) #convert to date
   #see if there is a place to add  
   try: re.search("\([.A-Za-z ]+\)", data).group()
   except AttributeError:
     place = ""
   else:
     place = "|" + re.search("\([.A-Za-z ]+\)", data).group().replace(")", "").replace("(", "")
     if filmfunctions.countryToTemplate(place[1:]) == country:
       place = ""
   if(euDateRegex.search(justDate) and not country == "{{Film US}}"): #if a EU date make the day appear first.
     options = "|df=y"
   if place != "" and month == "" and day == "":
     place = "||" + place    
   data = "{{Film date|"+str(date.year)+month+day+place+options+"}}"
   if(month == "" or day == ""):
     data += "<!-- {{Film date|Year|Month|Day|Location}} -->"
   return data
Exemplo n.º 2
0
    def addImdbInfo(self, infobox, movie):
      imdb.IMDb().update(movie, info=('release dates',)) #get the release date page
      country = ""
      if movie.get('country'):
        country = filmfunctions.countryToTemplate(movie.get('country')[0])
      for field in re.sub("<ref.*?/(ref)?>", " reference ", re.sub("{{.*}}", "template", infobox)).split("|"):
        data = ""
        try: field.split("=")[1]
        except IndexError:
          sys.exc_clear() #skip it if there is an index error, means it has no "=", invalid field
        else:
          if(field.split("=")[1].strip() == ""): #fill in fields without data
            if(field.split("=")[0].strip() == "director"):
              if movie.has_key('director'):
                for name in movie.get('director'):
                  data += "[[" + name['name'] + "]]+"
                if len(movie.get('director')) > 1:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + "{{Plainlist|\n*" + re.sub("\+", "\n*", data.rstrip("+")) + "}}" + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
                else:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + data.rstrip("+") + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
            elif(field.split("=")[0].strip() == "producer"):
              if movie.get('producer'):
                for name in movie.get('producer')[0:2]:
                  data += name['name'] + "+"
                if len(movie.get('producer')) > 1:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + "{{Plainlist|\n*" +  re.sub("\+", "\n*", data.rstrip("+")) + "}}" + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:]                 
                else:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + data.rstrip("+") + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
            elif(field.split("=")[0].strip() == "starring"):
              if movie.get('cast'):
                for name in movie.get('cast')[0:4]:
                  data += "[[" + name['name'] + "]]+"
                if len(movie.get('cast')) > 1:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + "{{Plainlist|\n*" + re.sub("\+", "\n*", data.rstrip("+")) + "}}" + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:]                 
                else:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + data.rstrip("+") + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
            elif(field.split("=")[0].strip() == "music"):
              if movie.get('original music'):
                for name in movie.get('original music')[0:2]:
                  data += name['name'] + "+"
                if len(movie.get('original music')) > 1:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + "{{Plainlist|\n*" + re.sub("\+", "\n*", data.rstrip("+")) + "}}" + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:]                 
                else:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + data.rstrip("+") + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
            elif(field.split("=")[0].strip() == "cinematography"):
              if movie.get('cinematographer'):
                for name in movie.get('cinematographer')[0:2]:
                  data += name['name'] + "+"
                if len(movie.get('cinematographer')) > 1:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + "{{Plainlist|\n*" + re.sub("\+", "\n*", data.rstrip("+")) + "}}" + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:]                 
                else:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + data.rstrip("+") + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
            elif(field.split("=")[0].strip() == "editing"):
              if movie.get('editor'):
                for name in movie.get('editor')[0:2]:
                  data += name['name'] + "+"
                if len(movie.get('editor')) > 1:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + "{{Plainlist|\n*" + re.sub("\+", "\n*", data.rstrip("+")) + "}}" + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:]                 
                else:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + data.rstrip("+") + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
            elif(field.split("=")[0].strip() == "studio"):
              if movie.get('studio'):
                for name in movie.get('studio')[0:2]:
                  data += name['name'] + "+"
                if len(movie.get('studio')) > 1:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + "{{Plainlist|\n*" + re.sub("\+", "\n*", data.rstrip("+")) + "}}" + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:]                 
                else:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + data.rstrip("+") + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
            elif(field.split("=")[0].strip() == "released"):
              if movie.get('release dates'):
                for date in movie.get('release dates')[0:1]:
                  date = date.split("::")[1] + "(" + date.split("::")[0] + ")"
                  data += self.formatDate(date, country) + "+"
                infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + re.sub("\+", "<br>", data.rstrip("+")) + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
            elif(field.split("=")[0].strip() == "writer"):
              if movie.get('writer'):
                for name in movie.get('writer'):
                  data += name['name'] + "+"
                if len(movie.get('writer')) > 1:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + "{{Plainlist|\n*" + re.sub("\+", "\n*", data.rstrip("+")) + "}}" + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:]                 
                else:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + data.rstrip("+") + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
            elif(field.split("=")[0].strip() == "runtime"):
              if movie.get('runtime'):
                try: 
                  if(movie.get('runtime')[0].split(":")[0].isdigit()):
                    infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + movie.get('runtime')[0].split(":")[0] + " minutes" + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
                except IndexError:
                  infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + movie.get('runtime')[0] + " minutes" + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
            elif(field.split("=")[0].strip() == "country"):
              if movie.get('country'):
                infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + filmfunctions.countryToTemplate(movie.get('country')[0]) + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
            elif(field.split("=")[0].strip() == "language"):
              if movie.get('language'):
                for name in movie.get('language'):
                  data += name + "+"
                infobox = infobox[:infobox.find("=", infobox.find(field.split("=")[0]))+2] + re.sub("\+", "<br>", data.rstrip("+")) + infobox[infobox.find("=", infobox.find(field.split("=")[0]))+2:] 
              

      return infobox
Exemplo n.º 3
0
    def infoboxCleanup(self, infobox):
      unusedFields = ""
      country = ""
      infobox = infobox.replace("<br />", "<br>") #convert old style breaks to new style
      infobox = infobox.replace("<br/>", "<br>") #convert old style breaks to new style
      infobox = infobox.replace("<BR>", "<br>") #convert old style breaks to new style
      newBox = self.infoboxTemplate
      infoSplit = re.sub("<ref.*?/(ref)?>", " reference ", re.sub("{{.*}}", "template", infobox), flags=re.DOTALL).split("|")
      for field in infoSplit:
        try: field.split("=")[1]
        except IndexError:
          sys.exc_clear() #skip it if there is an index error, means it has no "=", invalid field
        else:
          if(field.split("=")[1].strip() != ""): #only extract fields with info
            #The info is going to be inserted into the new infobox, I find where the equals sign exists for the field where I'm inserting the info
            fieldRegex = re.compile(field.split("=")[0].lower().strip()+"[^\|]*?=", re.I) #find the field but it can't have a | after. This will ensure I get a field and not the data
            temp = fieldRegex.search(newBox)
            #first try and find where it should go in the new infobox
            try: equals = newBox.find("=", temp.start())
            except:
              equals = -1
            #but then make sure to check that it is not inside any wiki templates/refs that have been placed inside the new infobox.
            insideWiki = True
            while(insideWiki):
              insideWiki = False
              searches = itertools.chain(self.commentRegex.finditer(newBox), self.referenceRegex.finditer(newBox), self.templateRegex.finditer(newBox), self.wikilinkRegex.finditer(newBox)) #create a combine iterator
              for search in searches: 
                try: 
                  if(equals > search.start() and equals < search.end()):
                    try: equals = newBox.find("=", fieldRegex.search(newBox, search.end()).start()) #I need where we're place in the information in the new infobox
                    except:
                      equals = -1
                    insideWiki = True
                except:
                  sys.exc_clear()
            try: oldEquals = infobox.find("=", fieldRegex.search(infobox).start()) #I need where the information starts in the old infobox
            except:
              oldEquals = -1
            if(equals != -1 and oldEquals != -1): #if an old field is not used, do not copy it over
              #This used to be silly but now it's quite reasonable. Loop through and check against every search to make sure
              # that the equals sign is not inside any wiki-stuff.
              insideWiki = True
              y = oldEquals + 1
              x = infobox.find("=", y) #find the next equals
              while(insideWiki):
                insideWiki = False
                searches = itertools.chain(self.commentRegex.finditer(infobox), self.referenceRegex.finditer(infobox), self.templateRegex.finditer(infobox), self.wikilinkRegex.finditer(infobox))
                for search in searches: 
                  #pywikibot.output(str(x) + " " + str(search.start()) + " " + str(search.end()))
                  try: 
                    if(x > search.start() and x < search.end()):
                      insideWiki = True
                      y = x + 1
                      x = infobox.find("=", y)
                  except:
                    sys.exc_clear()
                #if it wasn't inside any of the wiki-stuff then it's ok to grab it
                if(x == -1 and not insideWiki):
                  data = infobox[oldEquals+1:].strip()
                  while(data[len(data)-1:] == "|"):
                    data = data[:len(data)-1].strip()
                elif(not insideWiki):
                  data = infobox[oldEquals+1:infobox.rfind("|", oldEquals, infobox.find("=", x))].strip()
              #pywikibot.output(field.split("=")[0].strip().lower() + ": " + data)

              #This will take care of any references and comments on the data
              refs = "" #initialize
              if self.referenceRegex.search(data) : #remove the ref and save it for later so I can format the date
                refs += data[self.referenceRegex.search(data).start():self.referenceRegex.search(data).end()]
                data = re.sub(self.referenceRegex, "", data)
              if self.commentRegex.search(data) :
                refs += data[self.commentRegex.search(data).start():self.commentRegex.search(data).end()]
                data = re.sub(self.commentRegex, "", data)

              data = re.sub(",<br>", "<br>", data) #if there are commas and line breaks, oh my
              if(field.split("=")[0].strip().lower() == "language"): #if the language is linked, unlink it.
                tmp = self.removeWikilink(data)
                if(data != tmp):
                  data = tmp
                  #self.canEditPage = 1
                  #self.summary = "Unwikilink language. " + self.summary
              elif(field.split("=")[0].strip().lower() == "country" and not re.search("image:flag", data.lower()) and not re.search("file:flag", data.lower())):
                #data = re.sub("<br>", ", ", data) Do I have to convert to commas?
                data = self.removeWikilink(data)
                tmp = filmfunctions.countryToTemplate(data)
                country = data
                if(data != tmp):
                  data = tmp
                  self.canEditPage = 1
                  if self.summary.find("film date") != -1:
                    self.summary = self.summary.replace("template.", "+ country templates")
                  else:
                    self.summary = "Use country template. " + self.summary
              elif(field.split("=")[0].strip().lower() == "released" and re.search("{{start date.*?}}", data.lower())):
                data = re.sub("start", "Film", data, 0, re.I)
              elif(field.split("=")[0].strip().lower() == "released" and re.search("{{filmdate.*?}}", data.lower())):
                data = re.sub("filmdate", "Film date", data)
              elif(field.split("=")[0].strip().lower() == "released" and not re.search("{{film date.*?}}", data.lower()) and data.find("<br>") == -1):
                tmp = self.formatDate(data, country)
                if(data != tmp):
                  data = tmp
                  self.canEditPage = 1
                  if self.summary.find("country") != -1:
                    self.summary = self.summary.replace("template.", "+ film date templates")
                  else:
                    self.summary = "use film date template. " + self.summary
              elif(field.split("=")[0].strip().lower() == "runtime") :
                data = self.removeWikilink(data)
                data = re.sub("(min(\.)|mins\.|mins|min)(?!utes)", "minutes", data)
              elif(field.split("=")[0].strip().lower() == "distributor"):
                data = re.sub("{{flag.?icon.*?}}", "", data, 0, re.I).strip()
              elif(field.split("=")[0].strip().lower() == "producer"):
                data = re.sub(",", "<br>", data)
            
              data += refs #attach the references and comments again
                
              #Break it down: Take everything before where I want to insert the info + the old info I found between the equals sign and the last "|" + everything
              #  after where I insert the data.
              #pywikibot.output(field.split("=")[0] + " " + data)
              newBox = newBox[:equals+2] + data + newBox[equals+2:]
              #pywikibot.output(newBox)
              #choice = pywikibot.inputChoice("This is a wait", ['Yes', 'No'], ['y', 'N'], 'N')
            else:
              if not (field.split("=")[0].strip().lower() == "preceded_by" or field.split("=")[0].strip().lower() == "followed_by" or field.split("=")[0].strip().lower() == "preceded by" or field.split("=")[0].strip().lower() == "followed by"):
                unusedFields += "| " + field.strip() + "\n"

      #if(self.imdbNum != 0): lol, probably no
      #  newBox = self.addImdbInfo(newBox, imdb.IMDb().get_movie(self.imdbNum))
        
      #remove typically unused parameters
      if re.search("\| image_size *=.*?\n", newBox).group().split("=")[1].strip() == "" :
        newBox = re.sub("\| image_size *=.*?\n", "", newBox)
      if re.search("\| narrator *=.*?\n", newBox).group().split("=")[1].strip() == "" :
        newBox = re.sub("\| narrator *=.*?\n", "", newBox)
      if re.search("\| border *=.*?\n", newBox).group().split("=")[1].strip() == "" :
        newBox = re.sub("\| border *=.*?\n", "", newBox)
      if re.search("\| based on *=.*?\n", newBox).group().split("=")[1].strip() == "" :
        newBox = re.sub("\| based on *=.*?\n", "", newBox)
      #if re.search("\| alt *=.*?\n", newBox).group().split("=")[1].strip() == "" :
      #  newBox = re.sub("\| alt *=.*?\n", "", newBox)
      if not re.search("\| writer *=.*?\n", newBox).group().split("=")[1].strip() == "" : #remove these fields if it has a writer and they're empty
        if re.search("\| story *=.*?\n", newBox).group().split("=")[1].strip() == "" :
          newBox = re.sub("\| story *=.*?\n", "", newBox)
        if re.search("\| screenplay *=.*?\n", newBox).group().split("=")[1].strip() == "" :
          newBox = re.sub("\| screenplay *=.*?\n", "", newBox)
      elif not re.search("\| story *=.*?\n", newBox).group().split("=")[1].strip() == "" : #remove these fields if it has a writer and they're empty
        if re.search("\| writer *=.*?\n", newBox).group().split("=")[1].strip() == "" :
          newBox = re.sub("\| writer *=.*?\n", "", newBox)
        if re.search("\| screenplay *=.*?\n", newBox).group().split("=")[1].strip() == "" :
          newBox = re.sub("\| screenplay *=.*?\n", "", newBox)
      elif not re.search("\| screenplay *=.*?\n", newBox).group().split("=")[1].strip() == "" : #remove these fields if it has a writer and they're empty
        if re.search("\| writer *=.*?\n", newBox).group().split("=")[1].strip() == "" :
          newBox = re.sub("\| writer *=.*?\n", "", newBox)
        if re.search("\| story *=.*?\n", newBox).group().split("=")[1].strip() == "" :
          newBox = re.sub("\| story *=.*?\n", "", newBox)
        
      if re.search("\| alt *=.*?\n", newBox).group().split("=")[1].strip() == "" :
        newBox = re.sub("\| alt *=.*?\n", "| alt            = <!-- see WP:ALT -->\n", newBox)
      #if re.search("\| based on *=.*?\n", newBox).group().split("=")[1].strip() == "" :
      #  newBox = re.sub("\| based on *=.*?\n", "| based on       = <!-- {{based on|title of the original work|writer of the original work}} -->\n", newBox)
      if re.search("\| released *=.*?\n", newBox).group().split("=")[1].strip() == "" :
        newBox = re.sub("\| released *=.*?\n", "| released       = <!-- {{Film date|Year|Month|Day|Location}} -->\n", newBox)
      
      if(unusedFields != ""):
        if not re.search("italic title", unusedFields.lower()) :
          self.canEditPage = 1
        newBox = newBox[:len(newBox)-2] + "<!-- unsupported parameters -->\n" + unusedFields + newBox[len(newBox)-2:]
        self.summary = "fix/rm unsupported parameter. " + self.summary
      return newBox.strip()