# User:YiFeiBot/~/pywikibot/com end space.py
#
# From Wikimedia Commons, the free media repository
# (wiki page chrome: "Jump to navigation", "Jump to search")
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Published by zhuyifei1999 (https://wikitech.wikimedia.org/wiki/User:Zhuyifei1999)
# under the terms of Creative Commons Attribution-ShareAlike 3.0 Unported (CC BY-SA 3.0)
# https://creativecommons.org/licenses/by-sa/3.0/

import os
import time
import re
import MySQLdb
import pywikibot
from pywikibot import catlib
from pywikibot import pagegenerators

class Robot(object):
    """Bot that renames files whose names have whitespace before the extension.

    Candidate file names are read from the ``commonswiki_p`` database. Every
    successful move is recorded as a ``{{universal replace}}`` line, and the
    recorded lines are appended to the command page after each batch of
    ``self.step`` database rows.
    """

    def __init__(self):
        """Set up the site, the pending-command buffer and the command page."""
        self.site = pywikibot.getSite()
        # Command lines for moves that have not been written to the page yet.
        self.moved = ''
        # Number of database rows handled between two command-page updates.
        self.step = 100
        self.cmdp = pywikibot.Page(self.site, "User:CommonsDelinker/commands/filemovers")

    @staticmethod
    def _strip_space_before_extension(title):
        """Return *title* without the whitespace run before its extension.

        A single dot directly in front of that whitespace is removed as well,
        so ``"Foo. .jpg"`` becomes ``"Foo.jpg"``. Titles that do not match
        are returned unchanged.
        """
        return re.sub(r"\.?\s+(\.[a-zA-Z]+)$", r"\1", title)

    def _fetch_names(self):
        """Return every ``img_name`` row selected by the database filter."""
        connection = MySQLdb.connect(
            host="commonswiki.labsdb",
            db="commonswiki_p",
            read_default_file="~/replica.my.cnf"
        )
        # Release the cursor and the connection as soon as the rows are in
        # memory: the move loop sleeps between edits and would otherwise
        # keep the database connection open for the whole run.
        try:
            cursor = connection.cursor()
            try:
                cursor.execute(r"""
SELECT img_name
FROM image
WHERE img_name REGEXP "_\.[a-zA-Z]+$"
;""")
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            connection.close()

    def _move_if_needed(self, raw_title):
        """Move one file to its cleaned-up title and record the command line.

        Redirects, missing pages and titles that need no change are skipped.
        A failure while moving is reported through ``pywikibot.output`` and
        does not stop the run.
        """
        page = pywikibot.Page(self.site, unicode(raw_title, "utf-8"), ns=6)
        if page.isRedirectPage() or not page.exists():
            return
        old_title = page.title()
        new_title = self._strip_space_before_extension(old_title)
        if new_title == old_title:
            return
        try:
            cmt = "Robot: Removing space(s) before file extension"
            time.sleep(10)  # As slow as possible
            page.move(new_title, cmt)
            # [5:] drops the first five characters of each title --
            # presumably the "File:" namespace prefix; verify if the
            # namespace name ever differs.
            self.moved += u"{{universal replace|%s|%s|reason=%s}}\n" % \
                (old_title[5:], new_title[5:], cmt)
        except Exception as e:
            pywikibot.output(type(e).__name__+": "+str(e))

    def _flush_commands(self):
        """Append the recorded command lines to the command page.

        Does nothing when no move was recorded, so the page is neither
        polled nor rewritten for a batch without changes.
        """
        if not self.moved:
            return

        while True:
            cmds = self.cmdp.get(force=True)
            # 500 max for delinker, 100 for this bot, another 100 for buffering
            backlog_full = len(cmds) > 10000 and cmds.count(u"{{") > 300
            if not backlog_full:
                break
            time.sleep(60)

        self.cmdp.put(cmds + u"\n" + self.moved)
        self.moved = ''

    def run(self):
        """Process all candidate files in batches of ``self.step`` rows."""
        rows = self._fetch_names()
        for start in range(0, len(rows), self.step):
            for raw_title, in rows[start:start + self.step]:
                self._move_if_needed(raw_title)
            self._flush_commands()

def main():
    """Parse the command-line arguments and run the bot.

    On any exception the command lines recorded but not yet written to the
    command page are printed, and the exception is re-raised.
    """
    # Bound before the try block so the handler below can tell whether the
    # bot was created; otherwise a failure in handleArgs() or Robot() would
    # raise NameError there and hide the original exception.
    bot = None
    try:
        pywikibot.handleArgs()
        bot = Robot()
        bot.run()
    except BaseException:
        # BaseException keeps the dump for KeyboardInterrupt as well; the
        # exception is always re-raised, never swallowed.
        pywikibot.output("Exception! Dump:")
        if bot is not None:
            pywikibot.output(bot.moved)
        raise

# Script entry point: run the bot only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs whether main() returned normally or raised.
        pywikibot.stopme()