User:Pyfisch/Rob/source

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search
import pywikibot
import re
# Report files in the Commons category "Images by Rob Lavinsky" that share the
# same specimen size and mindat locality URL but come from different sources.
#
# Each file page is labelled 'mindat' when its text contains a mindat photo
# URL and 'irock' otherwise. Pages are grouped by the (size, locality) pair
# found in their text; every group that mixes both labels is printed as a
# '====' heading followed by one '* <label> <page>' line per file.

commons = pywikibot.Site('commons', 'commons')
cat = pywikibot.Category(commons, 'Images by Rob Lavinsky')
# Namespace 6 is the File namespace; content=True asks pywikibot to load the
# page text together with the listing (see the Category.members docs).
members = cat.members(namespaces=[6], content=True)

# Maps (size, locality URL) -> list of (page, source label) tuples.
same = {}

# Raw strings keep the regex escapes intact; the dots in the host name are
# escaped so that only the literal "www.mindat.org" matches.
pattern_size = re.compile(r'[\d.]*? x [\d.]*? x [\d.]*? cm')
pattern_loc = re.compile(r'http://www\.mindat\.org/loc-.*?\.html')

for i in members:
    # Read the page text once and reuse it for every check below.
    text = i.get()
    size_obj = pattern_size.search(text)
    loc_obj = pattern_loc.search(text)
    # A missing size or locality is represented by the empty string.
    # NOTE(review): pages with neither value all share the key ('', '') and
    # are therefore reported as one group -- confirm this is intended.
    size = size_obj.group() if size_obj else ''
    loc = loc_obj.group() if loc_obj else ''
    if 'http://www.mindat.org/photo-' in text:
        page = 'mindat'
    else:
        page = 'irock'
    same.setdefault((size, loc), []).append((i, page))

for key in same:
    entries = same[key]
    # Only groups containing more than one distinct source label are listed.
    if len(set(source for _, source in entries)) > 1:
        # Single-argument print(...) with %-formatting produces the same
        # output under Python 2 and Python 3.
        print('==== %s %s ====' % key)
        for member, source in entries:
            print('* %s %s' % (source, member))
Cheat-sheet regexes to replace lists with galleries.
  • Find: \* (.+?) \[\[:File:(.*?)\]\]
  • Insert: \2 \| \1