User:Jean-Frédéric/DatabaseQueries

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search

Useful queries to run on the databases available on Tool Labs.

All files in a given category[edit]

SELECT /* SLOW_OK */ page_title AS title 
    FROM page, categorylinks 
    WHERE page.page_id=categorylinks.cl_from
        AND categorylinks.cl_to = "Foreground_flowers";

With uploader[edit]

SELECT /* SLOW_OK */ image.img_name as image, img_user_text AS user 
    FROM image, page, categorylinks 
    WHERE page.page_id=categorylinks.cl_from
        AND image.img_name = page.page_title
        AND categorylinks.cl_to = "Foreground_flowers";

Number of files in a given category during a given timeframe[edit]

SELECT /* SLOW_OK */ COUNT(page_title) 
    FROM image, page, categorylinks 
    WHERE page.page_id=categorylinks.cl_from 
        AND image.img_name = page.page_title
        AND .categorylinks.cl_to = "All_medias_supported_by_Wikimedia_France"
        AND img_timestamp BETWEEN 20110101000000 AND 20120101000000 
    ORDER BY img_timestamp ASC;

QI by a given user[edit]

SELECT /* SLOW_OK */ page_title 
    FROM image, page, categorylinks 
    WHERE page.page_id=categorylinks.cl_from
        AND image.img_name = page.page_title
        AND categorylinks.cl_to = "Quality_images"
        AND img_user_text = "Example";

Top uploaders of a given category[edit]

SELECT /* SLOW_OK */ DISTINCT img_user_text AS user, COUNT(image.img_name) as img_count 
    FROM image, page, categorylinks 
    WHERE page.page_id=categorylinks.cl_from
        AND image.img_name = page.page_title
        AND categorylinks.cl_to = "All_medias_supported_by_Wikimedia_France" 
    GROUP BY user 
    ORDER BY img_count DESC;

With size average[edit]

SELECT /* SLOW_OK */ DISTINCT img_user_text AS user, COUNT(i.img_name) as img_count, AVG(img_size) as average_size 
    FROM image i, page p, categorylinks cl 
    WHERE i.img_name = p.page_title 
        AND p.page_id = cl.cl_from 
        AND cl.cl_to = "Foreground_flowers" 
    GROUP BY user 
    ORDER BY img_count DESC;

Top uploaders of a given category in a given year[edit]

SELECT /* SLOW_OK */ DISTINCT img_user_text AS user, COUNT(image.img_name) as img_count 
    FROM image, page, categorylinks 
    WHERE page.page_id=categorylinks.cl_from 
        AND image.img_name = page.page_title 
        AND .categorylinks.cl_to = "All_medias_supported_by_Wikimedia_France" 
        AND img_timestamp BETWEEN 20110101000000 AND 20120101000000
    GROUP BY user 
    ORDER BY img_count DESC;

Useless data on files in a given category in a given timeframe[edit]

Average size[edit]

SELECT /* SLOW_OK */  AVG(img_size) 
    FROM image i, page p, categorylinks cl 
    WHERE i.img_name = p.page_title 
        AND p.page_id = cl.cl_from 
        AND cl.cl_to = "Foreground_flowers";

Average size/width/height[edit]

SELECT /* SLOW_OK */  AVG(img_size), AVG(img_width), AVG(img_height) 
    FROM image i, page p, categorylinks cl 
    WHERE i.img_name = p.page_title 
        AND p.page_id = cl.cl_from 
        AND cl.cl_to = "All_medias_supported_by_Wikimedia_France" 
        AND img_timestamp BETWEEN 20110101000000 AND 20120101000000;

Number of uploaders for a given category[edit]

SELECT /* SLOW_OK */ COUNT(DISTINCT img_user_text) AS user 
    FROM image i, page p, categorylinks cl 
    WHERE i.img_name = p.page_title 
        AND p.page_id = cl.cl_from 
        AND cl.cl_to = "Foreground_flowers";

Files in a category intersection[edit]

SELECT /* SLOW_OK */ page_title 
    FROM page
        JOIN categorylinks AS cl1 ON page_id=cl1.cl_from
        JOIN categorylinks AS cl2 ON page_id=cl2.cl_from
    WHERE cl1.cl_to="Foreground_flowers" 
        AND cl2.cl_to="Laval,_Mayenne";

QIs in a given category in a given timeframe[edit]

SELECT /* SLOW_OK */ page_title, img_user_text AS uploader
    FROM page
        JOIN categorylinks AS cl1 ON page_id=cl1.cl_from
        JOIN categorylinks AS cl2 ON page_id=cl2.cl_from 
        JOIN image ON image.img_name = page.page_title
    WHERE cl1.cl_to="All_medias_supported_by_Wikimedia_France" 
        AND cl2.cl_to="Quality_images" 
        AND img_timestamp BETWEEN 20110101000000 AND 20120101000000 
    ORDER BY uploader DESC;

All files using a given template[edit]

SELECT /* SLOW_OK */ page_title
    FROM page
        JOIN templatelinks
    WHERE page.page_namespace = 6
        AND page.page_id = templatelinks.tl_from
        AND templatelinks.tl_namespace = 10 
        AND templatelinks.tl_title = "Specimen";

Count[edit]

SELECT /* SLOW_OK */ COUNT(page_title)
    FROM page
        JOIN templatelinks
    WHERE page.page_namespace = 6
        AND page.page_id = templatelinks.tl_from
        AND templatelinks.tl_namespace = 10 
        AND templatelinks.tl_title = "Specimen";

Global usage of a file[edit]

Count reuse in main NS, and distinct wikis[edit]

SELECT /* SLOW_OK */ COUNT(gil_wiki), COUNT(DISTINCT gil_wiki)
    FROM globalimagelinks
    WHERE gil_page_namespace_id = 0 AND (gil_wiki!='metawiki')
        AND gil_to = 'Final_Trophee_Monal_2012_n08.jpg';

Global usage in a category[edit]

Per file[edit]

SELECT /* SLOW_OK */ page_title AS title, COUNT(gil_wiki) as Uses, COUNT(DISTINCT gil_wiki) as NbWikis
    FROM page
        JOIN categorylinks
        JOIN globalimagelinks
    WHERE page.page_id=categorylinks.cl_from
        AND gil_page_namespace_id = 0 AND (gil_wiki!='metawiki')
        AND gil_to = page_title
        AND categorylinks.cl_to = "Foreground_flowers"
    GROUP BY title;

Total & counts[edit]

SELECT /* SLOW_OK */ COUNT(DISTINCT page_title) AS FilesUsed, COUNT(gil_wiki) AS Uses, COUNT(DISTINCT gil_wiki) AS NbWikis
    FROM page
        JOIN categorylinks
        JOIN globalimagelinks
        JOIN image ON image.img_name = page.page_title
    WHERE page.page_id=categorylinks.cl_from
        AND gil_page_namespace_id = 0 AND (gil_wiki!='metawiki')
        AND gil_to = page_title
        AND categorylinks.cl_to = "Foreground_flowers";
        AND img_timestamp BETWEEN 20130101000000 AND 20130701000000;

Template users[edit]

SELECT /* SLOW_OK */ DISTINCT img_user_text AS USER, COUNT(page.page_title) AS img_count 
    FROM image, page, templatelinks
    WHERE page.page_namespace = 6
        AND image.img_name = page.page_title
        AND page.page_id = templatelinks.tl_from
        AND templatelinks.tl_namespace = 10 
        AND templatelinks.tl_title = "Specimen"
    GROUP BY USER 
    ORDER BY img_count DESC;