User:Currentlybiscuit/ScrapCCAnimScript

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search
<?php

/*
-------------------------------------------------------------------------
get-stroke-orders.php
-------------------------------------------------------------------------

Version 1.0

Contact: http://en.wikipedia.org/wiki/User_talk:WikiLaurent

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

-------------------------------------------------------------------------

Dependency:

Simple HTML DOM Parser (http://simplehtmldom.sourceforge.net/)

-------------------------------------------------------------------------

Usage:

php get-stroke-orders.php n=<page number> t=<animation type>

<page number> - The page number (1 to 7) at http://commons.wikimedia.org/wiki/Commons:Stroke_Order_Project/Simplified_Chinese_progress
<animation type> - "bw", "red" or "order"

-------------------------------------------------------------------------

Example:

Get all the gif animations:

php get-stroke-orders.php n=1 t=order
php get-stroke-orders.php n=2 t=order
php get-stroke-orders.php n=3 t=order
php get-stroke-orders.php n=4 t=order
php get-stroke-orders.php n=5 t=order
php get-stroke-orders.php n=6 t=order
php get-stroke-orders.php n=7 t=order

-------------------------------------------------------------------------

*/

// Third-party Simple HTML DOM Parser (see "Dependency" in the header); it
// provides the simple_html_dom class used by downloadAnimations().
require_once "simple_html_dom.php";
// Raise the execution time limit to 10 hours (3600 seconds * 10) --
// presumably because one run downloads many files one after another.
set_time_limit(3600 * 10);

/**
 * Fetches a URL with cURL and returns the response body.
 *
 * Redirects are followed (up to 5 hops) so that a redirecting URL yields the
 * final document rather than the body of the redirect response.
 *
 * @param string $url The URL to fetch.
 * @return string|false The response body, or false if the transfer failed
 *                      (a PHP warning describing the failure is raised).
 */
function curl($url){
	$ch = curl_init();
	curl_setopt($ch, CURLOPT_URL, $url);
	// Make curl_exec() return the body instead of printing it.
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
	curl_setopt($ch, CURLOPT_USERAGENT, "StrokeOrderAnimScrapper/1.0");
	// Follow redirects (e.g. http -> https), bounded to avoid redirect loops.
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
	curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
	$output = curl_exec($ch);
	if ($output === false) {
		// Surface the reason without changing the return contract (still false).
		trigger_error("curl: request to " . $url . " failed: " . curl_error($ch), E_USER_WARNING);
	}
	curl_close($ch);
	return $output;
}


/**
 * Downloads the stroke-order images of one type listed on a page of the
 * Simplified Chinese progress table and saves each of them in the current
 * working directory, named after the last path segment of its URL.
 *
 * @param int|string|null $pageNumber Progress page to scan. Values of 1 or
 *                                    less (including null) select the first page.
 * @param string|null     $type       "bw", "red" or "order". null or "" falls
 *                                    back to "bw"; any other value is rejected.
 * @return void
 */
function downloadAnimations($pageNumber, $type = "bw") {
	// A missing t= argument reaches this function as null, which would
	// otherwise bypass the declared default and disable the suffix filter.
	if ($type === null || $type === "") $type = "bw";

	// For each type: index of the table cell holding the image, and the
	// file-name suffix the image in that cell must have.
	$columns = array(
		"bw" => array(3, "-bw.png"),
		"red" => array(4, "-red.png"),
		"order" => array(5, "-order.gif"),
	);
	if (!isset($columns[$type])) {
		echo "Unknown animation type \"" . $type . "\" (expected \"bw\", \"red\" or \"order\")\n";
		return;
	}
	$tdIndex = $columns[$type][0];
	$suffix = $columns[$type][1];

	// https is requested directly so the listing is returned without relying
	// on a redirect from the http URL.
	$listBaseUrl = "https://commons.wikimedia.org/wiki/Commons:Stroke_Order_Project/Simplified_Chinese_progress";
	$pageUrl = $listBaseUrl;
	$pageNumber = (int)$pageNumber;
	if ($pageNumber > 1) $pageUrl .= "/" . $pageNumber;

	echo "Parsing " . $pageUrl . "\n";
	$htmlString = curl($pageUrl);
	if ($htmlString === false || $htmlString === "") {
		echo "Could not fetch " . $pageUrl . "\n";
		return;
	}
	$html = new simple_html_dom();
	$html->load($htmlString);

	// Identify the client on image downloads with the same User-Agent that
	// curl() sends; the "http" context options also apply to https URLs.
	$context = stream_context_create(array(
		"http" => array("user_agent" => "StrokeOrderAnimScrapper/1.0"),
	));

	foreach ($html->find('tr') as $tr) {
		$td = $tr->find("td", $tdIndex);
		if (!$td) continue;
		$img = $td->find("img", 0);
		if (!$img) continue;
		$src = $img->getAttribute("src");
		if (!$src || strpos($src, $suffix) === false) continue;

		// Protocol-relative URLs ("//host/path") need an explicit scheme.
		if (strpos($src, "//") === 0) $src = "https:" . $src;

		// Thumbnail URLs look like .../thumb/<dirs>/<file>/<size>-<file>:
		// drop the trailing size segment and the "thumb" directory to get the
		// original file. URLs that are not thumbnails are used unchanged.
		if (strpos($src, "/thumb/") !== false) {
			$src = substr($src, 0, strrpos($src, "/"));
			$src = str_replace("/thumb/", "/", $src);
		}

		$filename = basename($src);

		echo "Downloading " . $src . "\n";
		$imageData = file_get_contents($src, false, $context);
		if ($imageData === false) {
			// Skip instead of writing an empty file over a previous download.
			echo "Failed to download " . $src . "\n";
			continue;
		}
		if (file_put_contents($filename, $imageData) === false) {
			echo "Failed to write " . $filename . "\n";
		}
	}

	// Release the parsed DOM explicitly once the page has been processed.
	$html->clear();
	unset($html);
}


/**
 * Looks up a named parameter, first in the query string ($_GET) and then among
 * the command-line arguments written as name=value.
 *
 * On the command line, whitespace around the name and around the value is
 * ignored, and everything after the first "=" belongs to the value.
 *
 * @param string $name Parameter name.
 * @return mixed The $_GET entry as-is when present, otherwise the trimmed
 *               command-line value, or null when the parameter was not given.
 */
function getParam($name) {
	if (isset($_GET[$name])) return $_GET[$name];
	global $argv;
	// $argv may be unset or null (e.g. when not started from the command line).
	if (!is_array($argv)) return null;
	$name = (string)$name;
	foreach ($argv as $value) {
		// Limit of 2 keeps any further "=" characters inside the value.
		$pair = explode("=", $value, 2);
		if (count($pair) < 2) continue;
		// Strict comparison: loose != treats numeric-looking names such as
		// "01" and "1" as equal.
		if (trim($pair[0]) !== $name) continue;
		return trim($pair[1]);
	}
	return null;
}

// Entry point: read the page number (n) and the animation type (t) from the
// query string or the command line, then download the matching images.
$pageNumber = getParam("n");
$animationType = getParam("t");
// getParam() returns null for a missing parameter; passing that null through
// would override downloadAnimations()'s declared default type of "bw".
if ($animationType === null) $animationType = "bw";
downloadAnimations($pageNumber, $animationType);