User:TheDJ/datacheck.js

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search
Note: After saving, you have to bypass your browser's cache to see the changes. Internet Explorer: press Ctrl-F5, Mozilla: hold down Shift while clicking Reload (or press Ctrl-Shift-R), Opera/Konqueror: press F5, Safari: hold down Shift + Alt while clicking Reload, Chrome: hold down Shift while clicking Reload.
/* 
 * Purpose: Show as much of the data as we can know in as 'raw' a format as possible
 * This can help those who are interested in cleaning up metadata to find cases which are currently subpar
 *
 * Licensed: MIT
 *
 * TODO:
 * - Make it collapsible
 * - Add link to place for editors to collaborate on improving
 * - Add checks for duplicate statements
 * - Add checks for complex HTML
 * - Add checks for Geo location/orientation (camera and object)
 * - Add checks for FoP
 * - Add checks for derivatives (using category)
 * - Run checks on the database for templates using the classes that we might not know about
 * - Add checks for type and microformats
 * - Add checks for campaigns and or institute donations
 * - Add check for retouched  + user
 * - Add check for FP/QI/VI
 * - Add checks for catalogue numbers/source ids (NASA image id, NARA etc)
 */
( function ( $, mw ) {
	'use strict';
	// Container element of the whole report; assigned by initDataCheck() once the DOM is ready.
	var $metadataView;
	// Collected warnings; nothing in the visible code writes to this yet.
	var warnings = [];
	// Collected error messages; collectPageMetadata() pushes to this.
	var errors = [];

	/**
	 * Render the extmetadata of an imageinfo API response as a table and
	 * append it to the '.metadata-api' section of $metadataView.
	 *
	 * The work is wrapped in a DOM-ready callback. A response without
	 * pages, imageinfo or extmetadata produces an empty table instead of
	 * throwing.
	 *
	 * @param {Object} data API response; reads data.query.pages[*].imageinfo[0].extmetadata
	 */
	function reportAPIMetadata( data ) {
		$( function () {
			var table = $( '<table>' ),
				pages = ( data && data.query && data.query.pages ) || {},
				pageIds = Object.keys( pages ),
				// Just one page: only the first entry of the result is used
				page = pageIds.length > 0 ? pages[ pageIds[ 0 ] ] : null,
				info = page && page.imageinfo && page.imageinfo[ 0 ],
				extmetadata = ( info && info.extmetadata ) || {};

			table.addClass( 'mw_metadata datacheck' );

			/**
			 * Build one table row for a metadata field.
			 *
			 * @param {string} header Field name, shown as plain text
			 * @param {*} originalValue Field value; an object with _type === 'lang'
			 *  is flattened to one "code: text" line per language
			 * @return {jQuery} The <tr> element
			 */
			function buildRow( header, originalValue ) {
				var value = originalValue;
				if ( value === null || value === undefined ) {
					// Missing values would otherwise throw on the _type lookup below
					value = '';
				} else if ( typeof value === 'object' && value._type === 'lang' ) {
					value = '';
					Object.keys( originalValue ).forEach( function ( langCode ) {
						if ( langCode !== '_type' ) {
							value += langCode + ': ' + originalValue[ langCode ] + '<br />';
						}
					} );
				}
				// NOTE(review): values are inserted as markup, as before; this presumes
				// the API returns HTML that is safe to display - confirm.
				return $( '<tr>' )
					.append( $( '<th>' ).text( header ) )
					.append( $( '<td>' ).html( value ) );
			}

			Object.keys( extmetadata ).forEach( function ( key ) {
				var field = extmetadata[ key ];
				table.append( buildRow( key, field ? field.value : null ) );
			} );

			table.appendTo( $metadataView.find( '.metadata-api' ) );
		} );
	}

	/**
	 * Query the imageinfo API for the current page (wgPageName) and hand
	 * the response to reportAPIMetadata().
	 *
	 * A failed request is logged with mw.log.warn instead of being dropped
	 * silently.
	 */
	function getAPIMetadata() {
		var api = new mw.Api(),
			// extmetadata fields to request.
			// Not requested for now: DateTime (datetime from EXIF file data),
			// GPSLatitude, GPSLongitude, Categories, Permission.
			fields = [
				'ObjectName',		// title of a book for instance
				'DateTimeOriginal',	// date time from desc page
				'ImageDescription',	// from desc page
				'Copyrighted',		// from desc page, false if PD
				'License',			// from template
				'LicenseShortName',	// from desc page
				'UsageTerms',		// 'long name of terms' from desc page
				'LicenseUrl',		// link to license deed
				'Credit',			// source? from desc page
				'Artist'			// author/copyright holder from desc page
			];

		api.get( {
			action: 'query',
			titles: mw.config.get( 'wgPageName' ),
			prop: 'imageinfo',
			iiprop: 'timestamp|user|url|size|mime|mediatype|extmetadata',
			iiextmetadatalanguage: mw.config.get( 'wgUserLanguage' ),
			iimetadataversion: 'latest',
			iiextmetadatamultilang: '',
			iiextmetadatafilter: fields.join( '|' )
		} )
			.done( reportAPIMetadata )
			.fail( function ( code ) {
				// Without this the 'Metadata API' section just stays empty with no hint why
				mw.log.warn( 'datacheck: imageinfo request failed: ' + code );
			} );
	}


	/**
	 * Scrape the machine-readable metadata that templates expose in the
	 * rendered page.
	 *
	 * The result holds the scraped text per field, the matched jQuery
	 * objects under `html`, boolean flags under `restrictions` and the
	 * result of getLicenses() under `licenses`. An entry is pushed to
	 * `errors` when no '.commons-file-information-table' element is found.
	 *
	 * @return {Object} Collected metadata
	 */
	function collectPageMetadata() {
		var scraped = { html: {}, restrictions: {} },
			cells = scraped.html,
			$translations,
			$isoDate;

		// Remember the elements matching `selector` under cells[key] and return their text
		function scrapeCell( key, selector ) {
			cells[ key ] = $( selector );
			return scrapeText( cells[ key ] );
		}

		// Is {{Information}} there at all?
		if ( $( '.commons-file-information-table' ).length < 1 ) {
			errors.push( "No {{Information}}-template seems to be present" );
		}

		// {{Information}}: description, one entry per language when marked up as such
		cells.description = $( '#fileinfotpl_desc + td' );
		$translations = cells.description.find( '.description[lang]' );
		if ( $translations.length > 0 ) {
			scraped.description = {};
			$translations.each( function ( index, element ) {
				var $block = $( element ),
					$copy = $block.clone();
				// Drop the '.language' child so only the description text is scraped
				$copy.find( '.language' ).remove();
				scraped.description[ $block.attr( 'lang' ) ] = scrapeText( $copy );
			} );
		} else {
			// TODO: warn about lack of language info
			scraped.description = scrapeText( cells.description );
		}

		// {{Information}}: author
		scraped.author = scrapeCell( 'author', '#fileinfotpl_aut + td' );

		// {{Information}}: date, preferring the datetime attribute of a <time> element ({{date}})
		scraped.date = scrapeCell( 'date', '#fileinfotpl_date + td' );
		$isoDate = cells.date.find( 'time[datetime]' );
		if ( $isoDate.length > 0 ) {
			// TODO: register that this value is ISO unit
			scraped.date = $isoDate.attr( 'datetime' );
		}
		// TODO: otherwise warn that the date is not ISO, or not recognized as such

		// {{Information}}: source
		scraped.source = scrapeCell( 'source', '#fileinfotpl_src + td' );

		// {{Credit line}}: explicit attribution statement, supersedes what we can manufacture ourselves
		scraped.attribution = scrapeCell( 'attribution', '.fileinfotpl_credit + td' );

		// {{own}}, usually contained in #fileinfotpl_src
		scraped.ownwork = $( '#own-work, .int-own-work' ).length > 0;

		// {{Creator}}, usually contained in #fileinfotpl_aut
		scraped.creator = scrapeCell( 'creator', '#creator' );

		// {{Personality rights}} and the trademark restriction marker
		scraped.restrictions.personality_rights = $( '#commons-template-personality-rights' ).length > 0;
		scraped.restrictions.trademarked = $( '.restriction-trademarked' ).length > 0;

		scraped.licenses = getLicenses();
		return scraped;
	}

	/**
	 * Read the license data exposed by every '.licensetpl' element on the page.
	 *
	 * For each element the inner HTML of its '.licensetpl_<field>' children
	 * is collected; elements without a non-empty 'short' field are skipped.
	 *
	 * @return {Object[]} One object per license with the keys
	 *  link, short, long, attr, aut, link_req and attr_req
	 */
	function getLicenses() {
		var found = [],
			fields = [ 'link', 'short', 'long', 'attr', 'aut', 'link_req', 'attr_req' ];

		$( '.licensetpl' ).each( function ( index, template ) {
			var $template = $( template ),
				entry = {};

			fields.forEach( function ( field ) {
				entry[ field ] = $template.find( '.licensetpl_' + field ).html();
			} );

			// A license is only reported when it has a short name
			if ( entry.short ) {
				found.push( entry );
			}
		} );

		return found;
	}
	
	/**
	 * Render the scraped page metadata as two tables and append them to the
	 * '.metadata-scraped' section of $metadataView: one with the plain
	 * fields, the license count and the restriction flags, and one with a
	 * row per license.
	 *
	 * @param {Object} data Result of collectPageMetadata(); the keys
	 *  'licenses', 'restrictions' and 'html' are not rendered as plain fields
	 */
	function reportScrapedData( data ) {
		var $table = $( '<table>' ),
			$licenseTable = $( '<table>' ),
			$target = $metadataView.find( '.metadata-scraped' );

		/**
		 * Build a row for a scraped field.
		 *
		 * Scraped values are plain text (they come from .text()/.attr()), so
		 * they are inserted as text; inserting them as markup would turn page
		 * text such as "<img onerror=...>" into live HTML.
		 *
		 * @param {string} header Field name
		 * @param {*} value Field value; a plain object is shown as one
		 *  "key: value" line per entry
		 * @return {jQuery} The <tr> element
		 */
		function buildRow( header, value ) {
			var $cell = $( '<td>' );
			if ( $.isPlainObject( value ) ) {
				$.each( value, function ( k, v ) {
					$cell.append( document.createTextNode( k + ': ' + v ), $( '<br>' ) );
				} );
			} else {
				$cell.text( value === undefined || value === null ? '' : String( value ) );
			}
			return $( '<tr>' ).append( $( '<th>' ).text( header ), $cell );
		}

		/**
		 * Build a row for one license.
		 *
		 * The license fields were read with .html() in getLicenses(), so they
		 * are inserted as markup again. Missing fields become empty strings:
		 * passing undefined to .html() would act as a getter and drop the
		 * cell, shifting the remaining columns.
		 *
		 * @param {Object} license Entry of data.licenses
		 * @return {jQuery} The <tr> element
		 */
		function buildLicenseRow( license ) {
			// Anything but the literal string "false" counts as required
			var linkRequired = license.link_req !== 'false',
				attrRequired = license.attr_req !== 'false';

			return $( '<tr>' ).append(
				$( '<td>' ).html( license.short || '' ),
				$( '<td>' ).html( license.long || '' ),
				$( '<td>' ).html(
					( license.link || '' ) +
					( linkRequired ? ' <b>(Required)</b>' : ' <b>(Not required)</b>' )
				),
				$( '<td>' ).html(
					( attrRequired ? '<b>Required</b> ' : '<b>Not required</b> ' ) +
					( license.attr || '' )
				),
				$( '<td>' ).html( license.aut || '' )
			);
		}

		$table.addClass( 'mw_metadata datacheck' );
		$.each( data, function ( key, value ) {
			if ( key === 'licenses' || key === 'restrictions' || key === 'html' ) {
				return;
			}
			$table.append( buildRow( key, value ) );
		} );
		$table.append( buildRow( 'licenseCount', data.licenses.length ) );
		$.each( data.restrictions, function ( key, value ) {
			$table.append( buildRow( key, value ) );
		} );
		$table.appendTo( $target );

		$licenseTable.addClass( 'mw_metadata datacheck licenses' );
		$licenseTable.append( $( '<tr><th>Short name</th><th>Long name</th><th>License link</th><th>Attribution</th><th>Author</th></tr>' ) );
		$.each( data.licenses, function ( i, license ) {
			$licenseTable.append( buildLicenseRow( license ) );
		} );
		$licenseTable.appendTo( $target );
	}
	
	/**
	 * Get the trimmed text content of a jQuery collection, ignoring the
	 * content of any <style> descendants. Works on a clone, so the
	 * collection that is passed in is left untouched.
	 *
	 * @param {jQuery} $element Elements to read the text from
	 * @return {string} Trimmed text
	 */
	function scrapeText( $element ) {
		var $copy = $element.clone();
		$copy.find( 'style' ).remove();
		return $.trim( $copy.text() );
	}
	
	/**
	 * Start the data check: add the table styles, fire the API request and,
	 * once the DOM is ready, insert the report container before
	 * '#mw-imagepage-content' and fill in the scraped metadata.
	 */
	function initDataCheck() {
		var css = [
				'table.datacheck { width: auto; }',
				'table.datacheck td, table.datacheck th { text-align: left; }',
				'table.datacheck th { font-weight:bold;}'
			].join( ' ' ),
			sections = [
				'<h2>Metadata API</h2>',
				'<div class="metadata-api"></div>',
				'<h2><a href="//commons.wikimedia.org/wiki/Commons:Machine-readable_data">Machine-readable metadata</a></h2>',
				'<div class="metadata-scraped"></div>'
			];

		mw.util.addCSS( css );
		// The request is started right away; it does not have to wait for the DOM
		getAPIMetadata();

		$( function () {
			$metadataView = $( '<div>' ).addClass( 'metadata-content' );
			sections.forEach( function ( markup ) {
				$metadataView.append( markup );
			} );
			$( '#mw-imagepage-content' ).before( $metadataView );

			reportScrapedData( collectPageMetadata() );
		} );
	}

	// Only run when viewing an existing page in the File namespace (6).
	// mw.util is used for the diff check below and by initDataCheck()
	// (addCSS), so 'mediawiki.util' is requested explicitly and the diff
	// check is done only after it has loaded.
	if ( mw.config.get( 'wgNamespaceNumber' ) === 6 &&
			mw.config.get( 'wgAction' ) === 'view' &&
			mw.config.get( 'wgArticleId' ) !== 0 )
	{
		mw.loader.using( [ 'mediawiki.api', 'mediawiki.util' ], function () {
			// Do not add the report when a diff is being shown
			if ( !mw.util.getParamValue( 'diff' ) ) {
				initDataCheck();
			}
		} );
	}
} )( jQuery, mediaWiki );