Fixing Favatars
Ever since this blog went online, I have used Favatars to make it easier to recognize the authors of comments. I think this has worked out really well for my site, as many visitors have their own websites and favicons. However, despite Paul James pointing out that Cool URIs don’t change, I saw more and more 404s whenever I viewed my site. All of these resulted from favicons that weren’t there anymore.
To fix this, I wrote a small PHP5 class that attempts to download the favicon from the commenter’s website. This class is very conservative – it stops a download if it’s bigger than 50 KB, has a short timeout, only allows PNG, GIF, JPEG and ICO extensions, and silently returns if there’s any error. Here’s how you can use it:
// Create a new Favatars object and tell it to save
// all favicons to files/favatars/
$favatars = new Favatars( 'files/favatars/' );

// The get method attempts to find a favicon for the given
// URL and returns the local path of the downloaded favicon,
// or an empty string if none could be found.
$comment['favatar'] = $favatars->get( $comment['website'] );
Read on to view the source of this class.
// Based on Paul James' Favatars - http://www.peej.co.uk/projects/favatars.html

/**
 * Finds the favicon ("favatar") of a website, downloads it and stores it in a
 * local directory.
 *
 * The class is deliberately best-effort: every failure (invalid URL, network
 * error, timeout, oversized file, unsupported extension, write error) makes
 * get() return an empty string instead of raising an error.
 *
 * The URLs handled here normally come from comment forms, i.e. they are
 * untrusted. Only http:// and https:// URLs with a host are accepted, so local
 * files and other stream wrappers can never be read through this class.
 *
 * Requires allow_url_fopen to be enabled.
 */
class Favatars {

    /** Maximum size of a favicon in bytes; bigger downloads are aborted. */
    public $maxDownload = 51200;

    /** Maximum number of bytes read from a web page while looking for its icon <link> tag. */
    public $maxPageDownload = 262144;

    /** Timeout in seconds for every HTTP request made by this class. */
    public $timeout = 2;

    /** Directory prefix (including trailing slash) the favicons are written to. */
    protected $path = '';

    /**
     * @param string $path Directory prefix for downloaded favicons. It is
     *                     prepended to the file name as-is, so it should end
     *                     with a slash.
     */
    public function __construct( $path = 'favatars/' ) {
        $this->path = $path;
    }

    /**
     * Attempts to find and download the favicon for the given website.
     *
     * The local file name is derived from the host and path of $url (all
     * non-word characters collapsed to "-") plus the favicon's extension.
     *
     * @param string $url Address of the website (http or https).
     * @return string Local path of the downloaded favicon, or an empty string
     *                if none could be found or stored.
     */
    public function get( $url ) {
        $urlParts = $this->parseHttpURL( $url );
        if( $urlParts === false ) {
            return '';
        }

        $faviconURL = $this->guessFaviconURL( $url );
        if( empty($faviconURL) ) {
            return '';
        }

        // Check the extension on the path component only, so that favicons
        // referenced with a query string (favicon.ico?v=2) are accepted too.
        $faviconPath = parse_url( $faviconURL, PHP_URL_PATH );
        if(
            !is_string($faviconPath) ||
            !preg_match('/\.(png|gif|jpe?g|ico)$/i', $faviconPath, $extMatch)
        ) {
            return '';
        }

        // construct the local filename and path
        $fileName = $urlParts['host'] . (isset($urlParts['path']) ? $urlParts['path'] : '');
        $fileName = trim( preg_replace('/\W+/', '-', $fileName), '-' );
        if( $fileName === '' ) {
            return '';
        }
        $targetPath = $this->path . $fileName . '.' . strtolower( $extMatch[1] );

        return $this->download( $faviconURL, $targetPath, $url ) ? $targetPath : '';
    }

    /**
     * Determines the favicon URL of a website.
     *
     * The page is fetched and searched for a <link rel="icon"> or
     * <link rel="shortcut icon"> tag. If the page has no usable tag,
     * /favicon.ico on the same host is assumed.
     *
     * @param string $url Address of the website (http or https).
     * @return string Absolute favicon URL, or an empty string if $url is not a
     *                valid http(s) URL or the page could not be fetched.
     */
    protected function guessFaviconURL( $url ) {
        $urlParts = $this->parseHttpURL( $url );
        if( $urlParts === false ) {
            return '';
        }

        $html = $this->fetchPage( $url );
        if( empty($html) ) {
            return '';
        }

        // Attempt to grab a favicon link from their webpage
        $linkUrl = $this->findIconLink( $html );
        if( $linkUrl !== '' ) {
            $resolved = $this->resolveURL( $urlParts, $linkUrl );
            if( $resolved !== '' ) {
                return $resolved;
            }
        }

        // If unsuccessful, attempt to "guess" the favicon location
        return $this->siteRoot( $urlParts ) . '/favicon.ico';
    }

    /**
     * Downloads $url to the local file $target.
     *
     * The download is aborted as soon as more than $maxDownload bytes have been
     * received, when a read fails or when the stream times out. Empty
     * responses are not written.
     *
     * @param string $url     Absolute URL of the file to download.
     * @param string $target  Local path the file is written to.
     * @param string $referer Optional value for the Referer request header.
     * @return bool True if the file was downloaded and written, false otherwise.
     */
    protected function download( $url, $target, $referer = '' ) {
        $http = array(
            'method'  => 'GET',
            'timeout' => $this->timeout
        );
        // Never let line breaks from an untrusted URL leak into the request headers.
        if( $referer !== '' && !preg_match('/[\r\n]/', $referer) ) {
            $http['header'] = "Referer: $referer\r\n";
        }
        $context = stream_context_create( array( 'http' => $http ) );

        // Errors are suppressed on purpose: failures are reported via the return value.
        $fp = @fopen( $url, 'rb', false, $context );
        if( !$fp ) {
            return false;
        }

        $file = '';
        while( !feof($fp) ) {
            $chunk = fread( $fp, 8192 );
            $meta = stream_get_meta_data( $fp );

            // feof() stays false on a timed-out or broken stream, so these
            // conditions have to end the loop explicitly.
            if( $chunk === false || !empty($meta['timed_out']) ) {
                fclose( $fp );
                return false;
            }

            $file .= $chunk;
            if( strlen($file) > $this->maxDownload ) {
                fclose( $fp );
                return false;
            }
        }
        fclose( $fp );

        if( $file === '' ) {
            return false;
        }
        return @file_put_contents( $target, $file, LOCK_EX ) !== false;
    }

    /**
     * Fetches (at most $maxPageDownload bytes of) a web page.
     *
     * @param string $url Absolute http(s) URL of the page.
     * @return string|false The page content, or false on failure.
     */
    protected function fetchPage( $url ) {
        $context = stream_context_create( array(
            'http' => array(
                'method'  => 'GET',
                'timeout' => $this->timeout
            )
        ));

        $fp = @fopen( $url, 'rb', false, $context );
        if( !$fp ) {
            return false;
        }
        $html = @stream_get_contents( $fp, (int) $this->maxPageDownload );
        fclose( $fp );

        return $html;
    }

    /**
     * Extracts the href of the first icon <link> tag found in $html.
     *
     * Attribute order does not matter and both single and double quoted
     * attribute values are understood.
     *
     * @param string $html HTML source of a page.
     * @return string The decoded href value, or an empty string if there is none.
     */
    protected function findIconLink( $html ) {
        if( !preg_match_all('/<link\b[^>]*>/i', $html, $tags) ) {
            return '';
        }

        foreach( $tags[0] as $tag ) {
            // Only rel="icon" and rel="shortcut icon" count, not e.g. apple-touch-icon.
            if( !preg_match('/(?<![\w-])rel\s*=\s*(["\'])\s*(?:shortcut\s+)?icon\s*\1/i', $tag) ) {
                continue;
            }
            if( preg_match('/(?<![\w-])href\s*=\s*(["\'])(.*?)\1/is', $tag, $hrefMatch) ) {
                $href = trim( html_entity_decode($hrefMatch[2], ENT_QUOTES, 'UTF-8') );
                if( $href !== '' ) {
                    return $href;
                }
            }
        }
        return '';
    }

    /**
     * Turns the href of an icon <link> tag into an absolute URL.
     *
     * Relative links are resolved against the directory of the page. A last
     * path segment containing a dot (index.php) is treated as a file name and
     * dropped; any other last segment (/blog) is treated as a directory, which
     * matches how such addresses usually redirect.
     *
     * @param array  $baseParts parse_url() result of the page the link was found on.
     * @param string $linkUrl   The href value.
     * @return string Absolute http(s) URL, or an empty string if the link
     *                cannot be used (control characters, unsupported scheme).
     */
    protected function resolveURL( $baseParts, $linkUrl ) {
        if( preg_match('/[\x00-\x1f\x7f]/', $linkUrl) ) {
            return '';
        }

        // protocol-relative: //cdn.example.com/favicon.ico
        if( substr($linkUrl, 0, 2) === '//' ) {
            return $baseParts['scheme'] . ':' . $linkUrl;
        }
        // already absolute
        if( preg_match('#^https?://#i', $linkUrl) ) {
            return $linkUrl;
        }
        // any other scheme (data:, ftp:, javascript:, ...) is not supported
        if( preg_match('#^[a-z][a-z0-9+.\-]*:#i', $linkUrl) ) {
            return '';
        }

        $root = $this->siteRoot( $baseParts );

        // absolute path on the same host
        if( substr($linkUrl, 0, 1) === '/' ) {
            return $root . $linkUrl;
        }

        // relative path: resolve against the directory of the page
        $basePath = isset($baseParts['path']) ? $baseParts['path'] : '/';
        if( substr($basePath, -1) !== '/' ) {
            $lastSlash = strrpos( $basePath, '/' );
            if( $lastSlash === false ) {
                $basePath = '/';
            }
            else if( strpos(substr($basePath, $lastSlash + 1), '.') !== false ) {
                $basePath = substr( $basePath, 0, $lastSlash + 1 );
            }
            else {
                $basePath .= '/';
            }
        }
        return $root . $basePath . $linkUrl;
    }

    /**
     * Builds "scheme://host[:port]" from a parse_url() result.
     *
     * @param array $urlParts parse_url() result containing scheme and host.
     * @return string
     */
    protected function siteRoot( $urlParts ) {
        $root = $urlParts['scheme'] . '://' . $urlParts['host'];
        if( isset($urlParts['port']) ) {
            $root .= ':' . $urlParts['port'];
        }
        return $root;
    }

    /**
     * Validates and parses a website URL.
     *
     * @param mixed $url The URL to check.
     * @return array|false The parse_url() result if $url is an http or https
     *                     URL with a host and without whitespace or control
     *                     characters, false otherwise.
     */
    protected function parseHttpURL( $url ) {
        if( !is_string($url) || $url === '' || preg_match('/[\s\x00-\x1f\x7f]/', $url) ) {
            return false;
        }

        $parts = parse_url( $url );
        if(
            !is_array($parts) ||
            !isset($parts['scheme'], $parts['host']) ||
            $parts['host'] === ''
        ) {
            return false;
        }

        $scheme = strtolower( $parts['scheme'] );
        if( $scheme !== 'http' && $scheme !== 'https' ) {
            return false;
        }
        return $parts;
    }
}
This class requires allow_url_fopen to be enabled. There is no cURL option this time, sorry.