Fixing Favatars
Ever since this blog went online I have used Favatars to make it easier to recognize the authors of comments. I think this has worked out really well for my site, as many visitors have their own websites and favicons. However, despite Paul James pointing out that Cool URIs don’t change, I saw more and more 404s when I viewed my site. All of these resulted from favicons that weren’t there anymore.
To fix this, I wrote a small PHP5 class that attempts to download the favicon from the commenter’s website. This class is very conservative – it stops a download if it’s bigger than 50 KB, has a short timeout, only allows PNG, GIF, JPEG and ICO extensions and silently returns if there’s any error. Here’s how you can use it:
// Instantiate the downloader; every fetched favicon is
// stored below files/favatars/
$favatarStore = new Favatars( 'files/favatars/' );
// get() tries to locate a favicon for the URL it is given.
// The result is the local path of the downloaded icon, or
// an empty string when no icon could be found.
$comment['favatar'] = $favatarStore->get( $comment['website'] );
Read on to view the source of this class.
// Based on Paul James' Favatars - http://www.peej.co.uk/projects/favatars.html
/**
 * Downloads the favicon of a website and stores it locally.
 *
 * The class is deliberately conservative: only http(s) URLs are fetched,
 * every request has a short timeout, downloads are size-capped, only
 * PNG, GIF, JPEG and ICO extensions are accepted, and every failure is
 * reported by returning an empty string / false instead of raising.
 */
class Favatars {
    /** Maximum number of bytes accepted for a favicon (50 KB by default). */
    public $maxDownload = 51200;

    /** Directory prefix (including trailing slash) the favicons are written to. */
    protected $path = '';

    /** Timeout in seconds applied to every HTTP request. */
    protected $timeout = 2;

    /** Maximum number of bytes of the website's HTML that are read to find the icon link. */
    protected $maxPageDownload = 262144;

    /**
     * @param string $path Directory prefix for stored favicons; used verbatim
     *                     as the start of the target file path.
     */
    public function __construct( $path = 'favatars/' ) {
        $this->path = $path;
    }

    /**
     * Finds, downloads and stores the favicon belonging to a website.
     *
     * @param string $url Address of the website (http or https).
     * @return string Local path of the stored favicon, or '' when no usable
     *                favicon could be found or downloaded.
     */
    public function get( $url ) {
        $faviconURL = $this->guessFaviconURL( $url );
        if( empty($faviconURL) ) {
            return '';
        }

        // Judge the extension on the URL path only, so that cache-busting
        // query strings such as "favicon.ico?v=2" are still accepted.
        $faviconPath = parse_url( $faviconURL, PHP_URL_PATH );
        if( !is_string($faviconPath)
            || !preg_match('/\.(png|gif|jpe?g|ico)$/i', $faviconPath, $extMatch) ) {
            return '';
        }

        $urlParts = parse_url( $url );
        if( !is_array($urlParts) || empty($urlParts['host']) ) {
            return '';
        }

        // Construct the local file name from host + path of the website,
        // collapsing every run of non-word characters into a single dash.
        $fileName = $urlParts['host'] . (isset($urlParts['path']) ? $urlParts['path'] : '');
        $fileName = trim( preg_replace( '/\W+/', '-', $fileName ), '-' );
        $targetPath = $this->path . $fileName . '.' . strtolower( $extMatch[1] );

        if( $this->download($faviconURL, $targetPath, $url) ) {
            return $targetPath;
        }
        return '';
    }

    /**
     * Determines the favicon URL for a website.
     *
     * The page is fetched and searched for a <link rel="icon"> tag; if none
     * is present the conventional "/favicon.ico" location is used instead.
     *
     * @param string $url Address of the website.
     * @return string Absolute favicon URL, or '' if the URL is unusable or
     *                the page could not be fetched.
     */
    protected function guessFaviconURL( $url ) {
        if( empty($url) ) {
            return '';
        }

        // The URL is user supplied: only fetch real web addresses, never
        // local files or other stream wrappers.
        $urlParts = parse_url( $url );
        if( !is_array($urlParts) || empty($urlParts['scheme']) || empty($urlParts['host'])
            || !in_array(strtolower($urlParts['scheme']), array('http', 'https'), true) ) {
            return '';
        }

        $html = $this->fetchPage( $url );
        if( empty($html) ) {
            return '';
        }

        $link = $this->findIconLink( $html );
        if( $link !== '' ) {
            $resolved = $this->resolveURL( $urlParts, $link );
            if( $resolved !== '' ) {
                return $resolved;
            }
        }

        // No (usable) link tag: "guess" the default favicon location.
        return $this->originOf( $urlParts ) . '/favicon.ico';
    }

    /**
     * Fetches the (beginning of the) HTML of a page.
     *
     * @param string $url Page address.
     * @return string|false Page content, truncated to $maxPageDownload bytes,
     *                      or false on failure.
     */
    protected function fetchPage( $url ) {
        $context = stream_context_create( array(
            'http' => array(
                'method' => "GET",
                'timeout' => $this->timeout
            )
        ));
        return @file_get_contents( $url, false, $context, 0, $this->maxPageDownload );
    }

    /**
     * Extracts the href of the first <link> tag whose rel contains "icon".
     *
     * Attribute order and quoting style (double, single, none) do not matter.
     *
     * @param string $html Page markup.
     * @return string Entity-decoded href, or '' if no icon link exists.
     */
    protected function findIconLink( $html ) {
        if( !preg_match_all('/<link\b[^>]*>/i', $html, $tags) ) {
            return '';
        }
        foreach( $tags[0] as $tag ) {
            // rel is a whitespace separated token list ("shortcut icon", "icon", ...)
            $relTokens = preg_split( '/\s+/', strtolower( trim( $this->attribute($tag, 'rel') ) ) );
            $href = trim( $this->attribute($tag, 'href') );
            if( $href !== '' && in_array('icon', $relTokens, true) ) {
                return html_entity_decode( $href );
            }
        }
        return '';
    }

    /**
     * Reads a single attribute value out of an HTML tag.
     *
     * @param string $tag  Complete tag markup, e.g. '<link rel="icon" ...>'.
     * @param string $name Attribute name.
     * @return string Raw attribute value, or '' if the attribute is absent.
     */
    protected function attribute( $tag, $name ) {
        $pattern = '/\s' . preg_quote($name, '/') . '\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/i';
        if( !preg_match($pattern, $tag, $m) ) {
            return '';
        }
        // Trailing unmatched groups are omitted, so the last entry is the
        // alternative that actually matched.
        return end( $m );
    }

    /**
     * Resolves a link found on a page against the page's own URL.
     *
     * @param array  $baseParts parse_url() result of the page URL (scheme and host set).
     * @param string $link      Absolute, protocol-relative, root-relative or relative link.
     * @return string Absolute http(s) URL, or '' if the link uses another scheme.
     */
    protected function resolveURL( $baseParts, $link ) {
        // Absolute link: accept http(s) only.
        if( preg_match('#^([a-z][a-z0-9+.\-]*):#i', $link, $scheme) ) {
            return in_array(strtolower($scheme[1]), array('http', 'https'), true) ? $link : '';
        }

        // Protocol-relative link ("//cdn.example.com/favicon.ico").
        if( substr($link, 0, 2) === '//' ) {
            return $baseParts['scheme'] . ':' . $link;
        }

        // Separate the path from the query string; a fragment is never sent
        // to the server and is dropped.
        preg_match( '/^([^?#]*)(\?[^#]*)?/', $link, $m );
        $linkPath = $m[1];
        $query = isset($m[2]) ? $m[2] : '';

        if( substr($linkPath, 0, 1) !== '/' ) {
            // Relative link: resolve against the directory of the page,
            // not against the page itself.
            $basePath = isset($baseParts['path']) ? $baseParts['path'] : '/';
            $linkPath = substr( $basePath, 0, (int)strrpos($basePath, '/') + 1 ) . $linkPath;
        }

        // Remove "." and ".." segments.
        $segments = array();
        foreach( explode('/', $linkPath) as $segment ) {
            if( $segment === '' || $segment === '.' ) {
                continue;
            }
            if( $segment === '..' ) {
                array_pop( $segments );
                continue;
            }
            $segments[] = $segment;
        }

        return $this->originOf( $baseParts ) . '/' . implode( '/', $segments ) . $query;
    }

    /**
     * Builds "scheme://host[:port]" from a parse_url() result.
     *
     * @param array $urlParts parse_url() result with scheme and host set.
     * @return string
     */
    protected function originOf( $urlParts ) {
        $origin = $urlParts['scheme'] . '://' . $urlParts['host'];
        if( isset($urlParts['port']) ) {
            $origin .= ':' . $urlParts['port'];
        }
        return $origin;
    }

    /**
     * Downloads a file, aborting if it exceeds $maxDownload bytes.
     *
     * @param string $url     Address of the file to download.
     * @param string $target  Local path the file is written to.
     * @param string $referer Value sent as the Referer header.
     * @return bool true if the complete, non-empty file was stored.
     */
    protected function download( $url, $target, $referer = '' ) {
        // The referer originates from user input; strip line breaks so it
        // cannot be used to inject additional request headers.
        $referer = str_replace( array("\r", "\n"), '', $referer );

        $context = stream_context_create( array(
            'http' => array(
                'method' => "GET",
                'header' => "Referer: $referer\r\n",
                'timeout' => $this->timeout
            )
        ));

        $fp = @fopen( $url, 'r', false, $context );
        if( !$fp ) {
            return false;
        }

        $file = '';
        while( !feof($fp) ) {
            $chunk = fread( $fp, 8192 );
            $meta = stream_get_meta_data( $fp );
            // Give up on read errors, timeouts and oversized files.
            if( $chunk === false || !empty($meta['timed_out'])
                || strlen($file) + strlen($chunk) > $this->maxDownload ) {
                fclose( $fp );
                return false;
            }
            $file .= $chunk;
        }
        fclose( $fp );

        // Never leave an empty or partially written file behind as a "favicon".
        if( $file === '' ) {
            return false;
        }
        return @file_put_contents( $target, $file ) === strlen( $file );
    }
}
This class requires allow_url_fopen to be enabled. No cURL option this time, sorry.