PHOBOSLAB

Blog Home

Fixing Favatars

Ever since this blog went online I have used Favatars to make it easier to recognize the authors of comments. I think this worked out really well for my site, as many visitors have their own websites and favicons. However, despite Paul James pointing out that Cool URIs don’t change, I got more and more 404s when I viewed my site. All of these resulted from favicons that weren’t there anymore.

To fix this, I wrote a small PHP5 class that attempts to download the favicon from the commenter’s website. This class is very conservative – it stops a download if it’s bigger than 50 KB, has a short timeout, only allows PNG, GIF, JPEG and ICO extensions and silently returns if there’s any error. Here’s how you can use it:

// Create a new Favatars object and tell it to save
// all downloaded favicons to files/favatars/
$favatars = new Favatars( 'files/favatars/' );

// The get method attempts to find a favicon for the given
// URL and returns the local path of the downloaded favicon
// (the directory passed above plus a generated file name),
// or an empty string if none could be found.
$comment['favatar'] = $favatars->get( $comment['website'] );

Read on to view the source of this class.

// Based on Paul James' Favatars - http://www.peej.co.uk/projects/favatars.html
/**
 * Downloads the favicon of a website into a local directory.
 *
 * Every failure (unreachable host, unsupported extension, oversized file,
 * write error, ...) is swallowed on purpose: the public API simply returns
 * an empty string so a missing favicon never breaks the calling page.
 */
class Favatars {
    // Maximum size in bytes of a favicon that will be saved.
    public $maxDownload = 51200;

    // Maximum number of bytes of the page HTML that is read while looking
    // for a <link rel="icon"> tag.
    public $maxPageDownload = 262144;

    // Timeout in seconds applied to every HTTP request.
    public $timeout = 2;

    // Directory (with trailing slash) the favicons are written to.
    protected $path = '';

    /**
     * @param string $path Target directory, including the trailing slash.
     */
    public function __construct( $path = 'favatars/' ) {
        $this->path = $path;
    }

    /**
     * Finds and downloads the favicon for a website.
     *
     * @param string $url Address of the website (http or https).
     * @return string Local path of the saved favicon, or '' on any failure.
     */
    public function get( $url ) {
        $faviconURL = $this->guessFaviconURL( $url );
        if( empty($faviconURL) ) {
            return '';
        }

        // Check the extension on the path component only, so a cache-busting
        // query string such as "favicon.ico?v=2" does not defeat the check.
        $faviconPath = @parse_url( $faviconURL, PHP_URL_PATH );
        if( !is_string($faviconPath) || !preg_match('/\.(png|gif|jpe?g|ico)$/i', $faviconPath, $ext) ) {
            return '';
        }

        $urlParts = @parse_url( $url );
        if( !is_array($urlParts) || empty($urlParts['host']) ) {
            return '';
        }

        // Construct the local file name from host and path of the website;
        // every run of non-word characters collapses into a single dash.
        $fileName = $urlParts['host'] . (isset($urlParts['path']) ? $urlParts['path'] : '');
        $fileName = trim( preg_replace( '/\W+/', '-', $fileName ), "-" );
        $targetPath = $this->path . $fileName . '.' . strtolower( $ext[1] );

        if( $this->download($faviconURL, $targetPath, $url) ) {
            return $targetPath;
        }

        return '';
    }

    /**
     * Determines the absolute favicon URL for a website.
     *
     * The page is fetched and searched for a <link rel="icon"> tag; if none
     * is found, "/favicon.ico" on the same host is assumed.
     *
     * @param string $url Address of the website.
     * @return string Absolute favicon URL, or '' if the page is unusable.
     */
    protected function guessFaviconURL( $url ) {
        if( empty($url) ) {
            return '';
        }

        // The URL comes from an untrusted comment field: only plain web
        // addresses are fetched, never local files or other stream wrappers.
        $urlParts = @parse_url( $url );
        if(
            !is_array($urlParts) || empty($urlParts['scheme']) || empty($urlParts['host']) ||
            !in_array( strtolower($urlParts['scheme']), array('http', 'https'), true )
        ) {
            return '';
        }

        $context = stream_context_create( array(
            'http' => array(
                'method' => "GET",
                'timeout' => $this->timeout
            )
        ));
        $html = @file_get_contents( $url, false, $context, 0, $this->maxPageDownload );
        if( !$html ) {
            return '';
        }

        $linkUrl = $this->findFaviconLink( $html );
        if( $linkUrl === '' ) {
            // No link tag found: "guess" the conventional location
            $linkUrl = '/favicon.ico';
        }
        return $this->resolveFaviconLink( $url, $linkUrl );
    }

    /**
     * Extracts the href of the first <link rel="icon"> or
     * <link rel="shortcut icon"> tag, regardless of attribute order or
     * quote style.
     *
     * @param string $html Page source.
     * @return string Entity-decoded href value, or '' if there is none.
     */
    protected function findFaviconLink( $html ) {
        if( !preg_match_all('/<link\b[^>]*>/i', $html, $tags) ) {
            return '';
        }

        foreach( $tags[0] as $tag ) {
            $isIcon = preg_match( '/\srel\s*=\s*(["\'])\s*(?:shortcut\s+)?icon\s*\1/i', $tag );
            if( $isIcon && preg_match('/\shref\s*=\s*(["\'])(.+?)\1/is', $tag, $href) ) {
                return trim( html_entity_decode($href[2]) );
            }
        }

        return '';
    }

    /**
     * Turns a favicon link found on a page into an absolute URL.
     *
     * @param string $url     Address of the page the link was found on.
     * @param string $linkUrl Absolute, protocol-relative, root-relative or
     *                        document-relative link.
     * @return string Absolute URL, or '' if $url has no scheme or host.
     */
    protected function resolveFaviconLink( $url, $linkUrl ) {
        $urlParts = @parse_url( $url );
        if( !is_array($urlParts) || empty($urlParts['scheme']) || empty($urlParts['host']) ) {
            return '';
        }

        // Already absolute
        if( preg_match('#^https?://#i', $linkUrl) ) {
            return $linkUrl;
        }

        // Protocol-relative ("//cdn.example.com/favicon.ico"); must be
        // tested before the single-slash case below.
        if( substr($linkUrl, 0, 2) == '//' ) {
            return $urlParts['scheme'] . ':' . $linkUrl;
        }

        $origin = $urlParts['scheme'] . '://' . $urlParts['host']
            . (isset($urlParts['port']) ? ':' . $urlParts['port'] : '');

        // Root-relative
        if( substr($linkUrl, 0, 1) == '/' ) {
            return $origin . $linkUrl;
        }

        // Document-relative: resolve against the directory of the page.
        // Query string and fragment of the page are ignored. A last path
        // segment containing a dot is taken to be a file name and dropped;
        // a segment without a dot is treated as a directory.
        $path = isset($urlParts['path']) ? $urlParts['path'] : '/';
        $lastSlash = strrpos( $path, '/' );
        if( $lastSlash === false ) {
            $path = '/';
        } else if( strpos((string)substr($path, $lastSlash + 1), '.') !== false ) {
            $path = substr( $path, 0, $lastSlash + 1 );
        } else if( substr($path, -1) != '/' ) {
            $path .= '/';
        }

        return $origin . $path . $linkUrl;
    }

    /**
     * Downloads a file, giving up as soon as it exceeds $maxDownload bytes.
     *
     * @param string $url     Address of the file.
     * @param string $target  Local path the file is written to.
     * @param string $referer Value sent in the Referer header.
     * @return bool True only if the complete, non-empty file was saved.
     */
    protected function download( $url, $target, $referer = '' ) {
        // The referer is user supplied; strip line breaks so it cannot
        // inject additional request headers.
        $referer = str_replace( array("\r", "\n"), '', $referer );

        $context = stream_context_create( array(
            'http' => array(
                'method' => "GET",
                'header' => "Referer: $referer\r\n",
                'timeout' => $this->timeout
            )
        ));

        $fp = @fopen( $url, 'rb', false, $context );
        if( !$fp ) {
            return false;
        }

        $file = '';
        $complete = true;
        while( !feof($fp) ) {
            $chunk = fread( $fp, 8192 );
            if( $chunk === false ) {
                // read error - do not keep looping on a broken stream
                $complete = false;
                break;
            }
            $file .= $chunk;
            if( strlen($file) > $this->maxDownload ) {
                $complete = false;
                break;
            }
        }

        // feof() also reports true after a socket timeout, which would
        // otherwise let a truncated icon through.
        $meta = stream_get_meta_data( $fp );
        fclose( $fp );

        if( !$complete || !empty($meta['timed_out']) || $file === '' ) {
            return false;
        }

        return @file_put_contents( $target, $file ) === strlen( $file );
    }
}

This class requires allow_url_fopen to be enabled. No CURL option this time, sorry.

Saturday, July 5th 2008
— Dominic Szablewski, @phoboslab