Page 1 of 1

High performance autoloader

Posted: Tue Apr 20, 2010 1:29 am
by Benjamin
I have written a high-performance autoloader for the framework I am building. I'm not satisfied with this code. Something smells, but I can't put my finger on it.

I feel like it can be simplified and/or that the method names are a bit "off". What do you think?

Code: Select all

<?php
/**
 * High performance class resource locator
 *
 * Recursively scans multiple directories for *.class.php files.  The naming
 * convention dictates the filename, minus extensions, is the name of the class.
 * The scan results are then saved to disk in a serialized array for instant
 * access.
 *
 * The file system scan is only performed when no usable cache file exists,
 * eliminating hundreds of system calls to file_exists() and the like for every
 * page request.
 *
 * A rescan is automatically performed when a class is not found in the cached
 * array.  Manually clearing the cache when adding/renaming classes is not
 * necessary.
 *
 * @todo Create a file writer class for writing the cache file.
 * @todo Research caching the class list into shared memory
 */
class adfClassLocator implements adfAutoLoaderInterface {
    /**
     * An array of classes, with lower-cased class names as the keys and their
     * associated path as the values.  Null until the list has been loaded.
     *
     * @access protected
     * @var array|null
     */
    protected static $_classList = null;

    /**
     * Indicates whether a file system scan has been performed during the
     * current request.  Static, so it is shared by every instance.
     *
     * @access protected
     * @var bool
     */
    protected static $_hasScanned = false;

    /**
     * Absolute path to the serialized class cache file.
     *
     * Declared explicitly instead of being created on the fly in the
     * constructor.  Kept public because the former dynamic property was
     * implicitly public.
     *
     * @var string
     */
    public $cacheFilePath;

    /**
     * Accumulator used by getPaths()/_scanDirectory() while walking the scan
     * directories.  Kept public for the same reason as $cacheFilePath.
     *
     * @var array
     */
    public $files = array();

    /**
     * Public Constructor
     *
     * @param string $cacheFile An optional absolute path to the cache file.
     * @todo Move cache file path into central configuration
     */
    public function __construct($cacheFile = null) {
        $this->cacheFilePath = ($cacheFile !== null) ? $cacheFile : ABSOLUTE_SYS_PATH . 'lib/cache/system/classCache';
    }

    /**
     * Attempts to find the file system path to a class.
     *
     * On first use the class list is loaded from the cache file.  If the cache
     * file is missing, unreadable or does not contain a serialized array, a
     * file system scan is performed instead and its in-memory result is used
     * directly (the cache file is not read back, so a failed cache write can
     * no longer corrupt the list).
     *
     * If the class is not in the list and no scan has been performed yet
     * during this request, one rescan is performed before giving up.
     *
     * The lookup is case-insensitive: the name is lower-cased first.
     *
     * @param string $className The name of the class
     * @return bool True if the class is in the class list, otherwise false
     * @todo Look into optimizing this algorithm for speed
     */
    public function canFind($className) {
        $className = strtolower($className);

        if (self::$_classList === null) {
            $cachedList = $this->_readCacheFile();

            if ($cachedList !== null) {
                self::$_classList = $cachedList;
            } else {
                // No usable cache: scanPaths() fills $_classList itself.
                $this->scanPaths($this->getPaths());
            }
        }

        if (isset(self::$_classList[$className])) {
            return true;
        }

        // Cache miss: the cache may be stale, so rescan once per request.
        if (!self::$_hasScanned) {
            $this->scanPaths($this->getPaths());
        }

        return isset(self::$_classList[$className]);
    }

    /**
     * Get the path to a class
     *
     * Returns the path stored in the class list for the given class name.
     *
     * <b>NOTE:</b> canFind() should be called first to ensure the class is in
     * the $_classList array.  Existence of the class is not tested here, so
     * PHP reports an undefined index if it is not in the array.
     *
     * @param string $className The name of the class.
     * @return string The path to the class file.
     */
    public function getPath($className) {
        return self::$_classList[strtolower($className)];
    }

    /**
     * Populates the $_classList array from an array of file paths.
     *
     * Each path in the provided array matching the naming convention of
     * className.class.php will be added to the $_classList array, keyed by the
     * lower-cased class name.  If two files share a class name, the later
     * path wins.
     *
     * <b>NOTE: </b> This method resets $_classList to an empty array before
     * processing the file paths and marks the scan as done.
     *
     * @param array $filePaths An array of paths to files
     * @param bool $doWriteCacheFile Optionally set whether the cache file should
     * be saved.
     */
    protected function scanPaths($filePaths, $doWriteCacheFile = true) {
        self::$_hasScanned = true;
        self::$_classList = array();

        foreach ($filePaths as $path) {
            // Both dots of ".class.php" are escaped so that e.g.
            // "Foo.classXphp" is not mistaken for a class file.
            if (preg_match('#^..*/([^/\.]+)\.class\.php$#i', $path, $matches)) {
                self::$_classList[strtolower($matches[1])] = $path;
            }
        }

        if ($doWriteCacheFile) {
            $this->writeCacheFile();
        }
    }

    /**
     * Caches the class list
     *
     * Saves the contents of $_classList on disk as a serialized array.  A
     * warning is raised if the cache directory is not writable or the write
     * fails; the in-memory class list is unaffected.
     */
    protected function writeCacheFile() {
        $cacheDir = dirname($this->cacheFilePath);

        // file_put_contents() signals failure with false; compare strictly so
        // a byte count is never mistaken for an error.
        if (!is_writable($cacheDir) || file_put_contents($this->cacheFilePath, serialize(self::$_classList)) === false) {
            trigger_error("Cannot write class cache to '" . $this->cacheFilePath . "'.", E_USER_WARNING);
        }
    }

    /**
     * Iterates through each folder path, gathering all *.class.php files in
     * each path.
     *
     * @return array An array of file paths
     */
    protected function getPaths() {
        $scanDirs = array(
            ABSOLUTE_SYS_PATH . 'lib/controllers',
            ABSOLUTE_SYS_PATH . 'lib/core/classes',
            ABSOLUTE_SYS_PATH . 'lib/models',
            ABSOLUTE_SYS_PATH . 'lib/vendor/classes',
        );

        $this->files = array();

        foreach ($scanDirs as $path) {
            $this->_scanDirectory($path);
        }

        return $this->files;
    }

    /**
     * Loads the class list from the cache file.
     *
     * @return array|null The cached class list, or null when the cache file is
     * missing, unreadable or does not unserialize to an array.
     */
    private function _readCacheFile() {
        if (!is_file($this->cacheFilePath) || !is_readable($this->cacheFilePath)) {
            return null;
        }

        $contents = file_get_contents($this->cacheFilePath);

        if ($contents === false) {
            return null;
        }

        // unserialize() returns false for corrupt data; anything that is not
        // an array is treated as "no cache" so the caller rescans.
        $classList = unserialize($contents);

        return is_array($classList) ? $classList : null;
    }

    /**
     * Recursively collects all *.class.php files below a path into $files.
     *
     * Directories whose name starts with a dot (".", "..", ".svn", ...) are
     * not descended into.
     *
     * @param string $path The directory to scan
     * @return array|bool The accumulated $files array, or false if the path is
     * not a readable directory or cannot be opened
     */
    private function _scanDirectory($path = null) {
        if ($path === null || !is_dir($path) || !is_readable($path)) {
            return false;
        }

        $dh = opendir($path);

        // opendir() can still fail after the checks above.
        if ($dh === false) {
            return false;
        }

        while (false !== ($file = readdir($dh))) {
            if (is_dir("$path/$file") && !preg_match('#^(?:\.svn|\.{1,2}).*$#', $file)) {
                $this->_scanDirectory("$path/$file");
            } elseif (preg_match('#^.*\.class\.php$#i', $file)) {
                $this->files[] = "$path/$file";
            }
        }

        closedir($dh);

        return $this->files;
    }
}

Re: High performance autoloader

Posted: Tue Apr 20, 2010 1:39 am
by Eran
Something smells, but I can't put a finger on it.
Two things:
1. Why are you using static properties if none of the methods are static? Use regular protected properties instead if there is just one instance, or use a singleton.
2. You are relying on an externally defined constant (~ global). I would pass that information via the constructor or one of the methods.

Re: High performance autoloader

Posted: Tue Apr 20, 2010 1:51 am
by Benjamin
pytrin wrote:1. Why are you using static properties if none of the methods are static? use regular protected properties instead if there is just one instance, or use a singleton
Good call. I will make it a singleton.
pytrin wrote:2. You are relying on an externally defined constant (~ global). I would pass that information via the constructor or one of the methods.
Well, at this point in the code, the registry object hasn't been created yet. I could pass it as an argument to getInstance() I suppose, but then I would always have to do that. I mean it's really only instantiated once when the framework is initializing itself though.

Why do you not like the use of global constants?

Re: High performance autoloader

Posted: Wed Apr 21, 2010 12:08 am
by Benjamin
I rewrote it and I feel like it's a lot better. Still not perfect though. Got anything, guys?

Code: Select all

<?php
/**
 * Singleton class locator.
 *
 * Maps lower-cased class names to the paths of their *.class.php files found
 * below ABSOLUTE_SYS_PATH . 'lib'.  The map is cached on disk as a serialized
 * array and rebuilt by a file system scan when the cache is unusable or a
 * requested class is missing from it.
 */
class adfClassLocator implements adfAutoLoaderInterface {
    /**
     * The single instance, or false until getInstance() has created it.
     *
     * @var adfClassLocator|bool
     */
    private static $_instance = false;

    /**
     * Lower-cased class names as keys, file paths as values.
     *
     * @var array|null
     */
    private static $_classList = null;

    /**
     * Whether a file system scan has already been performed this request.
     *
     * @var bool
     */
    private static $_hasScanned = false;

    /**
     * Path to the serialized class cache file.
     *
     * Declared explicitly instead of being created on the fly in the
     * constructor.  Kept public because the former dynamic property was
     * implicitly public.
     *
     * @var string
     */
    public $cacheFilePath;

    /**
     * Loads the class list from the cache file, or scans the file system when
     * the cache file is missing, unreadable or not a serialized array.
     *
     * @param string $cacheFile An optional path to the cache file.
     */
    private function __construct($cacheFile = null) {
        $this->cacheFilePath = ($cacheFile !== null) ? $cacheFile : ABSOLUTE_SYS_PATH . 'cache/system/classCache';

        $cachedList = $this->_readCacheFile();

        if ($cachedList !== null) {
            self::$_classList = $cachedList;
        } else {
            $this->scanPaths();
        }
    }

    /**
     * Returns the shared locator instance, creating it on the first call.
     *
     * The instance is stored so later calls reuse it instead of constructing
     * a new locator (and re-reading the cache) every time.  $cacheFile is
     * only honoured by the call that creates the instance.
     *
     * @param string $cacheFile An optional path to the cache file.
     * @return adfClassLocator
     */
    public static function getInstance($cacheFile = null) {
        if (self::$_instance === false) {
            self::$_instance = new self($cacheFile);
        }

        return self::$_instance;
    }

    /**
     * Tells whether the given class is in the class list.
     *
     * If the class is not listed and no scan has been performed yet during
     * this request, the file system is rescanned once before answering.  The
     * lookup is case-insensitive: the name is lower-cased first.
     *
     * @param string $className The name of the class
     * @return bool
     */
    public function canLocate($className) {
        $className = strtolower($className);

        if (!isset(self::$_classList[$className]) && !self::$_hasScanned) {
            $this->scanPaths();
        }

        return isset(self::$_classList[$className]);
    }

    /**
     * Returns the path stored for the given class name.
     *
     * canLocate() should be called first; existence of the class is not
     * tested here, so PHP reports an undefined index for an unknown class.
     *
     * @param string $className The name of the class
     * @return string The path to the class file
     */
    public function getPath($className) {
        return self::$_classList[strtolower($className)];
    }

    /**
     * Rebuilds the class list from the file system and writes the cache file.
     */
    private function scanPaths() {
        self::$_hasScanned = true;
        self::$_classList = array();

        // No trailing slash: the scanner joins with "/" itself, which would
        // otherwise produce "lib//..." paths.
        $this->_recursiveDirectoryScan(ABSOLUTE_SYS_PATH . 'lib');

        $this->writeCacheFile();
    }

    /**
     * Saves $_classList to the cache file as a serialized array.
     *
     * A warning is raised if the cache directory is not writable or the write
     * fails; the in-memory class list is unaffected.
     */
    private function writeCacheFile() {
        $cacheDir = dirname($this->cacheFilePath);

        // file_put_contents() signals failure with false; compare strictly so
        // a byte count is never mistaken for an error.
        if (!is_writable($cacheDir) || file_put_contents($this->cacheFilePath, serialize(self::$_classList)) === false) {
            trigger_error("Cannot write class cache to '" . $this->cacheFilePath . "'.", E_USER_WARNING);
        }
    }

    /**
     * Loads the class list from the cache file.
     *
     * @return array|null The cached class list, or null when the cache file is
     * missing, unreadable or does not unserialize to an array.
     */
    private function _readCacheFile() {
        if (!is_file($this->cacheFilePath) || !is_readable($this->cacheFilePath)) {
            return null;
        }

        $contents = file_get_contents($this->cacheFilePath);

        if ($contents === false) {
            return null;
        }

        // unserialize() returns false for corrupt data; anything that is not
        // an array is treated as "no cache" so the caller rescans.
        $classList = unserialize($contents);

        return is_array($classList) ? $classList : null;
    }

    /**
     * Recursively adds every *.class.php file below a path to $_classList.
     *
     * Directories whose name starts with a dot (".", "..", ".svn", ...) are
     * not descended into.  If two files share a class name, the one read
     * later wins.
     *
     * @param string $path The directory to scan
     * @return bool|null False if the path is not a readable directory or
     * cannot be opened, otherwise nothing
     */
    private function _recursiveDirectoryScan($path) {
        if (!is_dir($path) || !is_readable($path)) {
            return false;
        }

        $dh = opendir($path);

        // opendir() can still fail after the checks above.
        if ($dh === false) {
            return false;
        }

        while (false !== ($file = readdir($dh))) {
            if (is_dir("$path/$file") && !preg_match('#^(?:\.svn|\.{1,2}).*$#', $file)) {
                $this->_recursiveDirectoryScan("$path/$file");
            // readdir() yields bare entry names without any "/", so the
            // pattern must match the file name alone.
            } elseif (preg_match('#^([^/\.]+)\.class\.php$#i', $file, $matches)) {
                self::$_classList[strtolower($matches[1])] = "$path/$file";
            }
        }

        closedir($dh);
    }
}