Page 2 of 2

Re: csv library - a project I may actually finish

Posted: Mon Feb 18, 2008 5:29 pm
by Luke
Alright, I've majorly rewritten the reader object. Go ahead and have a look at it. The way you'd iterate now is:

Code: Select all

// Open the csv file; $file is the path handed to the Csv_Reader constructor.
// The three loops below are alternatives - pick one, they are not meant to
// be run back to back on the same reader.
$reader = new Csv_Reader($file);
// Option 1: Csv_Reader implements Iterator, so foreach hands out one row per pass.
foreach ($reader as $row) {
    // do something
}
 
// or...
 
// Option 2: drive the iterator by hand; the row itself is available via $reader->current().
while($reader->valid()) {
    // do something
    $reader->next();
}
 
// Option 3: getRow() returns the current row and then advances to the next one.
while($row = $reader->getRow()) {
    // do something
}
 

Code: Select all

<?php
/**
 * CSV Utils
 * 
 * This is a csv reader - basically it reads a csv file into an array
 * Please read the LICENSE file
 * @copyright MC2 Design Group, Inc. <luke@mc2design.com>
 * @author Luke Visinoni <luke@mc2design.com>
 * @package Csv
 * @license GNU Lesser General Public License
 * @version 0.1
 */
require_once 'Csv/Dialect.php';
require_once 'Csv/Exception.php';
/**
 * Provides an easy-to-use interface for reading csv-formatted text files. It
 * makes use of the function fgetcsv and reads one row at a time, so the whole
 * file is never held in memory. How the file is parsed (delimiter, quote
 * character, escape character, blank-line handling) is controlled by the
 * Csv_Dialect object handed to the constructor.
 *
 * The class implements Iterator and Countable directly, so it can be used in
 * foreach loops and passed to count().
 *
 * @todo Decide whether extending one of the SPL iterator classes would be a
 *       better fit than implementing Iterator by hand.
 * @package Csv
 * @subpackage Csv_Reader
 */
class Csv_Reader implements Iterator, Countable
{
    /**
     * Maximum row size (in bytes) passed to fgetcsv
     * @todo Should this be editable? maybe change it to a public variable
     */
    const MAX_ROW_SIZE = 4096;
    /**
     * Path to csv file
     * @var string
     * @access protected
     */
    protected $path;
    /**
     * Tells reader how to read the file
     * @var Csv_Dialect
     * @access protected
     */
    protected $dialect;
    /**
     * A handle that points to the file we are reading
     * @var resource
     * @access protected
     */
    protected $handle;
    /**
     * The currently loaded row, or false once the end of the file was reached
     * @var array|boolean
     * @access public
     * @todo: Should this be public?
     */
    public $current;
    /**
     * This is the current line position in the file we're reading
     * (skipped blank lines advance it as well)
     * @var integer
     */
    protected $position = 0;
    /**
     * Number of blank lines skipped since the last rewind
     * @var integer
     */
    protected $skippedlines = 0;
    /**
     * Class constructor - opens the file and loads the first row
     *
     * @param string Path to csv file we want to open
     * @param Csv_Dialect Describes how the file should be parsed - a default
     *        Csv_Dialect is created when omitted
     * @throws Csv_Exception If the path is empty or the file cannot be opened
     */
    public function __construct($path, Csv_Dialect $dialect = null/*, $skip_empty_rows = false*/) {

        if (is_null($dialect)) $dialect = new Csv_Dialect;
        $this->dialect = $dialect;
        // open the file
        $this->setPath($path);
        $this->handle = fopen($this->path, 'rb');
        if ($this->handle === false) throw new Csv_Exception('Unable to open file: "' . $path . '".');
        $this->rewind();

    }
    /**
     * Stores the path of the csv file we are about to open
     *
     * @param string Path to the csv file
     * @throws Csv_Exception If the path is empty
     * @access protected
     */
    protected function setPath($path) {

        $path = (string) $path;
        // an empty path can never be opened - fail with our own exception type
        if ($path === '') throw new Csv_Exception('Unable to open file: "' . $path . '".');
        $this->path = $path;

    }
    /**
     * Get the current Csv_Dialect object
     *
     * @return Csv_Dialect The current Csv_Dialect object
     * @access public
     */
    public function getDialect() {

        return $this->dialect;

    }
    /**
     * Change the dialect this csv reader is using
     *
     * @param Csv_Dialect The dialect to use for all rows loaded from now on
     * @access public
     */
    public function setDialect(Csv_Dialect $dialect) {

        $this->dialect = $dialect;

    }
    /**
     * Get the path to the csv file we're reading
     *
     * @return string The path to the file we are reading
     * @access public
     */
    public function getPath() {

        return $this->path;

    }
    /**
     * Removes the escape character in front of our quote character
     *
     * @param string The input we are unescaping (modified in place)
     * @param string The key of the item - unused, but array_walk passes it
     */
    protected function unescape(&$item, $key) {

        $quote = $this->dialect->quotechar;
        // the cast keeps a null column (blank line) from reaching str_replace
        $item = str_replace($this->dialect->escapechar . $quote, $quote, (string) $item);

    }
    /**
     * Returns the current row and calls next()
     *
     * @return array|boolean The current row, or false when there are no more rows
     * @access public
     */
    public function getRow() {

        $return = $this->current();
        $this->next();
        return $return;

    }
    /**
     * Loads the row at the file pointer into $this->current
     *
     * Blank lines are skipped in a loop (not by recursion) when the dialect
     * asks for it; every skipped line bumps both the skipped-line counter and
     * the position. $this->current is set to false when no row could be read.
     *
     * @access protected
     */
    protected function loadRow() {

        // nothing can be read from a closed handle
        if (!is_resource($this->handle)) {
            $this->current = false;
            return;
        }
        $dialect = $this->dialect;
        $unescape = $dialect->escapechar !== '' && $dialect->escapechar !== $dialect->quotechar;
        while (true) {
            $row = fgetcsv($this->handle, self::MAX_ROW_SIZE, $dialect->delimiter, $dialect->quotechar);
            if (!is_array($row)) {
                // end of file (or read error)
                $this->current = false;
                return;
            }
            // fgetcsv reports a blank line as an array holding a single null
            $blank = count($row) == 1 && ($row[0] === null || $row[0] === '');
            if ($dialect->skipblanklines && $blank) {
                // pretend this line never happened and try the next one
                $this->skippedlines++;
                $this->position++;
                continue;
            }
            if ($unescape) array_walk($row, array($this, 'unescape'));
            $this->current = $row;
            return;
        }

    }
    /**
     * Get number of lines that were skipped since the last rewind
     *
     * @return integer
     * @todo probably should return an array with actual data instead of just the amount
     */
    public function getSkippedLines() {

        return $this->skippedlines;

    }
    /**
     * Closes the file handle (safe to call more than once)
     *
     * @access public
     */
    public function close() {

        if (is_resource($this->handle)) fclose($this->handle);

    }
    /**
     * Destructor method - Closes the file handle
     *
     * @access public
     */
    public function __destruct() {

        $this->close();

    }

    /**
     * The following are the methods required by php's Standard PHP Library - Iterator, Countable Interfaces
     *
     * The "#[\ReturnTypeWillChange]" lines are plain comments for php < 8 and
     * silence the return type deprecation notices on php >= 8.1.
     */

    /**
     * Advances the internal pointer to the next row and returns it if valid, otherwise it returns false
     *
     * @access public
     * @return boolean|array An array of data if valid, or false if not
     */
    #[\ReturnTypeWillChange]
    public function next() {

        $this->position++;
        $this->loadRow(); // loads the current row into memory
        return ($this->valid()) ? $this->current : false;

    }
    /**
     * Tells whether or not the current row is valid - called after next and rewind
     *
     * This looks at the loaded row instead of feof(): the end-of-file flag is
     * already set right after the last row of a file without a trailing
     * newline was read, which used to hide that row from foreach loops.
     *
     * @access public
     * @return boolean True if the current row is valid
     */
    #[\ReturnTypeWillChange]
    public function valid() {

        return is_resource($this->handle) && is_array($this->current);

    }
    /**
     * Returns the current row
     *
     * @access public
     * @return array|boolean An array of the current row's data, false past the last row
     */
    #[\ReturnTypeWillChange]
    public function current() {

        return $this->current;

    }
    /**
     * Moves the internal pointer to the beginning and loads the first row
     *
     * The skipped-line counter is reset too, so it always describes a single
     * pass over the file.
     *
     * @access public
     */
    #[\ReturnTypeWillChange]
    public function rewind() {

        if (is_resource($this->handle)) rewind($this->handle);
        $this->position = 0;
        $this->skippedlines = 0;
        $this->loadRow(); // loads the current (first) row into memory

    }
    /**
     * Returns the key of the current row (position of pointer)
     *
     * @access public
     * @return integer
     */
    #[\ReturnTypeWillChange]
    public function key() {

        return (integer) $this->position;

    }
    /**
     * Returns the number of rows in the csv file
     *
     * The whole file is walked once; afterwards the file pointer, the current
     * row, the position and the skipped-line counter are put back, so calling
     * this in the middle of an iteration does not disturb it.
     *
     * @access public
     * @return integer
     */
    #[\ReturnTypeWillChange]
    public function count() {

        if (!is_resource($this->handle)) return 0;
        // remember where we are
        $offset = ftell($this->handle);
        $current = $this->current;
        $position = $this->position;
        $skipped = $this->skippedlines;

        $lines = 0;
        for ($this->rewind(); $this->valid(); $this->next()) $lines++;

        // ...and go back there
        if ($offset !== false) fseek($this->handle, $offset);
        $this->current = $current;
        $this->position = $position;
        $this->skippedlines = $skipped;
        return (integer) $lines;

    }
}

Re: csv library - a project I may actually finish

Posted: Tue Feb 19, 2008 7:38 am
by matthijs
Looks interesting and useful to me. Quickly read your blog post before you took it away. Are you going to return that post if you're happier with the code?

Re: csv library - a project I may actually finish

Posted: Tue Feb 19, 2008 11:17 am
by Luke
Yes, I do plan on putting it back up. I am rewriting it to reflect the API changes and beefing it up a little. (Funny... this post and that article are already the top two search results for "php csv library", even though I took the article down.)

EDIT: Oh and I'm setting up phpdocumentor for it.

Re: csv library - a project I may actually finish

Posted: Tue Feb 19, 2008 4:21 pm
by Luke
I have packaged some good downloads to the google code page.

http://code.google.com/p/php-csv-utils/downloads/list

Re: csv library - a project I may actually finish

Posted: Wed Feb 20, 2008 12:55 am
by Luke

Re: csv library - a project I may actually finish

Posted: Wed Feb 20, 2008 2:33 am
by Christopher
I like your latest version and the direction you are going. The future functionality looks pretty interesting too. For comparison, here is the class I wrote after my first post at the start of the thread. They are in many ways similar (I found fgetcsv()/fputcsv() too ;)), but you read one line at a time, which is better memory-wise. I also think your separate reader and writer is a better design. In fact almost everything in yours is better. Mine is minimal, all-in-one, all in memory with a sloppier all-public interface, no error handling, etc., etc.

This is the use case from your upload script:

Code: Select all

     require_once 'Delimited.php';
 
          echo "<table border='1'>";
          $reader = new Delimited($filename);
           echo "<tr>";
          foreach ($reader->columnNames as $header) printf("<th>%s</th>", $header);
          echo "</tr>";
          foreach ($reader->rows as $row) {
              echo "<tr>";
              foreach ($row as $col) printf("<td>%s</td>", $col);
              echo "</tr>";
          }
          echo "</table>";
 
Here is the code:

Code: Select all

/**
 * Minimal all-in-memory reader/writer for delimited text files.
 *
 * import() loads a whole file into $rows (and optionally $columnNames),
 * export() writes rows back out. Delimiter, enclosure and escape character
 * come from the settings object (a Delimited_Settings unless another object
 * with the same public properties is supplied).
 */
class Delimited {
    /** Path of the file read by import() / written by export() */
    protected $filename = null;
    /** Data rows, each one an array of column values */
    public $rows = array();
    /** Values of the first row when settings->fieldNamesInFirstRow is set */
    public $columnNames = array();
    /** Settings object describing the file format */
    public $settings;

    /**
     * @param string|null $filename File to read from / write to
     * @param array|null  $rows     Rows to start out with (used by export())
     * @param object|null $settings Format settings - defaults to a new Delimited_Settings
     */
    public function __construct($filename=null, $rows=null, $settings=null) {
        $this->configure($filename, $rows, $settings);
    }

    /**
     * Applies whichever of the given values are set and guarantees that
     * $this->settings holds a settings object afterwards.
     */
    private function configure($filename, $rows, $settings) {
        if ($filename) {
            $this->filename = $filename;
        }
        if ($rows) {
            $this->rows = $rows;
        }
        if ($settings) {
            $this->settings = $settings;
        }
        if (! $this->settings) {
            $this->settings = new Delimited_Settings;
        }
    }

    /**
     * Reads the file into $this->rows, skipping blank lines.
     *
     * @param string|null $filename Overrides the stored filename when given
     * @param object|null $settings Overrides the stored settings when given
     * @return int|false Number of data rows read, or false if the file could not be opened
     */
    public function import($filename=null, $settings=null) {
        $this->configure($filename, null, $settings);
        if (! $this->filename || ! is_file($this->filename)) {
            return false;
        }
        $handle = fopen($this->filename, "r");
        if ($handle === false) {
            return false;
        }
        $delimiter = $this->settings->fieldDelimiter;
        $enclosure = $this->settings->fieldEnclosure;
        $n = 0;
        if ($this->settings->fieldNamesInFirstRow) {
            // length 0 = no line length limit, so long rows are not split
            $header = fgetcsv($handle, 0, $delimiter, $enclosure);
            $this->columnNames = is_array($header) ? $header : array();
        }
        $this->rows = array();
        while (($row = fgetcsv($handle, 0, $delimiter, $enclosure)) !== false) {
            // fgetcsv reports a blank line as array(null); a row whose first
            // column is "0" or empty is still real data and must be kept
            if ($row === array(null)) {
                continue;
            }
            // strip escaping
            if ($this->settings->fieldEscape) {
                array_walk($row, array($this, '_unescape'), $this->settings);
            }
            $this->rows[$n++] = $row;
        }
        fclose($handle);
        return $n;
    }

    /**
     * Writes rows to the file using the configured delimiter and enclosure.
     *
     * @param string|null $filename Overrides the stored filename when given
     * @param array|null  $rows     Rows to write - defaults to $this->rows
     * @param object|null $settings Overrides the stored settings when given
     * @return int|false Number of rows written, or false on failure
     */
    public function export($filename=null, $rows=null, $settings=null) {
        $this->configure($filename, null, $settings);
        if (! $rows) {
            $rows = $this->rows;
        }
        if (! $this->filename) {
            return false;
        }
        $fp = fopen($this->filename, 'w');
        if (! $fp) {
            return false;
        }
        $n = 0;
        foreach ($rows as $row) {
            // add escaping ($row is a copy, the caller's data stays untouched)
            if ($this->settings->fieldEscape) {
                array_walk($row, array($this, '_escape'), $this->settings);
            }
            if (fputcsv($fp, $row, $this->settings->fieldDelimiter, $this->settings->fieldEnclosure) === false) {
                fclose($fp);
                return false;
            }
            ++$n;
        }
        fclose($fp);
        return $n;
    }

    /**
     * array_walk callback: turns escape+enclosure back into a bare enclosure.
     * Escape characters that do not precede an enclosure are left alone.
     */
    protected function _unescape(&$item, $key, $settings) {
        $item = str_replace($settings->fieldEscape.$settings->fieldEnclosure, $settings->fieldEnclosure, (string) $item);
    }

    /**
     * array_walk callback: puts the escape character in front of every enclosure.
     */
    protected function _escape(&$item, $key, $settings) {
        $item = str_replace($settings->fieldEnclosure, $settings->fieldEscape.$settings->fieldEnclosure, (string) $item);
    }

}
 
/**
 * Plain settings holder describing the format of a delimited text file.
 * All values are public and meant to be changed directly.
 */
class Delimited_Settings {
    // Line terminator (CRLF by default).
    // NOTE(review): the Delimited class in this listing does not appear to read it - confirm.
    public $lineDelimiter = "\r\n";
    // Character that separates columns - a tab by default; handed to fgetcsv() as the delimiter.
    public $fieldDelimiter = "\t";
    // Character that wraps column values - a double quote by default; handed to fgetcsv() as the enclosure.
    public $fieldEnclosure = '"';
    // Escape character (a single backslash) used by Delimited's _escape/_unescape helpers.
    public $fieldEscape = "\\";
    // When true, import() stores the first row of the file in columnNames.
    public $fieldNamesInFirstRow = true;
}
 

Re: csv library - a project I may actually finish

Posted: Wed Feb 20, 2008 9:45 am
by Luke
Why thank you! :) Like I said though, most of the better ideas in the design were borrowed from python.

I do like how headers can be explicitly requested in your reader though. This is something I have been thinking about. I want my eventual Csv_Sniffer class (which attempts to deduce the format of a sample of csv) to be capable of returning a Csv_Dialect as well as a Csv_Mapper that matches the csv file. I think I am going to add a Csv_Mapper as a third param in Csv_Reader, much like Csv_Dialect.