Delimited File Interpreter
Posted: Thu Jun 10, 2010 4:41 pm
Hello everyone. It's been a while... I've recently had the need to create an object that will determine the delimiter of a file and the type of string enclosure it's using (it only allows for single or double quotes, currently). I'd really like some feedback on this; I'm sure there are a few mistakes and certainly optimizations I could be making. Attached is a very crazy space-delimited file that my co-worker came up with. Just toss "test.txt" into your /tmp directory (or change the file location in the script as needed). For practical purposes on my end, I've assumed that the first line of a file contains the column names.
Thanks in advance,
Andy
Thanks in advance,
Andy
Code: Select all
<?php
/**
 * Guesses the field delimiter and quote character of a delimited text file.
 *
 * A delimiter/quote pair is accepted when every non-blank row of the file,
 * parsed with fgetcsv(), has the same number of fields and more than one field.
 */
class DelimitedFile
{
    /** Path of the file being inspected. */
    public $filename;
    /** Lines of the file as returned by file(), or false when it could not be read. */
    public $data;
    /** Number of lines in $data (0 when the file could not be read). */
    public $line_count;
    /**
     * Detection results keyed by delimiter: true for a delimiter that parsed the
     * whole file consistently, otherwise the number of rows after the first that
     * matched the first row's width before parsing failed.
     */
    public $delimiters = array();
    /** Quote character that worked together with the accepted delimiter. */
    public $quote;

    /**
     * @param string $file_name Path of the file to inspect.
     */
    public function __construct($file_name)
    {
        $this->filename = $file_name;
        $this->data = file($file_name);
        // line_count was declared but never populated; getMostLikelyDelimiter() reads it.
        $this->line_count = is_array($this->data) ? count($this->data) : 0;
    }

    /**
     * Tries candidate delimiters against the file and records the outcome.
     *
     * @param array|bool $delimiters Candidates to try, in order. Passing false
     *                               (or any empty value) tries every character that
     *                               occurs in the file, most frequent first. Passing
     *                               true selects the built-in default list.
     * @return array The accumulated $delimiters result map.
     */
    public function detectDelimiter( $delimiters = array(',','|',"\t",';','~','#',' ') )
    {
        if ($delimiters === true) {
            // A bare "true" used to reach foreach() and detect nothing.
            $delimiters = array(',', '|', "\t", ';', '~', '#', ' ');
        }

        if (empty($delimiters)) {
            $this->parseDelimiters($this->rankCharactersByFrequency());
        } else {
            $this->parseDelimiters((array) $delimiters);
        }

        return $this->delimiters;
    }

    /**
     * Lists the distinct bytes found in the file, most frequent first.
     *
     * Quote characters and line terminators are left out because they cannot
     * serve as a field delimiter here.
     *
     * @return array Single-character strings.
     */
    private function rankCharactersByFrequency()
    {
        $frequency = array();

        if (is_array($this->data)) {
            foreach ($this->data as $line) {
                // Keys are byte values (ints), so digit characters keep their identity.
                foreach (count_chars($line, 1) as $byte => $occurrences) {
                    $seen = isset($frequency[$byte]) ? $frequency[$byte] : 0;
                    $frequency[$byte] = $seen + $occurrences;
                }
            }
        }

        foreach (array("'", '"', "\r", "\n") as $excluded) {
            unset($frequency[ord($excluded)]);
        }

        // arsort() keeps the keys; array_multisort() would renumber integer keys.
        arsort($frequency);

        return array_map('chr', array_keys($frequency));
    }

    /**
     * Tests each candidate with both quote characters and stops at the first
     * pair that parses the whole file consistently.
     *
     * @param array $delimiters Candidate delimiters; entries that are not a
     *                          single character are skipped.
     * @return bool True when a delimiter/quote pair was accepted.
     */
    private function parseDelimiters($delimiters = array())
    {
        $quotes = array('"', "'");

        foreach ($delimiters as $candidate) {
            if (!is_scalar($candidate)) {
                continue;
            }
            $delimiter = (string) $candidate;
            // fgetcsv() only accepts a one-character separator.
            if (strlen($delimiter) !== 1) {
                continue;
            }

            foreach ($quotes as $q) {
                if ($delimiter === $q) {
                    continue;
                }

                $result = $this->countConsistentRows($delimiter, $q);
                if ($result === true) {
                    $this->delimiters[$delimiter] = true;
                    $this->quote = $q;
                    return true;
                }

                // Keep the best partial score instead of summing both quote attempts.
                if ($result > 0) {
                    $previous = isset($this->delimiters[$delimiter]) ? $this->delimiters[$delimiter] : 0;
                    if ($previous !== true && $previous < $result) {
                        $this->delimiters[$delimiter] = $result;
                    }
                }
            }
        }

        return false;
    }

    /**
     * Parses the file with one delimiter/quote pair.
     *
     * @param string $delimiter Single-character field separator.
     * @param string $quote     Single-character enclosure.
     * @return bool|int True when at least one row was read and every non-blank
     *                  row had the same field count (> 1); otherwise the number
     *                  of rows after the first that matched before the mismatch.
     */
    private function countConsistentRows($delimiter, $quote)
    {
        $fp = fopen($this->filename, "r");
        if ($fp === false) {
            return 0;
        }

        $width = null;
        $matched = 0;
        $consistent = true;

        // Length 0 lets fgetcsv() read rows of any length.
        while (is_array($row = fgetcsv($fp, 0, $delimiter, $quote, "\\"))) {
            // fgetcsv() reports a blank line as array(null); ignore those.
            if ($row === array(null)) {
                continue;
            }

            $count = count($row);
            if ($count <= 1 || ($width !== null && $count !== $width)) {
                $consistent = false;
                break;
            }

            if ($width === null) {
                $width = $count;
            } else {
                $matched++;
            }
        }
        fclose($fp);

        // An empty file never sets $width and therefore never counts as a match.
        if ($consistent && $width !== null) {
            return true;
        }

        return $matched;
    }

    /**
     * Picks the delimiter that detection considers correct.
     *
     * @return string|bool|null The accepted delimiter; false when results exist
     *                          but none qualifies; null when nothing was recorded.
     */
    public function getMostLikelyDelimiter()
    {
        if (!count($this->delimiters)) {
            return null;
        }

        // A value of true marks a delimiter that parsed the entire file.
        foreach ($this->delimiters as $delimiter => $score) {
            if ($score === true) {
                return (string) $delimiter;
            }
        }

        // Otherwise accept a delimiter whose matched-row count covers every line after the first.
        $expected = (int) $this->line_count - 1;
        if ($expected > 0) {
            foreach ($this->delimiters as $delimiter => $score) {
                if ($score === $expected) {
                    return (string) $delimiter;
                }
            }
        }

        return false;
    }
}
/**
 * Loads a delimited file into rows keyed by the column names on its first line.
 *
 * The delimiter and quote character are detected by the parent class.
 */
class DelimitedFileInfo extends DelimitedFile
{
    /** Detected delimiter, or false/null when detection failed. */
    public $delimiter;
    /** Column names taken from the first line of the file. */
    public $columns = array();
    /** Data rows, each an array keyed by column name. */
    public $keyed_columns = array();

    /**
     * @param string     $data                    Path of the file to load.
     * @param array|bool $use_built_in_delimiters True to try the parent's default
     *                                            delimiter list, false to try any
     *                                            character found in the file, or an
     *                                            explicit array of candidates.
     */
    public function __construct($data,$use_built_in_delimiters = true)
    {
        parent::__construct($data);

        if ($use_built_in_delimiters === true) {
            // Forwarding "true" would replace the default candidate list with a bool.
            $this->detectDelimiter();
        } else {
            $this->detectDelimiter($use_built_in_delimiters);
        }

        $this->delimiter = $this->getMostLikelyDelimiter();

        // Populate the count reported by __toString() if the parent left it unset.
        if ($this->line_count === null) {
            $this->line_count = is_array($this->data) ? count($this->data) : 0;
        }

        $this->formatData();
    }

    /**
     * Reads the header line into $columns and every following row into
     * $keyed_columns. Does nothing when no usable delimiter was detected.
     */
    private function formatData()
    {
        $delimiter = (is_string($this->delimiter) || is_int($this->delimiter))
            ? (string) $this->delimiter
            : '';
        // fgetcsv() needs a one-character separator; without one there is nothing to parse.
        if (strlen($delimiter) !== 1) {
            return;
        }

        $quote = (is_string($this->quote) && strlen($this->quote) === 1) ? $this->quote : '"';

        $fp = fopen($this->filename,"r");
        if ($fp === false) {
            return;
        }

        $header = fgetcsv($fp, 0, $delimiter, $quote, "\\");
        if (is_array($header) && $header !== array(null)) {
            $this->columns = $header;
            $width = count($header);

            while (is_array($row = fgetcsv($fp, 0, $delimiter, $quote, "\\"))) {
                // Blank lines arrive as array(null); array_combine() rejects a width mismatch.
                if (count($row) !== $width) {
                    continue;
                }
                $this->keyed_columns[] = array_combine($header, $row);
            }
        }

        fclose($fp);
    }

    /**
     * One-line, tab-separated summary of the detection result.
     *
     * @return string
     */
    public function __toString()
    {
        $delimiter = (string) $this->delimiter;

        return "Delimiter: ASCII-#" . ord($delimiter) . "\t" .
            "Delimiter: '" . $delimiter . "'\t" .
            "Quote: " . $this->quote . "\t" .
            "Lines: " . $this->line_count . "\t" .
            "Columns: " . count($this->columns) . "\n";
    }

    /**
     * Returns data row $n with its values joined by ';'.
     *
     * @param int $n Zero-based index into the data rows (the header is not counted).
     * @return string Empty string when the row does not exist.
     */
    public function strGetLine($n)
    {
        if (!isset($this->keyed_columns[$n]) || !is_array($this->keyed_columns[$n])) {
            return '';
        }

        return implode(';', $this->keyed_columns[$n]);
    }

    /**
     * Returns one data row.
     *
     * With no argument the rows are handed out one per call, starting with the
     * first data row; false is returned once they are exhausted. The previous
     * implementation called next() first and so never returned row 0.
     *
     * @param int|string $n Zero-based row index, or '' to fetch the next row.
     * @return array|bool|null The row; false when iteration is finished; null
     *                         when the requested index does not exist.
     */
    public function getLine($n='')
    {
        // Strict test: a loose comparison treats 0 as '' on PHP < 8.
        if ($n === '' || $n === null) {
            $row = current($this->keyed_columns);
            next($this->keyed_columns);
            return $row;
        }

        return isset($this->keyed_columns[$n]) ? $this->keyed_columns[$n] : null;
    }
}
/**
 * Demo with an unknown delimiter: passing false as the second argument asks
 * the detector to try any character found in the file. Prints the summary
 * line, then dumps data row 11.
 */
$f = new DelimitedFileInfo("/tmp/test.txt", false);
print $f;
echo print_r($f->getLine(11), true);