Page 2 of 2
Re: Another algorithm challenge :)
Posted: Sun Mar 02, 2008 8:51 pm
by dml
Hockey wrote:
...seek through the file handle byte by byte, basically emulating str_replace (slow like turtle -- easy on memory).
I should time this to confirm for myself, but is there a huge difference between say calling fread($h, 8192) once and calling fread($h, 1) 8192 times? PHP buffers file reads, so it's not going to cause any more system calls or physical I/O. If it's not an expensive operation, it has the advantage of letting PHP keep track of buffer offsets and of refreshing the buffer when the offset reaches the end: it's a boring job and if PHP can do it behind the scenes so much the better.
Another question. There's a
stream filter feature that sounds like it should provide the right abstraction, though it's not immediately obvious to me how one would use the API hooks to get a specific job like that done... Has anyone tried to use it?
Re: Another algorithm challenge :)
Posted: Sun Mar 02, 2008 9:17 pm
by Benjamin
This should do the trick.
Code: Select all
class stream_str_replace
{
# holds the last error
private $_error;
# holds the length of the longest item to be replaced
private $_max_replace_len;
# holds the file pointer for input_file
private $_input_fp;
# holds the file pointer for the output_file
private $_output_fp;
# holds the chunk size
private $_chunk_size = 8192;
# holds the current chunk of data being processed
private $_read_buffer;
# holds a small chunk that may contain a partial match
private $_temp_chunk;
# holds an array of replacements
private $_replacements;
# holds the total number of replacements made
private $_replacement_count;
public function do_str_replace($replacements, $input_file, $output_file)
{
$this->_replacements = $replacements;
$this->_replacement_count = 0;
try {
# set the maximum length for a replacement item
$this->_calibrate_offset();
# open the input file for reading
$this->_open_input_file($input_file);
# open output file for writing
$this->_open_output_file($output_file);
# validate chunk size
$this->_validate_chunk_size();
# do the replacements
$this->_process_replacements();
# close the file pointers
$this->_close_files();
# all done
return $this->_replacement_count;
} catch (Exception $e) {
# close the file pointers
$this->_close_files();
# set last error
$this->_error = $e->getMessage();
return false;
}
}
public function set_chunk_size($len)
{
# returns new chunk size
return $this->_chunk_size = (preg_match('#^\d+$#', $len)) ? $len : $this->_chunk_size;
}
public function get_error()
{
return $this->_error;
}
private function _process_replacements()
{
while (!feof($this->_input_fp))
{
$this->_read_buffer = str_replace(array_keys($this->_replacements), $this->_replacements, $this->_temp_chunk . fread($this->_input_fp, $this->_chunk_size), $count);
if (!feof($this->_input_fp))
{
list($this->_read_buffer, $this->_temp_chunk) = str_split($this->_read_buffer, (strlen($this->_read_buffer) - ($this->_max_replace_len - 1)));
}
$this->_write_data($this->_read_buffer);
$this->_replacement_count += $count;
}
}
private function _write_data($data)
{
if (!fwrite($this->_output_fp, $data))
{
throw new Exception("write error");
}
}
private function _close_files()
{
fclose($this->_input_fp);
fclose($this->_output_fp);
}
private function _validate_chunk_size()
{
if ($this->_chunk_size < $this->_max_replace_len)
{
throw new Exception("read chunk size must be larger than the maximum replacement length");
}
}
private function _open_output_file($output_file)
{
if (false === ($this->_output_fp = fopen($output_file, 'w')))
{
throw new Exception("write error: '$output_file' is not writeable or could not be created");
}
}
private function _open_input_file($input_file)
{
if (!is_file($input_file) || !is_readable($input_file) || (false === ($this->_input_fp = fopen($input_file, 'r'))))
{
throw new Exception("read error: '$input_file' does not exist, is not readable or you do not have permission to access this file");
}
}
private function _calibrate_offset()
{
foreach ($this->_replacements as $replacement => $null)
{
$this->_max_replace_len = ($this->_max_replace_len < strlen($replacement)) ? strlen($replacement) : $this->_max_replace_len;
}
}
}
Usage:
Code: Select all
$str_replace = new stream_str_replace();
$str_replace->set_chunk_size(8192);
$replacements = array('to' => 'from', 'red' => 'blue', 'foo' => 'bar');
if (false !== ($total_replacements = $str_replace->do_str_replace($replacements, 'input.txt', 'output.txt')))
{
echo "replaced " . number_format($total_replacements) . " items";
} else {
echo $str_replace->get_error();
}
Re: Another algorithm challenge :)
Posted: Mon Mar 03, 2008 8:21 pm
by Benjamin
Did I win?
Re: Another algorithm challenge :)
Posted: Mon Mar 03, 2008 9:03 pm
by alex.barylski
astions wrote:Did I win?
You have my vote.

Re: Another algorithm challenge :)
Posted: Mon Mar 03, 2008 11:07 pm
by Chris Corbyn
Hey sorry man never saw your post yesterday since being uber busy with visa stuff. I'll find some time later to have a look
