I'm new here and I need some help...
I'm writing a script to compress (zip) and send multiple files with PHP. There can be a lot of files at once, and some of them can be really huge (several hundred MB). So I figured I would split the files into chunks, compress the chunks one by one to avoid memory problems, and send them out one after another.
But I have run into a big problem: I can't see how to compress (with either gzcompress or deflate) the chunks of a file and keep its integrity. The tests I made show that the compressed file is different (and corrupted) when I compress it chunk by chunk. I know it works when I use gzwrite in a loop on a temp file, but I want to stay in memory to avoid repeated file accesses, and to be able to send data out before the file is completely zipped...
So my question is: can I compress multiple files on the fly, chunk by chunk, and send them out to the client as a single streamed zip file?
UPDATE : I added my code for you to check out :
Basically what I do now is :
get a file from ftp server > read it by chunks > gzwrite chunks in tmp file > read entire tmpfile in memory > add it into archive > close archive > send archive URL for download.
And what I want to do is :
get a file from ftp server > send file headers to client > read it by chunks > compress (or deflate) chunks in memory > send chunks > process next files > send global archive descriptor > voilà, the whole archive has been sent to client without ever having either a whole file in memory or waiting for a file to be completely compressed before sending it.
So what do you think ?
Code: Select all
<?php
// Set-up phase of the archive script: loads the zip library, opens the FTP
// connection described by the POST data, computes the total number of bytes
// to process (for the progress-polling script) and creates the archive object.

// include zip lib
require_once('zip.lib.php');
// avoid PHP timeouts
set_time_limit(0);
ini_set("max_execution_time", 0);
// unique id for this particular archive process; uniqid() + mt_rand() gives
// far more than the ~1,000,000 distinct values rand(0, 1000000) allowed, so
// concurrent requests do not collide on temp/archive file names
$id = md5(uniqid(mt_rand(), true));
// change extension for Mac users (ereg() was removed in PHP 7; preg_match()
// performs the same case-sensitive search)
$userAgent = isset($_SERVER["HTTP_USER_AGENT"]) ? $_SERVER["HTTP_USER_AGENT"] : "";
if (preg_match('/Macintosh|Mac_PowerPC/', $userAgent)) {
    $archiveFileName = "archive" . $id . ".sit";
} else {
    $archiveFileName = "archive" . $id . ".zip";
}
// open ftp connection to retrieve files
$connection = ftp_connect($_POST["server"]) or die("Connexion échouée.");
ftp_login($connection, $_POST["login"], $_POST["password"]) or die("Login échoué.");
ftp_pasv($connection, false);
ftp_chdir($connection, $_POST["root"]) or die("Dossier non ouvert.");
// get total size of files to process
$tmpTotalSize = 0;
// NOTE(review): addFolderSize() is not defined in this file; presumably it
// adds the folder's size to $tmpTotalSize -- confirm where it is declared.
// The root is read from $_POST because $root only exists when
// register_globals is enabled.
for ($i = 0; isset($_POST["folder_" . $i]); $i++) {
    addFolderSize($connection, $_POST["root"], ".", $_POST["folder_" . $i]);
}
for ($i = 0; isset($_POST["file_" . $i]); $i++) {
    $size = ftp_size($connection, $_POST["file_" . $i]);
    // ftp_size() returns -1 on failure; do not let that shrink the total
    if ($size > 0) {
        $tmpTotalSize += $size;
    }
}
// session vars for status check with another script
session_start();
$_SESSION["processedSize"] = 0;
$_SESSION["totalSize"] = $tmpTotalSize;
$_SESSION["archiveFileName"] = $archiveFileName;
session_write_close();
// define size of data chunks
$chunkSize = 32000;
// local temp file : each file is transferred from ftp to the web server before being compressed
$tmp_file_name = "download" . $id . ".tmp";
// create archive
$zip = new zipfile();
/**
 * Recursively adds the content of an FTP directory to the archive.
 *
 * Changes into $folder on the FTP server, downloads every regular file to the
 * global temp file ($GLOBALS["tmp_file_name"]), hands it to $zip->addFile()
 * and recurses into sub-directories. The FTP working directory is restored
 * (ftp_cdup) before returning.
 *
 * Entry names are taken as the text after the last space of each raw listing
 * line, so names containing spaces are not supported.
 *
 * @param resource $connection      open FTP connection
 * @param string   $root            FTP path of the parent directory (only passed down the recursion)
 * @param string   $path            path of the parent directory inside the archive
 * @param string   $folder          name of the directory to process, relative to the current FTP directory
 * @param object   $zip             zipfile instance receiving the files
 * @param string   $archiveFileName path of the archive file on the web server
 * @param integer  $chunkSize       chunk size in bytes, forwarded to addFile()
 */
function addFolder($connection, $root, $path, $folder, $zip, $archiveFileName, $chunkSize)
{
    // If the directory cannot be entered, stop here: listing "." would
    // otherwise process the parent directory again and the ftp_cdup() below
    // would climb one level too far.
    if (!ftp_chdir($connection, $folder)) {
        return;
    }
    $tab = @ftp_rawlist($connection, ".");
    // ftp_rawlist() returns false on failure; only iterate over a real listing
    if (is_array($tab)) {
        $entryCount = count($tab);
        // index 0 is skipped, as in the original code (presumably the
        // "total N" summary line of a Unix-style listing)
        for ($i = 1; $i < $entryCount; $i++) {
            $line = $tab[$i];
            if ($line === "") {
                continue;
            }
            $name = substr(strrchr($line, " "), 1);
            if ($line[0] == "d") {
                if ($name != "." && $name != "..") {
                    addFolder($connection, $root . "/" . $folder, $path . "/" . $folder, $name, $zip, $archiveFileName, $chunkSize);
                }
            } else {
                if (ftp_get($connection, $GLOBALS["tmp_file_name"], $name, FTP_BINARY)) {
                    $zip->addFile($archiveFileName, $GLOBALS["tmp_file_name"], $path . "/" . $folder . "/" . $name, 0, $chunkSize);
                    unlink($GLOBALS["tmp_file_name"]);
                }
            }
        }
    }
    ftp_cdup($connection);
}
// Processing phase: add every requested folder and file to the archive,
// finalize it, reset the progress session variables and print the archive
// file name for the client.

// process directories
for ($i = 0; isset($_POST["folder_" . $i]); $i++) {
    addFolder($connection, $_POST["root"], ".", $_POST["folder_" . $i], $zip, "tmp/" . $archiveFileName, $chunkSize);
}
// process files
for ($i = 0; isset($_POST["file_" . $i]); $i++) {
    if (ftp_get($connection, $tmp_file_name, $_POST["file_" . $i], FTP_BINARY)) {
        $zip->addFile("tmp/" . $archiveFileName, $tmp_file_name, $_POST["file_" . $i], 0, $chunkSize);
        unlink($tmp_file_name);
    }
}
// close ftp connection
ftp_close($connection);
// finalize archive
$zip->closeArchive("tmp/" . $archiveFileName);
// save archive filename before unset
$output = $archiveFileName;
// unset session vars (session_unregister() was removed in PHP 5.4; unset()
// on the $_SESSION entry is sufficient)
session_start();
unset($_SESSION["processedSize"]);
$_SESSION["totalSize"] = 0;
$_SESSION["archiveFileName"] = "";
session_write_close();
// set back PHP timeout values
// NOTE(review): the two calls disagree (30 vs 300 seconds); the ini_set()
// value is the one left in effect -- confirm which limit is intended.
set_time_limit(30);
ini_set("max_execution_time", 300);
// echo archive filename
echo ($output);
?>Code: Select all
<?php
/* $Id: zip.lib.php,v 1.6 2002/03/30 08:24:04 loic1 Exp $ */
/**
* Zip file creation class.
* Makes zip files.
*
* Based on :
*
* http://www.zend.com/codex.php?id=535&single=1
* By Eric Mueller <eric@themepark.com>
*
* http://www.zend.com/codex.php?id=470&single=1
* by Denis125 <webmaster@atlant.ru>
*
* a patch from Peter Listiak <mlady@users.sourceforge.net> for last modified
* date and time of the compressed file
*
* Official ZIP file format: http://www.pkware.com/appnote.txt
*
* @access public
*/
class zipfile
{
    /**
     * Central directory records, one binary string per archived file
     *
     * @var array $ctrl_dir
     */
    public $ctrl_dir = array();

    /**
     * Start of the "end of central directory" record (signature + disk numbers)
     *
     * @var string $eof_ctrl_dir
     */
    public $eof_ctrl_dir = "\x50\x4b\x05\x06\x00\x00\x00\x00";

    /**
     * Offset, inside the archive, at which the next local file header starts
     *
     * @var integer $old_offset
     */
    public $old_offset = 0;

    /**
     * General offset used when adding the next file to the archive
     * (the central directory is only written at the end of the file)
     *
     * @var integer $currentArchiveOffset
     */
    public $currentArchiveOffset = 0;

    /**
     * Length in bytes of the archive written so far, central directory excluded
     *
     * @var integer $archiveStrlen
     */
    public $archiveStrlen = 0;

    /**
     * Converts an Unix timestamp to a four byte DOS date and time format (date
     * in high two bytes, time in low two bytes allowing magnitude comparison).
     * Dates before 1980 are clamped to 1980-01-01 00:00:00.
     *
     * @param integer the Unix timestamp (0 means "now")
     *
     * @return integer the date in a four byte DOS format
     *
     * @access private
     */
    public function unix2DosTime($unixtime = 0)
    {
        $timearray = ($unixtime == 0) ? getdate() : getdate($unixtime);
        if ($timearray['year'] < 1980) {
            $timearray['year'] = 1980;
            $timearray['mon'] = 1;
            $timearray['mday'] = 1;
            $timearray['hours'] = 0;
            $timearray['minutes'] = 0;
            $timearray['seconds'] = 0;
        }
        return (($timearray['year'] - 1980) << 25) | ($timearray['mon'] << 21) | ($timearray['mday'] << 16) |
            ($timearray['hours'] << 11) | ($timearray['minutes'] << 5) | ($timearray['seconds'] >> 1);
    } // end of the 'unix2DosTime()' method

    /**
     * Bit by bit CRC32 (from the php.net documentation notes) that can be fed
     * a string in several consecutive calls.
     *
     * The register is kept in a static variable between calls: pass true as
     * $first_call for the first chunk and false for the following ones; the
     * value returned by the last call is the CRC of all chunks concatenated.
     * Only the low 32 bits of the result are significant (use pack('V', ...)).
     *
     * addFile() no longer uses this method (it relies on the much faster
     * hash extension); it is kept for callers that still depend on it.
     *
     * @param string  the chunk of data
     * @param boolean true to (re)initialise the register before processing
     *
     * @return integer the CRC32 of the data processed so far
     *
     * @access public
     */
    public function bitbybit_crc32($str, $first_call)
    {
        // reflection in 32 bits of crc32 polynomial 0x04C11DB7
        $poly_reflected = 0xEDB88320;
        // keeps track of the register value after each call
        static $reg = 0xFFFFFFFF;
        // initialize register on first call
        if ($first_call) {
            $reg = 0xFFFFFFFF;
        }
        $n = strlen($str);
        $zeros = $n < 4 ? $n : 4;
        // xor first $zeros=min(4,strlen($str)) bytes into the register
        // ($str[$i] replaces the $str{$i} syntax removed in PHP 8)
        for ($i = 0; $i < $zeros; $i++) {
            $reg ^= ord($str[$i]) << $i * 8;
        }
        // now for the rest of the string
        for ($i = 4; $i < $n; $i++) {
            $next_char = ord($str[$i]);
            for ($j = 0; $j < 8; $j++) {
                $reg = (($reg >> 1 & 0x7FFFFFFF) | ($next_char >> $j & 1) << 0x1F)
                    ^ ($reg & 1) * $poly_reflected;
            }
        }
        // put in enough zeros at the end
        for ($i = 0; $i < $zeros * 8; $i++) {
            $reg = ($reg >> 1 & 0x7FFFFFFF) ^ ($reg & 1) * $poly_reflected;
        }
        // xor the register with 0xFFFFFFFF
        return ~$reg;
    }

    /**
     * Adds "file" to archive, chunks version
     *
     * The source file is read chunk by chunk, deflated through gzwrite() into
     * a temporary gzip file, and the raw deflate stream of that file is then
     * copied chunk by chunk into the archive, so neither the source nor the
     * compressed data is ever held entirely in memory. After each chunk the
     * number of bytes read is added to $_SESSION["processedSize"].
     *
     * Entries are written as: local file header + deflate data. No data
     * descriptor is appended because general purpose flag bit 3 is not set
     * and CRC/sizes are already known when the header is written.
     *
     * @param string archive file name
     * @param string file to compress
     * @param string name of the file in the archive (contains the path)
     * @param integer the current timestamp
     * @param integer chunk size in bytes
     *
     * @return boolean true on success, false if a file could not be opened,
     *                 compressed or copied (a failure during the copy leaves a
     *                 truncated entry in the archive)
     *
     * @access public
     */
    public function addFile($archiveFile, $tmpFile, $fileName, $time = 0, $chunkSize = 32000)
    {
        // replace slashes
        $fileName = str_replace('\\', '/', $fileName);
        if (substr($fileName, 0, 2) == "./") {
            $fileName = substr($fileName, 2);
        }
        $chunkSize = max(1, (int) $chunkSize);

        // open source file (binary mode)
        $sourceFileHandler = fopen($tmpFile, "rb");
        if ($sourceFileHandler === false) {
            return false;
        }
        // create and gzopen tmp compressed data file
        $tmpCompressedFileName = tempnam(sys_get_temp_dir(), "zip");
        if ($tmpCompressedFileName === false) {
            fclose($sourceFileHandler);
            return false;
        }
        $compressedFileHandler = gzopen($tmpCompressedFileName, "wb");
        if ($compressedFileHandler === false) {
            fclose($sourceFileHandler);
            unlink($tmpCompressedFileName);
            return false;
        }

        // compress chunk by chunk, computing CRC32 and length on the way
        $crcContext = hash_init("crc32b");
        $unc_len = 0;
        while (!feof($sourceFileHandler)) {
            $tmpData = fread($sourceFileHandler, $chunkSize);
            if ($tmpData === false || $tmpData === "") {
                break;
            }
            gzwrite($compressedFileHandler, $tmpData);
            hash_update($crcContext, $tmpData);
            $readLength = strlen($tmpData);
            $unc_len += $readLength;
            // update session var with the bytes actually read (the last
            // chunk is usually shorter than $chunkSize)
            session_start();
            $_SESSION["processedSize"] = (isset($_SESSION["processedSize"]) ? $_SESSION["processedSize"] : 0) + $readLength;
            session_write_close();
        }
        // close files
        fclose($sourceFileHandler);
        gzclose($compressedFileHandler);

        // crc32b hex digest is big-endian; reverse it to get the
        // little-endian 4 bytes expected by the zip format
        $crcBytes = strrev(pack('H*', hash_final($crcContext)));

        // the gzip file is: 10 byte header + deflate stream + 8 byte trailer
        // (CRC32 + ISIZE); only the deflate stream goes into the archive
        clearstatcache();
        $c_len = filesize($tmpCompressedFileName) - 18;
        if ($c_len < 0) {
            unlink($tmpCompressedFileName);
            return false;
        }

        // last mod time and date, little-endian
        $hexdtime = pack('V', $this->unix2DosTime($time));

        // local file header
        $fileHeader = "\x50\x4b\x03\x04";
        $fileHeader .= "\x14\x00"; // ver needed to extract
        $fileHeader .= "\x00\x00"; // gen purpose bit flag, default
        $fileHeader .= "\x08\x00"; // compression method
        $fileHeader .= $hexdtime; // last mod time and date
        $fileHeader .= $crcBytes; // crc32
        $fileHeader .= pack('V', $c_len); // compressed filesize
        $fileHeader .= pack('V', $unc_len); // uncompressed filesize
        $fileHeader .= pack('v', strlen($fileName)); // length of filename
        $fileHeader .= pack('v', 0); // extra field length
        $fileHeader .= $fileName;

        // write header then stream the deflate data into the archive
        $compressedReader = fopen($tmpCompressedFileName, "rb");
        $archiveHandler = ($compressedReader === false) ? false : fopen($archiveFile, "ab");
        if ($archiveHandler === false) {
            if ($compressedReader !== false) {
                fclose($compressedReader);
            }
            unlink($tmpCompressedFileName);
            return false;
        }
        fwrite($archiveHandler, $fileHeader);
        fseek($compressedReader, 10); // skip gzip header
        $remaining = $c_len;
        while ($remaining > 0) {
            $piece = fread($compressedReader, min($chunkSize, $remaining));
            if ($piece === false || $piece === "") {
                break;
            }
            fwrite($archiveHandler, $piece);
            $remaining -= strlen($piece);
        }
        fclose($compressedReader);
        fclose($archiveHandler);
        // delete tmp compressed file
        unlink($tmpCompressedFileName);
        if ($remaining > 0) {
            // the compressed data could not be copied completely
            return false;
        }

        $entryLength = strlen($fileHeader) + $c_len;

        // central directory record
        $cdrec = "\x50\x4b\x01\x02";
        $cdrec .= "\x00\x00"; // version made by
        $cdrec .= "\x14\x00"; // version needed to extract
        $cdrec .= "\x00\x00"; // gen purpose bit flag
        $cdrec .= "\x08\x00"; // compression method
        $cdrec .= $hexdtime; // last mod time & date
        $cdrec .= $crcBytes; // crc32
        $cdrec .= pack('V', $c_len); // compressed filesize
        $cdrec .= pack('V', $unc_len); // uncompressed filesize
        $cdrec .= pack('v', strlen($fileName)); // length of filename
        $cdrec .= pack('v', 0); // extra field length
        $cdrec .= pack('v', 0); // file comment length
        $cdrec .= pack('v', 0); // disk number start
        $cdrec .= pack('v', 0); // internal file attributes
        $cdrec .= pack('V', 32); // external file attributes - 'archive' bit set
        $cdrec .= pack('V', $this->old_offset); // relative offset of local header
        $cdrec .= $fileName;
        // optional extra field, file comment goes here

        // bookkeeping for the next entry
        $this->old_offset += $entryLength;
        $this->currentArchiveOffset += $entryLength;
        $this->archiveStrlen += $entryLength;
        // save to central directory
        $this->ctrl_dir[] = $cdrec;

        return true;
    }

    /**
     * Adds the central directory to the archive file and closes it
     *
     * The central directory offset is the current size of the archive file
     * (0 if the file does not exist yet, which yields a valid empty archive).
     *
     * @param string archive file name
     *
     * @return integer|boolean number of entries in the archive, or false if
     *                         the archive file could not be opened
     *
     * @access public
     */
    public function closeArchive($archiveFile)
    {
        $ctrldir = implode('', $this->ctrl_dir);
        $entryCount = count($this->ctrl_dir);
        // filesize() results are cached by PHP; make sure the size read
        // reflects everything appended so far
        clearstatcache();
        $centralDirOffset = file_exists($archiveFile) ? filesize($archiveFile) : 0;
        $globalFooter = $ctrldir .
            $this->eof_ctrl_dir .
            pack('v', $entryCount) . // total # of entries "on this disk"
            pack('v', $entryCount) . // total # of entries overall
            pack('V', strlen($ctrldir)) . // size of central dir
            pack('V', $centralDirOffset) . // offset to start of central dir
            "\x00\x00"; // .zip file comment length
        $archiveHandler = fopen($archiveFile, "ab");
        if ($archiveHandler === false) {
            return false;
        }
        fwrite($archiveHandler, $globalFooter);
        fclose($archiveHandler);
        return $entryCount;
    }
} // end of the 'zipfile' class
?>