I'm a bit fanatic when it comes down to normalization. I strongly believe that we should only store facts that we know.
This means that storing NULL values doesn't make sense.
Another result of my believe is that i end up with extra tables that exists out of a foreign key to the main data and one attribute.
In order to select the data (+optional attributes) i have to perform an OUTER JOIN.
Now, i wanted to measure the time difference between a regular select and a select with outer join(s) for different numbers of rows in the table(s).
Feel free to tell me where i'm mistaken.
If you know methods to profile memory usage, let me know.
If you know better methods for benchmarking, let me know.
Feedback is very welcome
Code: Select all
<?php
// +---------------------------------------------------------------------------
// | Author: Tim Van Wassenhove <timvw@users.sourceforge.net>
// +---------------------------------------------------------------------------
ini_set('error_reporting', E_ALL);
ini_set('display_errors', TRUE);
function create($conn) {
mysql_query('CREATE TABLE joined (joined_id INT NOT NULL AUTO_INCREMENT, firstname CHAR(20) NOT NULL, lastname CHAR(30) NOT NULL, PRIMARY KEY(joined_id)) TYPE=INNODB;', $conn) or die(mysql_error($conn));
mysql_query('CREATE TABLE joined_phone (joined_id INT, phone CHAR(10) NOT NULL, PRIMARY KEY (joined_id, phone), FOREIGN KEY (joined_id) REFERENCES joined (joined_id)) TYPE=INNODB;', $conn) or die(mysql_error($conn));
mysql_query('CREATE TABLE joined_email (joined_id INT, email CHAR(10) NOT NULL, PRIMARY KEY (joined_id, email), FOREIGN KEY (joined_id) REFERENCES joined (joined_id)) TYPE=INNODB;', $conn) or die(mysql_error($conn));
mysql_query('CREATE TABLE nojoin (nojoin_id INT NOT NULL AUTO_INCREMENT, firstname CHAR(20) NOT NULL, lastname CHAR(30) NOT NULL, phone CHAR(10), email CHAR(10), PRIMARY KEY (nojoin_id)) TYPE=INNODB;', $conn) or die(mysql_error($conn));
}
function drop($conn) {
mysql_query('DROP TABLE nojoin', $conn) or die(mysql_error($conn));
mysql_query('DROP TABLE joined_email', $conn) or die(mysql_error($conn));
mysql_query('DROP TABLE joined_phone', $conn) or die(mysql_error($conn));
mysql_query('DROP TABLE joined', $conn) or die(mysql_error($conn));
}
function fill($conn, $count = 1000) {
mysql_query('SET AUTOCOMMIT = 0', $conn) or die(mysql_error($conn));
mysql_query('SET UNIQUE_CHECKS = 0', $conn) or die(mysql_error($conn));
mysql_query('SET FOREIGN_KEY_CHECKS = 0', $conn) or die(mysql_error($conn));
mysql_query('BEGIN');
$uniq = uniqid(rand(), true);
for($i = 0; $i < $count; ++$i) {
$firstname = substr(md5($uniq++), 0, 20);
$lastname = substr(md5($uniq++), 0, 30);
$phone = 'NULL';
$email = 'NULL';
if (rand(0, 1) > 0) {
$phone = '\'' . substr(md5($uniq++), 0, . '\'';
}
if (rand(0, 1) > 0) {
$email = '\'' . substr(md5($uniq++), 0, . '\'';
}
mysql_query("INSERT INTO joined (firstname, lastname) VALUES ('$firstname', '$lastname')", $conn) or die(mysql_error($conn));
if ($phone != 'NULL' || $email != 'NULL') {
$id = mysql_insert_id($conn);
if ($phone != 'NULL') {
mysql_query("INSERT INTO joined_phone (joined_id, phone) VALUES ($id, $phone)", $conn) or die(mysql_error($conn));
}
if ($email != 'NULL') {
mysql_query("INSERT INTO joined_email (joined_id, email) VALUES ($id, $email)", $conn) or die (mysql_error());
}
}
mysql_query("INSERT INTO nojoin (firstname, lastname, phone, email) VALUES ('$firstname', '$lastname', $phone, $email)", $conn) or die(mysql_error());
}
mysql_query('COMMIT');
mysql_query('SET FOREIGN_KEY_CHECKS = 1', $conn) or die(mysql_error($conn));
mysql_query('SET UNIQUE_CHECKS = 1', $conn) or die(mysql_error($conn));
mysql_query('SET AUTOCOMMIT = 1', $conn) or die(mysql_error($conn));
}
function microtime_float() {
list($usec, $sec) = explode(' ', microtime());
return ((float)$usec + (float)$sec);
}
function test($conn, $count = 1000) {
$query = array();
$query[] = 'SELECT * FROM nojoin';
$query[] = 'SELECT * FROM joined LEFT OUTER JOIN joined_phone ON joined.joined_id = joined_phone.joined_id';
$query[] = 'SELECT * FROM joined LEFT OUTER JOIN joined_email ON joined.joined_id = joined_email.joined_id';
$query[] = 'SELECT * FROM joined LEFT OUTER JOIN joined_phone ON joined.joined_id = joined_phone.joined_id LEFT OUTER JOIN joined_email ON joined.joined_id = joined_email.joined_id';
$time = array();
for ($i = 0; $i < count($query); ++$i) {
$time[$i] = 0;
}
$data = array();
echo "\nPlease wait.";
for ($i = 0; $i < $count; ++$i) {
for ($j = 0; $j < count($query); ++$j) {
$before = microtime_float();
mysql_query($query[$j], $conn) or die(mysql_error($conn));
$after = microtime_float();
$time[$j] += ($after - $before);
}
for ($j = count($query) - 1; $j >= 0; --$j) {
$before = microtime_float();
mysql_query($query[$j], $conn) or die(mysql_error($conn));
$after = microtime_float();
$time[$j] += ($after - $before);
}
if ($i % 100 == 0 && $i > 0) {
echo '.';
}
}
echo "\n";
return $time;
}
$conn = mysql_connect('localhost', 'username', 'password');
mysql_select_db('database', $conn) or die(mysql_error($conn));
drop($conn);
create($conn);
fill($conn, 1);
for ($i = 3; $i < 5; ++$i) {
$total = pow(10, $i);
$already = pow(10, $i - 1);
$count = $total - $already;
fill($conn, $count);
$times = test($conn, 1000);
echo "\nResults for {$total} rows:\n";
for ($j = 0; $j < count($times); ++$j) {
$diff = round(((100 / $times[0]) * $times[$j]) - 100);
echo "time {$j}: {$times[$j]} diff: {$diff}%\n";
}
}
mysql_close($conn);
?>timvw@madoka:~$ php benchmark.php
Please wait.....
Results for 10 rows:
time 0: 0.38061618804932 diff: 0%
time 1: 0.39212822914124 diff: 3%
time 2: 0.39929938316345 diff: 5%
time 3: 0.44149446487427 diff: 16%
Please wait.....
Results for 100 rows:
time 0: 1.0028855800629 diff: 0%
time 1: 0.98137807846069 diff: -2%
time 2: 0.97600913047791 diff: -3%
time 3: 1.0882740020752 diff: 9%
Please wait.....
Results for 1000 rows:
time 0: 6.6008727550507 diff: 0%
time 1: 6.4744307994843 diff: -2%
time 2: 6.6128196716309 diff: 0%
time 3: 7.0546145439148 diff: 7%
Please wait.....
Results for 10000 rows:
time 0: 65.041672468185 diff: 0%
time 1: 63.138476848602 diff: -3%
time 2: 63.299174070358 diff: -3%
time 3: 71.14570069313 diff: 9%