IPv6

Any questions involving matching text strings to patterns - the pattern is called a "regular expression."

Moderator: General Moderators

User avatar
Ambush Commander
DevNet Master
Posts: 3698
Joined: Mon Oct 25, 2004 9:29 pm
Location: New Jersey, US

IPv6

Post by Ambush Commander »

Hi, this IPv6 regex isn't working properly, anyone know what's wrong?

Code: Select all

$HEXDIG = '[A-Fa-f0-9]';
$h16 = "{$HEXDIG}{1,4}";
$dec_octet = '(?:25[0-5]|2[0-4]\d|1\d\d|1\d|[0-9])';
$IPv4address = "$dec_octet.$dec_octet.$dec_octet.$dec_octet";
$ls32 = "(?:$h16:$h16|$IPv4address)";
$IPv6Address = "(?:".
                                        "(?:$h16:){6}$ls32" .
                                     "|::(?:$h16:){5}$ls32" .
                            "|(?:$h16)?::(?:$h16:){4}$ls32" .
                "|(?:(?:$h16:){1}$h16)?::(?:$h16:){3}$ls32" .
                "|(?:(?:$h16:){2}$h16)?::(?:$h16:){2}$ls32" .
                "|(?:(?:$h16:){3}$h16)?::(?:$h16:){1}$ls32" .
                "|(?:(?:$h16:){4}$h16)?::$ls32" .
                "|(?:(?:$h16:){5}$h16)?::$h16" .
                "|(?:(?:$h16:){6}$h16)?::" .
                ")";
$regex = "/^$IPv6Address\$/";
preg_match($regex, '2001:db8::7', $matches);
I transcoded this from the ABNF notation, maybe something got lost on the way?

Edit - removed brackets, they shouldn't be there, as you may have figured, this is part of something bigger.
Last edited by Ambush Commander on Fri Aug 11, 2006 6:51 pm, edited 1 time in total.
User avatar
Ollie Saunders
DevNet Master
Posts: 3179
Joined: Tue May 24, 2005 6:01 pm
Location: UK

Post by Ollie Saunders »

errrr yesss. I can debug that ah huh :) yes I can :roll: :roll:

Sorry AC I think this one is down to you.
User avatar
feyd
Neighborhood Spidermoddy
Posts: 31559
Joined: Mon Mar 29, 2004 3:24 pm
Location: Bothell, Washington, USA

Post by feyd »

If you can put up a unit test, I'll see what I can do..
User avatar
Ambush Commander
DevNet Master
Posts: 3698
Joined: Mon Oct 25, 2004 9:29 pm
Location: New Jersey, US

Post by Ambush Commander »

Code: Select all

<?php

// related RFC 4291

require_once 'simpletest/unit_tester.php';
require_once 'simpletest/reporter.php';

function isValidIPv6($ip) {
    $ip = (string) $ip; // sanity check
    $HEXDIG = '[A-Fa-f0-9]';
    $h16 = "{$HEXDIG}{1,4}";
    $dec_octet = '(?:25[0-5]|2[0-4]\d|1\d\d|1\d|[0-9])';
    $IPv4address = "$dec_octet.$dec_octet.$dec_octet.$dec_octet";
    $ls32 = "(?:$h16:$h16|$IPv4address)";
    $IPv6Address = "(?:".
                                "(?:$h16:){6}$ls32" .
                             "|::(?:$h16:){5}$ls32" .
                    "|(?:$h16)?::(?:$h16:){4}$ls32" .
        "|(?:(?:$h16:){1}$h16)?::(?:$h16:){3}$ls32" .
        "|(?:(?:$h16:){2}$h16)?::(?:$h16:){2}$ls32" .
        "|(?:(?:$h16:){3}$h16)?::(?:$h16:){1}$ls32" .
        "|(?:(?:$h16:){4}$h16)?::$ls32" .
        "|(?:(?:$h16:){5}$h16)?::$h16" .
        "|(?:(?:$h16:){6}$h16)?::" .
    ")";
    $regex = "/^$IPv6Address\$/";
    return preg_match($regex, $ip); 
}

class isValidIPv6Test extends UnitTestCase
{
    
    function test() {
        $this->assertFalse(isValidIPv6(''));
        $this->assertTrue(isValidIPv6('2001:db8::7')); // fail
        $this->assertTrue(isValidIPv6('2001:0db8:0000:0000:0000:0000:1428:57ab'));
        $this->assertTrue(isValidIPv6('2001:0db8:0000:0000:0000::1428:57ab'));
        $this->assertTrue(isValidIPv6('2001:db8::1428:57ab')); // fail
        $this->assertTrue(isValidIPv6('2001:0db8:0::0:1428:57ab')); // fail
        $this->assertTrue(isValidIPv6('2001:1234:5678:FFFF:FFFF:FFFF:FFFF:FFFF'));
        $this->assertTrue(isValidIPv6('::1'));
        $this->assertTrue(isValidIPv6('2001:1234:5678::')); // fail
        $this->assertTrue(isValidIPv6('::FFFF:129.144.52.38')); // fail
        $this->assertTrue(isValidIPv6('0:0:0:0:0:0:13.1.68.3')); // fail
        $this->assertTrue(isValidIPv6('2001:DB8:0:0:8:800:200C:417A'));
        $this->assertFalse(isValidIPv6('asdf'));
        $this->assertFalse(isValidIPv6('23'));
        $this->assertFalse(isValidIPv6('2001:1234:5678::FFFF:FFFF:FFFF:FFFF:FFFF:FFFF'));
    }
    
}

$test = new isValidIPv6Test();
$test->run( new HTMLReporter );

?>
Not all of the IPs fail, interestingly. Maybe I'm just feeding them IPs that really are bad?

Edit - Updated testcase with more examples from the RFC, yes, the regex isn't working properly.
Last edited by Ambush Commander on Fri Aug 11, 2006 8:07 pm, edited 1 time in total.
nickvd
DevNet Resident
Posts: 1027
Joined: Thu Mar 10, 2005 5:27 pm
Location: Southern Ontario
Contact:

Post by nickvd »

Taken from: http://blogs.msdn.com/mpoulson/archive/ ... 50037.aspx

Code: Select all

<?php
include("debuglib.php");

require_once 'simpletest/unit_tester.php';
require_once 'simpletest/reporter.php';

function isValidIPv6($ip) {
   $ip = (string) $ip; // sanity check
   $IPv4Pattern = "(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}";
   $IPv6Pattern = "(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}";
   $IPv6Pattern_HEXCompressed = "((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)";
   $IPv6Pattern_6Hex4Dec = "((?:[0-9A-Fa-f]{1,4}:){6,6})(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}";
   $IPv6Pattern_Hex4DecCompressed = "((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?) ::((?:[0-9A-Fa-f]{1,4}:)*)(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}";
   $regex = '/'.$IPv4Pattern.'|'.$IPv6Pattern.'|'.$IPv6Pattern_HEXCompressed.'|'.$IPv6Pattern_6Hex4Dec.'|'.$IPv6Pattern_Hex4DecCompressed.'/';
   return preg_match($regex, $ip);
}

class isValidIPv6Test extends UnitTestCase
{

    function test() {
        $this->assertFalse(isValidIPv6(''));
        $this->assertTrue(isValidIPv6('192.168.0.123')); // fail
        $this->assertTrue(isValidIPv6('2001:db8::7')); // fail
        $this->assertTrue(isValidIPv6('2001:0db8:0000:0000:0000:0000:1428:57ab'));
        $this->assertTrue(isValidIPv6('2001:0db8:0000:0000:0000::1428:57ab'));
        $this->assertTrue(isValidIPv6('2001:db8::1428:57ab')); // fail
        $this->assertTrue(isValidIPv6('2001:0db8:0::0:1428:57ab')); // fail
        $this->assertTrue(isValidIPv6('2001:1234:5678:FFFF:FFFF:FFFF:FFFF:FFFF'));
        $this->assertTrue(isValidIPv6('::1'));
        $this->assertTrue(isValidIPv6('2001:1234:5678::')); // fail
        $this->assertFalse(isValidIPv6('asdf'));
        $this->assertFalse(isValidIPv6('23'));
    }

}

$test = new isValidIPv6Test();
$test->run( new HTMLReporter );
?>
Note, this tests for valid ipv4 AND ipv6 addresses, though very easy to modify to remove checks for ipv4...
User avatar
Ambush Commander
DevNet Master
Posts: 3698
Joined: Mon Oct 25, 2004 9:29 pm
Location: New Jersey, US

Post by Ambush Commander »

Those regexps are incorrect. See this assertion:

Code: Select all

$this->assertFalse(isValidIPv6('2001:1234:5678:FFFF::aaaa:aaaa:FFFF:FFFF:FFFF:FFFF'));
User avatar
Ambush Commander
DevNet Master
Posts: 3698
Joined: Mon Oct 25, 2004 9:29 pm
Location: New Jersey, US

Post by Ambush Commander »

After rereading the spec, I found out why the regex isn't working, although that doesn't help me very much.
RFC 3986 Section 3.2.2 wrote:This syntax does not support IPv6 scoped addressing zone identifiers.
The only problem is I don't know what those are, nor how to implement them in regex. >.>

Edit - Did some research, and actually, this isn't the case: http://www3.ietf.org/proceedings/00jul/ ... mat-02.txt uses percent signs.
User avatar
Ambush Commander
DevNet Master
Posts: 3698
Joined: Mon Oct 25, 2004 9:29 pm
Location: New Jersey, US

Post by Ambush Commander »

Hrmm... this is quite knotty. Still not resolved.
User avatar
feyd
Neighborhood Spidermoddy
Posts: 31559
Joined: Mon Mar 29, 2004 3:24 pm
Location: Bothell, Washington, USA

Post by feyd »

My hopeful mission today is to make this work.. with the prefix syntax they specify too. :) I'm trying to read and build unit test(s) right now. It's the slowest part of doing this stuff for me. :? oh well.
User avatar
Ambush Commander
DevNet Master
Posts: 3698
Joined: Mon Oct 25, 2004 9:29 pm
Location: New Jersey, US

Post by Ambush Commander »

Good luck. Whatever regex you come up with will be a great contribution to the greater programming community. :-D
User avatar
feyd
Neighborhood Spidermoddy
Posts: 31559
Joined: Mon Mar 29, 2004 3:24 pm
Location: Bothell, Washington, USA

Post by feyd »

ok, so it's not one giant regex.. after some thinking, that'd just be too crazy and probably eat the processor.. so here's a more simple subset of regex that get it done.

Code: Select all

<?php

class DNAUnitTest extends UnitTestCase
{
	public function __construct($aName = null)
	{
		if ($aName === null)
		{
			$name = preg_split('#([^A-Za-z]+|[A-Z]?[a-z]+)#', get_class($this), -1, PREG_SPLIT_DELIM_CAPTURE);
			$name = array_map('trim', $name);
			$name = array_filter($name);
			$name = implode(' ', $name);
		}
		else
		{
			$name = strval($aName);
		}
		
		parent::UnitTestCase($name);
	}
}

function isValidIP($aIP)
{
	$hex = '[0-9a-fA-F]';
	$blk = '(?:' . $hex . '{1,4})';
	$pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))';			//	/0 - /128
	$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; 	//	0-255
	$ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
	
	//	standard IPv4 quick check.
	if (preg_match('#^' . $ip4 . '$#s', $aIP))
	{
		return true;
	}
	
	//	prefix check
	if (strpos($aIP, '/') !== false)
	{
		if (preg_match('#' . $pre . '$#s', $aIP, $find))
		{
			$aIP = substr($aIP, 0, 0-strlen($find[0]));
			unset($find);
		}
		else
		{
			return false;
		}
	}

	//	IPv4-compatiblity check	
	if (preg_match('#(?<=:'.')' . $ip4 . '$#s', $aIP, $find))
	{
		$aIP = substr($aIP, 0, 0-strlen($find[0]));
		$ip = explode('.', $find[0]);
		$ip = array_map('dechex', $ip);
		$aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
		unset($find, $ip);
	}
	
	//	compression check
	$aIP = explode('::', $aIP);
	$c = count($aIP);
	if ($c > 2)
	{
		return false;
	}
	elseif ($c == 2)
	{
		list($first, $second) = $aIP;
		$first = explode(':', $first);
		$second = explode(':', $second);
		
		if (count($first) + count($second) > 
		{
			return false;
		}
		
		while(count($first) < 
		{
			array_push($first, '0');
		}

		array_splice($first, 8 - count($second), 8, $second);
		$aIP = $first;
		unset($first,$second);
	}
	else
	{
		$aIP = explode(':', $aIP[0]);
	}
	$c = count($aIP);
	
	if ($c != 
	{
		return false;
	}
	
	//	All the pieces should be 16-bit hex strings. Are they?
	foreach ($aIP as $piece)
	{
		if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece)))
		{
			return false;
		}
	}

	return true;
}

class TestIPAddress extends DNAUnitTest
{
	public function testIsValidIP()
	{
		$this->AssertTrue( isValidIP('2001:DB8:0:0:8:800:200C:417A'));					//	unicast, full
		$this->AssertTrue( isValidIP('FF01:0:0:0:0:0:0:101'));							//	multicast, full
		$this->AssertTrue( isValidIP('0:0:0:0:0:0:0:1'));								//	loopback, full
		$this->AssertTrue( isValidIP('0:0:0:0:0:0:0:0'));								//	unspecified, full
		$this->AssertTrue( isValidIP('2001:DB8::8:800:200C:417A'));						//	unicast, compressed
		$this->AssertTrue( isValidIP('FF01::101'));										//	multicast, compressed
		$this->AssertTrue( isValidIP('::1'));											//	loopback, compressed, non-routable
		$this->AssertTrue( isValidIP('::'));											//	unspecified, compressed, non-routable
		$this->AssertTrue( isValidIP('0:0:0:0:0:0:13.1.68.3'));							//	IPv4-compatible IPv6 address, full, deprecated
		$this->AssertTrue( isValidIP('0:0:0:0:0:FFFF:129.144.52.38'));					//	IPv4-mapped IPv6 address, full
		$this->AssertTrue( isValidIP('::13.1.68.3'));									//	IPv4-compatible IPv6 address, compressed, deprecated
		$this->AssertTrue( isValidIP('::FFFF:129.144.52.38'));							//	IPv4-mapped IPv6 address, compressed
		$this->AssertTrue( isValidIP('2001:0DB8:0000:CD30:0000:0000:0000:0000/60'));	//	full, with prefix
		$this->AssertTrue( isValidIP('2001:0DB8::CD30:0:0:0:0/60'));					//	compressed, with prefix
		$this->AssertTrue( isValidIP('2001:0DB8:0:CD30::/60'));							//	compressed, with prefix #2
		$this->AssertTrue( isValidIP('::/128'));										//	compressed, unspecified address type, non-routable
		$this->AssertTrue( isValidIP('::1/128'));										//	compressed, loopback address type, non-routable
		$this->AssertTrue( isValidIP('FF00::/8'));										//	compressed, multicast address type
		$this->AssertTrue( isValidIP('FE80::/10'));										//	compressed, link-local unicast, non-routable
		$this->AssertTrue( isValidIP('FEC0::/10'));										//	compressed, site-local unicast, deprecated
		$this->AssertTrue( isValidIP('127.0.0.1'));										//	standard IPv4, loopback, non-routable
		$this->AssertTrue( isValidIP('0.0.0.0'));										//	standard IPv4, unspecified, non-routable
		$this->AssertTrue( isValidIP('255.255.255.255'));								//	standard IPv4
		$this->AssertFalse(isValidIP('300.0.0.0'));										//	standard IPv4, out of range
		$this->AssertFalse(isValidIP('124.15.6.89/60'));								//	standard IPv4, prefix not allowed
		$this->AssertFalse(isValidIP('2001:DB8:0:0:8:800:200C:417A:221'));				//	unicast, full
		$this->AssertFalse(isValidIP('FF01::101::2'));									//	multicast, compressed
		$this->AssertFalse(isValidIP(''));												//	nothing
	}
}

include '../3rdParty/SimpleTest/unit_tester.php';
include '../3rdParty/SimpleTest/reporter.php';

$test = new TestIPAddress();
$test->run(new HTMLReporter());

?>
User avatar
Ambush Commander
DevNet Master
Posts: 3698
Joined: Mon Oct 25, 2004 9:29 pm
Location: New Jersey, US

Post by Ambush Commander »

Wow... that'll take a while to digest. So the original ABNF was fundamentally flawed?
User avatar
feyd
Neighborhood Spidermoddy
Posts: 31559
Joined: Mon Mar 29, 2004 3:24 pm
Location: Bothell, Washington, USA

Post by feyd »

There's something goofy with it, but I can't place my finger on it right now. Adding it's code (and fixing a tiny bit of its code) it fails several of my tests.

I'll poke at it for a bit more time.

Oh, almost forgot to mention, with a minor tweak, my version will return the fully expanded IPv6 string. Another minor addition and it will also be able to return a compressed version. (Both without the IPv4 compatibility need.)
User avatar
feyd
Neighborhood Spidermoddy
Posts: 31559
Joined: Mon Mar 29, 2004 3:24 pm
Location: Bothell, Washington, USA

Post by feyd »

Okay, so other than the IPv4 and prefix tests, this appears to work.

Code: Select all

function isValidIPv6($ip) {
	$ip = (string) $ip; // sanity check
	$HEXDIG = '[A-Fa-f0-9]';
	$h16 = "{$HEXDIG}{1,4}";
	$dec_octet = '(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|[0-9])';
	$IPv4address = "$dec_octet\\.$dec_octet\\.$dec_octet\\.$dec_octet";
	$ls32 = "(?:$h16:$h16|$IPv4address)";
	$IPv6Address = 
		"(?:(?:{$IPv4address})|(?:".
									  "(?:$h16:){6}$ls32" .
								   "|::(?:$h16:){5}$ls32" .
						  "|(?:$h16)?::(?:$h16:){4}$ls32" .
			"|(?:(?:$h16:){0,1}$h16)?::(?:$h16:){3}$ls32" .
			"|(?:(?:$h16:){0,2}$h16)?::(?:$h16:){2}$ls32" .
			"|(?:(?:$h16:){0,3}$h16)?::(?:$h16:){1}$ls32" .
			"|(?:(?:$h16:){0,4}$h16)?::$ls32" .
			"|(?:(?:$h16:){0,5}$h16)?::$h16" .
			"|(?:(?:$h16:){0,6}$h16)?::" .
		")(?:\\/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))?)";
	$regex = "/^$IPv6Address\$/";
	return (bool)preg_match($regex, $ip);
}


	public function testIsValidIPv6()
	{
		$this->AssertTrue( isValidIPv6('2001:DB8:0:0:8:800:200C:417A'));				//	unicast, full
		$this->AssertTrue( isValidIPv6('FF01:0:0:0:0:0:0:101'));						//	multicast, full
		$this->AssertTrue( isValidIPv6('0:0:0:0:0:0:0:1'));								//	loopback, full
		$this->AssertTrue( isValidIPv6('0:0:0:0:0:0:0:0'));								//	unspecified, full
		$this->AssertTrue( isValidIPv6('2001:DB8::8:800:200C:417A'));					//	unicast, compressed
		$this->AssertTrue( isValidIPv6('FF01::101'));									//	multicast, compressed
		$this->AssertTrue( isValidIPv6('::1'));											//	loopback, compressed, non-routable
		$this->AssertTrue( isValidIPv6('::'));											//	unspecified, compressed, non-routable
		$this->AssertTrue( isValidIPv6('0:0:0:0:0:0:13.1.68.3'));						//	IPv4-compatible IPv6 address, full, deprecated
		$this->AssertTrue( isValidIPv6('0:0:0:0:0:FFFF:129.144.52.38'));				//	IPv4-mapped IPv6 address, full
		$this->AssertTrue( isValidIPv6('::13.1.68.3'));									//	IPv4-compatible IPv6 address, compressed, deprecated
		$this->AssertTrue( isValidIPv6('::FFFF:129.144.52.38'));						//	IPv4-mapped IPv6 address, compressed
		$this->AssertTrue( isValidIPv6('2001:0DB8:0000:CD30:0000:0000:0000:0000/60'));	//	full, with prefix
		$this->AssertTrue( isValidIPv6('2001:0DB8::CD30:0:0:0:0/60'));					//	compressed, with prefix
		$this->AssertTrue( isValidIPv6('2001:0DB8:0:CD30::/60'));						//	compressed, with prefix #2
		$this->AssertTrue( isValidIPv6('::/128'));										//	compressed, unspecified address type, non-routable
		$this->AssertTrue( isValidIPv6('::1/128'));									//	compressed, loopback address type, non-routable
		$this->AssertTrue( isValidIPv6('FF00::/8'));									//	compressed, multicast address type
		$this->AssertTrue( isValidIPv6('FE80::/10'));									//	compressed, link-local unicast, non-routable
		$this->AssertTrue( isValidIPv6('FEC0::/10'));									//	compressed, site-local unicast, deprecated
		$this->AssertTrue( isValidIPv6('127.0.0.1'));									//	standard IPv4, loopback, non-routable
		$this->AssertTrue( isValidIPv6('0.0.0.0'));										//	standard IPv4, unspecified, non-routable
		$this->AssertTrue( isValidIPv6('255.255.255.255'));								//	standard IPv4
		$this->AssertFalse(isValidIPv6('300.0.0.0'));									//	standard IPv4, out of range
		$this->AssertFalse(isValidIPv6('124.15.6.89/60'));								//	standard IPv4, prefix not allowed
		$this->AssertFalse(isValidIPv6('2001:DB8:0:0:8:800:200C:417A:221'));			//	unicast, full
		$this->AssertFalse(isValidIPv6('FF01::101::2'));								//	multicast, compressed
		$this->AssertFalse(isValidIPv6(''));											//	nothing
		$this->AssertFalse(isValidIPv6(':127.0.0.1'));									//	bad IPv4
		$this->AssertFalse(isValidIPv6(':BAF:ABF:ABEF'));								//	bad IPv6
		$this->AssertFalse(isValidIPv6(':ABF:ABEF'));									//	bad IPv6
		$this->AssertFalse(isValidIPv6('ABF:ABEF'));									//	bad IPv6
	}

There were several logical errors in the regex.


As an aside, I cannot recommend using this regex version. It's a bit difficult to test, or rather debug. So I suggest using the more straight forward version I posted earlier.

edit: speedwise, mine and isValidIPv6() (with IPv4 and prefix support added) tag nearly the same amount of time to process. Mine runs anywhere from 0.015 to 0.003 seconds slower in PHP 5.1.2 / Apache 2.0.55.

I've updated the code in this post to include IPv4 and prefix lookup too.
User avatar
Ollie Saunders
DevNet Master
Posts: 3179
Joined: Tue May 24, 2005 6:01 pm
Location: UK

Post by Ollie Saunders »

Some real teamwork is going on here. It's beautiful to watch :)
Post Reply