Code: Select all
<?php
// Naive split: every single space is a separator, so a quoted phrase such as
// "bunch of" is broken into two separate elements.
$words = explode(" ", $string_of_words);
?>Now, what if a phrase should keep its spaces when it is in quotes, i.e.:
Code: Select all
this is a "bunch of" wordsCode: Select all
this
is
a
bunch of
wordsModerator: General Moderators
Code: Select all
<?php
// Naive split: every single space is a separator, so a quoted phrase such as
// "bunch of" is broken into two separate elements.
$words = explode(" ", $string_of_words);
?>Code: Select all
this is a "bunch of" wordsCode: Select all
this
is
a
bunch of
wordsCode: Select all
error_reporting(E_ALL);
/**
 * Splits a string on spaces while keeping double-quoted phrases together.
 *
 * Text outside quotes is split on single spaces; text between a pair of
 * double quotes becomes one element with the quotes removed. Backslash
 * escapes are not interpreted. Text after the last quote (or the whole
 * string when there are no quotes, or the remainder after an unterminated
 * quote) is split on spaces as ordinary words. Empty elements are dropped,
 * but the literal token "0" is kept.
 *
 * @param string $text input such as: this is a "bunch of" words
 * @return array list of words and quoted phrases, in input order
 */
function explodeSpacesQuotes($text) {
    $curLocation = 0;   // offset where the current chunk starts
    $quotes = false;    // true while we are inside a quoted phrase
    $pieces = array();
    while (($quote_location = strpos($text, '"', $curLocation)) !== false) {
        $before = trim((string) substr($text, $curLocation, $quote_location - $curLocation));
        if ($quotes) {
            // Closing quote: the whole chunk is one phrase.
            $pieces[] = $before;
        } else {
            // Opening quote: everything before it is ordinary words.
            $pieces = array_merge($pieces, explode(' ', $before));
        }
        $curLocation = $quote_location + 1;
        $quotes = !$quotes;
    }
    // Whatever follows the last quote used to be silently discarded.
    $rest = trim((string) substr($text, $curLocation));
    if ($rest !== '') {
        $pieces = array_merge($pieces, explode(' ', $rest));
    }
    // Kill empty elements. Build a fresh list instead of splicing while
    // iterating (stale keys removed the wrong items), and compare against ''
    // rather than using empty(), which would also discard "0".
    $array = array();
    foreach ($pieces as $value) {
        if ($value !== '') {
            $array[] = $value;
        }
    }
    return $array;
}
var_dump(explodeSpacesQuotes('bang this "Foo bar" "Bang Bling" foo "Boo"'));Code: Select all
// Sample sentence containing a quoted phrase and a quoted phrase with an
// escaped double quote inside it.
$string = 'Look at the "string with space in it", I bet it stays like a string! And this on contains a quote itself "String with a \" in it!"';
// Three alternatives, tried in order: a single-quoted run, a double-quoted run
// (delimiters must not be preceded by a backslash), or a run of word
// characters. The quote characters stay in the match, and punctuation outside
// quotes is not matched by \w+ and therefore does not appear in the result.
preg_match_all('/(?:(?<!\\\\)\'.+?(?<!\\\\)\')|(?:(?<!\\\\)\".+?(?<!\\\\)\")|\w+/', $string, $matches);
// $matches[0] holds the full-pattern matches in order of appearance.
print_r($matches[0]);
/*
Array
(
[0] => Look
[1] => at
[2] => the
[3] => "string with space in it"
[4] => I
[5] => bet
[6] => it
[7] => stays
[8] => like
[9] => a
[10] => string
[11] => And
[12] => this
[13] => on
[14] => contains
[15] => a
[16] => quote
[17] => itself
[18] => "String with a \" in it!"
)
*/Code: Select all
error_reporting(E_ALL);
/**
 * Splits a string on spaces, keeping double-quoted phrases together and
 * honouring backslash-escaped quotes.
 *
 * A double quote preceded by an odd number of backslashes is treated as a
 * literal character, not as a delimiter. Each chunk is passed through
 * stripslashes() before being stored. Text after the last real quote
 * (including the remainder of an unterminated quoted phrase) is split on
 * spaces as ordinary words. Empty elements are dropped; "0" is kept.
 *
 * @param string $text input such as: bang "Fo \"o bar" foo
 * @return array list of words and quoted phrases, in input order
 */
function explodeSpacesQuotes($text) {
    $length = strlen($text);
    $curLocation = 0;   // offset where the current chunk starts
    $quotes = false;    // true while we are inside a quoted phrase
    $pieces = array();
    while ($curLocation < $length) {
        $quote_location = strpos($text, '"', $curLocation);
        // Skip escaped quotes. The search may run off the end of the string,
        // so the loop is guarded against a false result.
        while ($quote_location !== false) {
            $backslashes = 0;
            for ($i = $quote_location - 1; $i >= 0 && $text[$i] === '\\'; $i--) {
                $backslashes++;
            }
            if ($backslashes % 2 === 0) {
                break; // even run of backslashes: the quote itself is real
            }
            $quote_location = strpos($text, '"', $quote_location + 1);
        }
        if ($quote_location === false) {
            break; // no further delimiter; the rest is handled below
        }
        $before = trim(stripslashes((string) substr($text, $curLocation, $quote_location - $curLocation)));
        if ($quotes) {
            $pieces[] = $before;
        } else {
            $pieces = array_merge($pieces, explode(' ', $before));
        }
        $curLocation = $quote_location + 1;
        $quotes = !$quotes;
    }
    // Trailing text after the last delimiter used to be discarded.
    if ($curLocation < $length) {
        $rest = trim(stripslashes(substr($text, $curLocation)));
        if ($rest !== '') {
            $pieces = array_merge($pieces, explode(' ', $rest));
        }
    }
    // Kill empty elements without splicing during iteration and without
    // empty(), which would also drop the token "0".
    $array = array();
    foreach ($pieces as $value) {
        if ($value !== '') {
            $array[] = $value;
        }
    }
    return $array;
}
var_dump(explodeSpacesQuotes('bang this "Fo \"o bar" "Bang Bling" foo "Boo"'));Code: Select all
/**
 * Locates the next quote character in a string.
 *
 * When $quotes is a double or single quote character, only that character is
 * searched for. For any other value (false by default) both quote types are
 * searched and the position of whichever occurs first is returned.
 *
 * explodeSpacesQuotes() relies on this function to decide which characters
 * count as quotes, so the quote set can be changed by editing it.
 *
 * @param string $text   haystack
 * @param mixed  $quotes quote character to look for, or false for either
 * @param int    $offset position in $text at which to start searching
 * @return mixed position of the quote, or false when none is found
 */
function nextQuote($text,$quotes = false,$offset = 0) {
    // Single-character requests are plain strpos() wrappers.
    if ($quotes == '"') {
        return strpos($text, '"', $offset);
    }
    if ($quotes == "'") {
        return strpos($text, "'", $offset);
    }

    // Either quote will do: find both and keep the earlier one. The results
    // are compared with === because false would otherwise behave like 0.
    $firstDouble = strpos($text, '"', $offset);
    $firstSingle = strpos($text, "'", $offset);

    if ($firstSingle === false) {
        // Covers "only a double quote exists" and "neither exists" (false).
        return $firstDouble;
    }
    if ($firstDouble === false) {
        return $firstSingle;
    }
    if ($firstSingle < $firstDouble) {
        return $firstSingle;
    }
    return $firstDouble;
}
/**
 * Explodes a string on spaces, noting quotes and backslash escaping.
 *
 * Text inside a pair of quotes is kept together as one element, so
 * "text 'ping pong'" comes out as array('text','ping pong'). While inside a
 * quoted section, quotes of the other type are ignored, and a quote preceded
 * by an odd number of backslashes is treated as a literal character. Every
 * chunk is passed through stripslashes() before being stored.
 *
 * If a quote is never closed (for example the apostrophe in "isn't"), the
 * quote character and the first following word are glued back onto the
 * previous element:
 *
 * <code>
 * $text = "This isn't closed.";
 * print_r(explodeSpacesQuotes($text)); //array('This','isn\'t','closed.')
 * </code>
 *
 * Which characters count as quotes is decided entirely by nextQuote(), so
 * editing that function changes the quote set used here.
 *
 * @param string $text text to split
 * @return array list of words and quoted phrases, in input order
 */
function explodeSpacesQuotes($text) {
    $curLocation = 0;  //Location on text, primarily offset for nextQuote()
    $quotes = false;   //Current state of a quotation. Can be false, " or '
    $array = array();  //The array we return
    $length = strlen($text);
    while (($quote_location = nextQuote($text,$quotes,$curLocation)) !== false) {
        //Backslash handling: count the run of backslashes directly before the
        //quote. An odd run escapes the quote, so move on to the next one.
        //The search may find nothing more, hence the explicit false guard
        //(previously this case looped forever).
        while ($quote_location !== false) {
            $backslashes = 0;
            $slash_location = $quote_location - 1;
            while ($slash_location >= 0 && $text[$slash_location] === '\\') {
                $backslashes++;
                $slash_location--;
            }
            if ($backslashes % 2 === 0) {
                break;
            }
            $quote_location = nextQuote($text,$quotes,$quote_location + 1);
        }
        if ($quote_location === false) {
            //Only escaped quotes remain; let the post-loop code handle the rest
            break;
        }
        //Chunk out the section between our offset and our quote
        $before = trim(stripslashes((string) substr($text,$curLocation,
            $quote_location - $curLocation)));
        if ($quotes !== false) {
            //Closing quote: store the phrase whole, then switch quotes off
            if ($before !== '') {
                $array[] = $before;
            }
            $quotes = false;
        } else {
            //Opening quote: the text before it is ordinary words. Skip the
            //empty strings that repeated spaces would otherwise produce.
            foreach (explode(' ',$before) as $word) {
                if ($word !== '') {
                    $array[] = $word;
                }
            }
            $quotes = $text[$quote_location];
        }
        //Set offset for next iteration
        $curLocation = $quote_location + 1;
    }
    //Post-loop processing: pick up whatever follows the last quote, e.g. the
    //tail of 'keyword "quote" this stuff'.
    if ($curLocation < $length) {
        $before = trim(stripslashes(substr($text,$curLocation)));
        $temp = array();
        foreach (explode(' ',$before) as $word) {
            if ($word !== '') {
                $temp[] = $word;
            }
        }
        if ($quotes !== false && count($temp) > 0) {
            //Unterminated quote: merge the quote character and the first
            //following word back into the previous element (or start a new
            //element when there is no previous one).
            $first = array_shift($temp);
            if (count($array) > 0) {
                $array[count($array) - 1] .= $quotes . $first;
            } else {
                $array[] = $quotes . $first;
            }
        }
        $array = array_merge($array,$temp);
    }
    return $array;
}
// Sample 1: mixes doubled (literal) backslashes, escaped quotes and quoted
// phrases. The raw input is dumped first, then the exploded result.
$text = 'bang th\\\\is \\\\"Fo \\"o bar" \\\\\\"B"ang Bling" foo "Boo" foo';
var_dump($text);
var_dump(explodeSpacesQuotes($text));
// Sample 2: a stray backslash plus an apostrophe that is never closed.
$text = 'Extra \\s backslashes where they shouldn\'t be, plus an unterminated quote.';
var_dump($text);
var_dump(explodeSpacesQuotes($text));
// Sample 3: the string starts with a quoted phrase.
$text = '"crap car" yuck';
var_dump($text);
var_dump(explodeSpacesQuotes($text));Code: Select all
<?php
// ...Ambush Commander's code was here
/**
 * Regex-based splitter: breaks a string into words and quoted phrases.
 *
 * preg_split() is run with PREG_SPLIT_DELIM_CAPTURE, so each match contributes
 * its captured groups to the result list. Whitespace-only pieces are ignored
 * and, of the remaining pieces, every second one is discarded. Finally a
 * matching pair of surrounding quotes is stripped from each kept piece.
 *
 * @param string $string text to split
 * @return array list of words and quoted phrases
 */
function splitSpacesQuotes($string)
{
    $pieces = preg_split('#((?<!\\\\)(?:\\\\\\\\)*(["\']).*?(?<!\\\\)(?:\\\\\\\\)*\\2)|((?<=^|\s)(?!=["\'])(.+?)(?!=["\'])(?=\s|$))#s', $string, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );

    $kept = array();
    $seen = 0; // counts non-whitespace pieces only
    foreach ($pieces as $piece) {
        if (ctype_space($piece)) {
            continue;
        }
        $seen++;
        // Keep the 1st, 3rd, 5th ... non-whitespace piece.
        if ($seen % 2) {
            $kept[] = $piece;
        }
    }

    // Drop one pair of matching quotes wrapped around a piece, if present.
    return preg_replace('#^(["\'])(.*?)\\1$#s','\\2',$kept);
}
// Benchmark: run both splitters over the same inputs and report the total and
// per-call time together with the result each one produced.
$test = array(); // start from a known-empty list instead of an undefined variable
$test[] = 'this is a "bunch of" words';
$test[] = 'this is a "test string" some escaped "quote \\" magic" along with handling "escapes \\\\" arf "arf';
$test[] = 'bang th\\\\is \\\\"Fo \\"o bar" \\\\\\"B"ang Bling" foo "Boo" foo';
$test[] = 'Extra \\s backslashes where they shouldn\'t be, plus an unterminated quote.';
$loop = 1000; // iterations per input and per implementation
foreach ($test as $t) {
    // microtime(true) returns a float, so no manual "msec sec" string parsing
    // is needed to compute the elapsed time.
    $amStart = microtime(true);
    for ($i = 0; $i < $loop; $i++) {
        $ambush = explodeSpacesQuotes($t);
    }
    $amTime = microtime(true) - $amStart;

    $fStart = microtime(true);
    for ($i = 0; $i < $loop; $i++) {
        $feyd = splitSpacesQuotes($t);
    }
    $fTime = microtime(true) - $fStart;

    // The per-call averages are printed with 8 decimals; the decimals argument
    // and closing parenthesis were missing from both number_format() calls.
    echo '
Input:
'.$t.'
Ambush Commander: '.number_format($amTime,8).' seconds for '.$loop.' iterations ( '.number_format($amTime / $loop, 8).' second average )
'.var_export($ambush,true).'
feyd: '.number_format($fTime,8).' seconds for '.$loop.' iterations ( '.number_format($fTime / $loop, 8).' second average )
'.var_export($feyd,true).'
';
}
?>Code: Select all
Input:
this is a "bunch of" words
Ambush Commander: 0.13933206 seconds for 1000 iterations ( 0.00013933 second average )
array (
0 => 'this',
1 => 'is',
2 => 'a',
3 => 'bunch of',
4 => 'words',
)
feyd: 0.26041794 seconds for 1000 iterations ( 0.00026042 second average )
array (
0 => 'this',
1 => 'is',
2 => 'a',
3 => 'bunch of',
4 => 'words',
)
Input:
this is a "test string" some escaped "quote \" magic" along with handling "escapes \\" arf "arf
Ambush Commander: 0.44074011 seconds for 1000 iterations ( 0.00044074 second average )
array (
0 => 'this',
1 => 'is',
2 => 'a',
3 => 'test string',
4 => 'some',
5 => 'escaped',
6 => 'quote " magic',
7 => 'along',
8 => 'with',
9 => 'handling',
10 => 'escapes \\',
11 => 'arf"arf',
)
feyd: 0.73889494 seconds for 1000 iterations ( 0.00073889 second average )
array (
0 => 'this',
1 => 'is',
2 => 'a',
3 => 'test string',
4 => 'some',
5 => 'escaped',
6 => 'quote \\" magic',
7 => 'along',
8 => 'with',
9 => 'handling',
10 => 'escapes \\\\',
11 => 'arf',
12 => '"arf',
)
Input:
bang th\\is \\"Fo \"o bar" \\\"B"ang Bling" foo "Boo" foo
Ambush Commander: 0.41683507 seconds for 1000 iterations ( 0.00041684 second average )
array (
0 => 'bang',
1 => 'th\\is',
2 => '\\',
3 => 'Fo "o bar',
4 => '\\"B',
5 => 'ang Bling',
6 => 'foo',
7 => 'Boo',
8 => 'foo',
)
feyd: 0.45822906 seconds for 1000 iterations ( 0.00045823 second average )
array (
0 => 'bang',
1 => 'th\\\\is',
2 => '\\\\"Fo \\"o bar"',
3 => '\\\\\\"B"ang',
4 => 'Bling"',
5 => 'foo',
6 => 'Boo',
7 => 'foo',
)
Input:
Extra \s backslashes where they shouldn't be, plus an unterminated quote.
Ambush Commander: 0.13459802 seconds for 1000 iterations ( 0.00013460 second average )
array (
0 => 'Extra',
1 => 's',
2 => 'backslashes',
3 => 'where',
4 => 'they',
5 => 'shouldn\'t',
6 => 'be,',
7 => 'plus',
8 => 'an',
9 => 'unterminated',
10 => 'quote.',
)
feyd: 0.58185506 seconds for 1000 iterations ( 0.00058186 second average )
array (
0 => 'Extra',
1 => '\\s',
2 => 'backslashes',
3 => 'where',
4 => 'they',
5 => 'shouldn\'t',
6 => 'be,',
7 => 'plus',
8 => 'an',
9 => 'unterminated',
10 => 'quote.',
)