Page 1 of 1

PHP Syntax highlighter

Posted: Thu Sep 23, 2004 9:04 am
by vigge89
I'm looking for a PHP syntax highlighter, which doesn't use highlight_string() or highlight_file(). The reason is that I want to make my own highlighter, and I need something to base it on.
What I'm looking for is preg_replace code (or something similar) to add HTML code to variables, functions, strings, comments, HTML, etc.
I've looked on google, but couldn't find anything.

Thanks in advance.

Posted: Thu Sep 23, 2004 11:46 am
by feyd
maybe looking at the source to highlight_string would be best?

Posted: Thu Sep 23, 2004 12:01 pm
by timvw
try searching source2html or php2html :)

Posted: Thu Sep 23, 2004 12:38 pm
by vigge89
feyd wrote:maybe looking at the source to highlight_string would be best?
problem is, I can't find it :(
[php_man]highlight_string[/php_man]()

timvw: couldn't find anything interesting :(
or well, I found a cool highlighter (http://www.faebu.ch/phpcc/), but there was no download or source code for it :(

Posted: Thu Sep 23, 2004 1:38 pm
by feyd
well.. you can download php's source code.. and find the function in there...

Posted: Thu Sep 23, 2004 1:39 pm
by vigge89
ok, i'll see what i can find, i'll let you know :)

Posted: Thu Sep 23, 2004 1:53 pm
by vigge89
Found the source for highlighting i think, but this doesn't give me much of a PHP way of doing it :(

Code: Select all

/* $Id: zend_highlight.c,v 1.43.2.2 2004/08/11 22:38:31 iliaa Exp $ */

#include "zend.h"
#include "zend_language_parser.h"
#include "zend_compile.h"
#include "zend_highlight.h"
#include "zend_ptr_stack.h"
#include "zend_globals.h"

/*
 * Emit a single character, escaped so that it displays literally in HTML.
 *
 *   '\n'  -> "<br />"
 *   '<'   -> "&lt;"
 *   '>'   -> "&gt;"
 *   '&'   -> "&amp;"
 *   ' '   -> "&nbsp;"
 *   '\t'  -> four "&nbsp;" entities
 *
 * Every other character is written unchanged with ZEND_PUTC().
 */
ZEND_API void zend_html_putc(char c)
{
	switch (c) {
		case '\n':
			ZEND_PUTS("<br />");
			break;
		case '<':
			/* Must be the entity: a raw '<' would start a tag in the output. */
			ZEND_PUTS("&lt;");
			break;
		case '>':
			ZEND_PUTS("&gt;");
			break;
		case '&':
			/* Must be the entity: a raw '&' would start an entity reference. */
			ZEND_PUTS("&amp;");
			break;
		case ' ':
			ZEND_PUTS("&nbsp;");
			break;
		case '\t':
			ZEND_PUTS("&nbsp;&nbsp;&nbsp;&nbsp;");
			break;
		default:
			ZEND_PUTC(c);
			break;
	}
}


/*
 * Emit `len` bytes starting at `s`, HTML-escaping each character.
 *
 * A lone space is written as a plain space; a run of two or more spaces is
 * written as one "&nbsp;" per space (via zend_html_putc()). All other
 * characters go through zend_html_putc().
 *
 * When built with ZEND_MULTIBYTE and a scanner output filter is installed,
 * the input is first passed through that filter and the filtered buffer is
 * emitted instead; that buffer is released with efree() before returning.
 */
ZEND_API void zend_html_puts(const char *s, uint len TSRMLS_DC)
{
	const char *ptr=s, *end=s+len;

#ifdef ZEND_MULTIBYTE
	char *filtered;
	int filtered_len;

	if (LANG_SCNG(output_filter)) {
		LANG_SCNG(output_filter)(&filtered, &filtered_len, s, len TSRMLS_CC);
		ptr = filtered;
		end = filtered + filtered_len;
	}
#endif /* ZEND_MULTIBYTE */

	while (ptr<end) {
		if (*ptr==' ') {
			/* Series of spaces should be displayed as &nbsp;'s
			 * whereas single spaces should be displayed as a space
			 */
			if ((ptr+1) < end && *(ptr+1)==' ') {
				/* Consume the whole run; the bounds check comes first so
				 * *ptr is never read at or past `end`. */
				do {
					zend_html_putc(*ptr);
				} while ((++ptr < end) && (*ptr==' '));
			} else {
				ZEND_PUTC(*ptr);
				ptr++;
			}
		} else {
			zend_html_putc(*ptr++);
		}
	}

#ifdef ZEND_MULTIBYTE
	/* Same condition as above, so `filtered` is only freed if it was set. */
	if (LANG_SCNG(output_filter)) {
		efree(filtered);
	}
#endif /* ZEND_MULTIBYTE */
}



/*
 * Scan the current lexer input token by token and print it as
 * syntax-highlighted HTML.
 *
 * syntax_highlighter_ini supplies one color string per token class
 * (highlight_html, highlight_comment, highlight_default, highlight_string,
 * highlight_keyword). The output is wrapped in <code> and an outer <span>
 * carrying the html color; a nested <span> is opened whenever the color
 * changes to something other than the html color, and closed again on the
 * next change.
 *
 * Colors are compared by pointer, not by string content.
 */
ZEND_API void zend_highlight(zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
{
	zval token;
	int token_type;
	char *last_color = syntax_highlighter_ini->highlight_html;
	char *next_color;
	int in_string=0;

	zend_printf("<code>");
	zend_printf("<span style=\"color: %s\">\n", last_color);
	/* highlight stuff coming back from zendlex() */
	token.type = 0;
	while ((token_type=lex_scan(&token TSRMLS_CC))) {
		/* Step 1: pick the color for this token. */
		switch (token_type) {
			case T_INLINE_HTML:
				next_color = syntax_highlighter_ini->highlight_html;
				break;
			case T_COMMENT:
			case T_DOC_COMMENT:
				next_color = syntax_highlighter_ini->highlight_comment;
				break;
			case T_OPEN_TAG:
			case T_OPEN_TAG_WITH_ECHO:
				next_color = syntax_highlighter_ini->highlight_default;
				break;
			case T_CLOSE_TAG:
				next_color = syntax_highlighter_ini->highlight_default;
				break;
			case T_CONSTANT_ENCAPSED_STRING:
				next_color = syntax_highlighter_ini->highlight_string;
				break;
			case '"':
				/* A double quote toggles "inside an interpolated string". */
				next_color = syntax_highlighter_ini->highlight_string;
				in_string = !in_string;
				break;
			case T_WHITESPACE:
				/* Whitespace is emitted in whatever span is currently open. */
				zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC);  /* no color needed */
				token.type = 0;
				continue;
			default:
				if (in_string) {
					next_color = syntax_highlighter_ini->highlight_string;
				} else if (token.type == 0) {
					/* token.type was reset to 0 before the scan and is still 0.
					 * NOTE(review): presumably the lexer sets no value for
					 * keywords/operators - confirm against lex_scan(). */
					next_color = syntax_highlighter_ini->highlight_keyword;
				} else {
					next_color = syntax_highlighter_ini->highlight_default;
				}
				break;
		}

		/* Step 2: on a color change, close the old span and open a new one.
		 * The html color never gets a nested span; the outer span covers it. */
		if (last_color != next_color) {
			if (last_color != syntax_highlighter_ini->highlight_html) {
				zend_printf("</span>");
			}
			last_color = next_color;
			if (last_color != syntax_highlighter_ini->highlight_html) {
				zend_printf("<span style=\"color: %s\">", last_color);
			}
		}

		/* Step 3: emit the token text, HTML-escaped. */
		switch (token_type) {
			case T_END_HEREDOC:
				zend_html_puts(token.value.str.val, token.value.str.len TSRMLS_CC);
				{
					char *ptr = LANG_SCNG(yy_text);
					/* Add a line break unless the scanned text ends in ';'. */
					if (ptr[LANG_SCNG(yy_leng) - 1] != ';') {
						zend_html_putc('\n');
					}
				}
				break;
			default:
				zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC);
				break;
		}

		/* Step 4: release the token's string value.
		 * NOTE(review): the token types skipped here presumably do not own
		 * a heap-allocated value - confirm against the lexer. */
		if (token.type == IS_STRING) {
			switch (token_type) {
				case T_OPEN_TAG:
				case T_OPEN_TAG_WITH_ECHO:
				case T_CLOSE_TAG:
				case T_WHITESPACE:
				case T_COMMENT:
				case T_DOC_COMMENT:
					break;
				default:
					efree(token.value.str.val);
					break;
			}
		} else if (token_type == T_END_HEREDOC) {
			efree(token.value.str.val);
		}
		token.type = 0;
	}
	/* Close the nested span, if one is open, then the outer span. */
	if (last_color != syntax_highlighter_ini->highlight_html) {
		zend_printf("</span>\n");
	}
	zend_printf("</span>\n");
	zend_printf("</code>");
}



ZEND_API void zend_strip(TSRMLS_D)
&#123;
	zval token;
	int token_type;
	int prev_space = 0;

	token.type = 0;
	while ((token_type=lex_scan(&token TSRMLS_CC))) &#123;
		switch (token_type) &#123;
			case T_WHITESPACE:
				if (!prev_space) &#123;
					zend_write(" ", sizeof(" ") - 1);
					prev_space = 1;
				&#125;
						/* lack of break; is intentional */
			case T_COMMENT:
			case T_DOC_COMMENT:
				token.type = 0;
				continue;
			
			case T_END_HEREDOC: &#123;
					char *ptr = LANG_SCNG(yy_text);

					zend_write(ptr, LANG_SCNG(yy_leng) - 1);
					/* The ensure that we only write one ; and that it followed by the required newline */
					zend_write("\n", sizeof("\n") - 1);
					if (ptr&#1111;LANG_SCNG(yy_leng) - 1] == ';') &#123;
						lex_scan(&token TSRMLS_CC);
					&#125;
					efree(token.value.str.val);
				&#125;
				break;
			
			default:
				zend_write(LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
				break;
		&#125;

		if (token.type == IS_STRING) &#123;
			switch (token_type) &#123;
				case T_OPEN_TAG:
				case T_OPEN_TAG_WITH_ECHO:
				case T_CLOSE_TAG:
				case T_WHITESPACE:
				case T_COMMENT:
				case T_DOC_COMMENT:
					break;

				default:
					efree(token.value.str.val);
					break;
			&#125;
		&#125;
		prev_space = token.type = 0;
	&#125;
&#125;

/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * indent-tabs-mode: t
 * End:
 */

Posted: Thu Sep 23, 2004 2:02 pm
by feyd
looks fairly straightforward to me ;) :P

Posted: Thu Sep 23, 2004 2:19 pm
by vigge89
so, then, how could i do it in PHP?
do I have to call some sort of exefile? :S

Posted: Fri Sep 24, 2004 8:54 am
by timvw
You can find a complete parser source:

http://www.akbkhome.com/svn/akpear/PHP_Parser/

Posted: Fri Sep 24, 2004 10:24 am
by vigge89
eh, just a question, what is that? :S :(
I don't understand what all the files mean, or what they do.
Call me dumb if you want, but what am I supposed to do with all that code ><
sorry for letting you people do the work if that is what's happening, but I don't have a clue...