loadXML demands UTF-8 encoding, why?

PHP programming forum. Ask questions or help people concerning PHP code. Don't understand a function? Need help implementing a class? Don't understand a class? Here is where to ask. Remember to do your homework!

Moderator: General Moderators

Post Reply
wvxvw
Forum Newbie
Posts: 22
Joined: Sat May 17, 2008 10:55 am

loadXML demands UTF-8 encoding, why?

Post by wvxvw »

Hi, mb, someone can help me with this...
I'm trying to save XML file with encoding, that depends on the parameter it gets from the client. The code looks like this:

Code: Select all

<?php
class Receiver extends DOMDocument {
    public static $US_ASCII = "us-ascii";
    public static $US_ASCII_PHP = "cp1252";
    
    public static $UTF_8 = "utf-8";
    public static $UTF_8_PHP = "utf-8";
    
    public static $ISO_8859_5 = "iso-8859-5";
    public static $ISO_8859_5_PHP = "ISO-8859-5";
    
    public static $KOI8_R = "koi8-r";
    public static $KOI8_R_PHP = "koi8-r";
    
    public static $WINDOWS_1251 = "windows-1251";
    public static $WINDOWS_1251_PHP = "cp1251";
    
    public $source;
    public $enc;
    
    function __construct() {
        if(isset($GLOBALS["HTTP_RAW_POST_DATA"])){
            preg_match("/^[\w\d\-]+/", $GLOBALS["HTTP_RAW_POST_DATA"], $encodings);
            switch($encodings[0]) {
                case Receiver::$UTF_8:
                    $this->enc = Receiver::$$UTF_8_PHP;
                    break;
                case Receiver::$US_ASCII:
                    $this->enc = Receiver::$US_ASCII_PHP;
                    break;
                case Receiver::$ISO_8859_5:
                    $this->enc = Receiver::$ISO_8859_5_PHP;
                case Receiver::$KOI8_R:
                    break;
                case Receiver::$WINDOWS_1251:
                    $this->enc = Receiver::$WINDOWS_1251_PHP;
                    break;
                default:
                    exit("unrecognised encoding");
            }
            //var_dump(iconv_get_encoding('all'));
            //exit();
            iconv_set_encoding("input_encoding", $this->enc);
            iconv_set_encoding("internal_encoding", $this->enc);
            parent::__construct("1.0", $this->enc);
            preg_match("/<.+/s", $GLOBALS["HTTP_RAW_POST_DATA"], $encodings);
            $this->source = join("", $encodings);
            //$this->source = iconv($this->enc, Receiver::$UTF_8_PHP, $this->source);
            $this->loadXML($this->source);
        } else {
            $nodata = $this->appendChild($this->createElement("noData"));
            $nodata->appendChild($this->createTextNode("No data posted!"));
        }
    }
    /**
     * @return string
     */
    function toString() {
        $this->formatOutput = true;
        return $this->saveXML();
    }
}
?>
But I'm getting error in this line $this->loadXML($this->source); which sais $this->source has to be in UTF-8 encoding. Aparently, it's not, and I don't want it to be UTF-8. Can I use other encodings with DOMDocument? (If I uncomment the line before it ($this->source = iconv($this->enc, Receiver::$UTF_8_PHP, $this->source);) it'll agree to parse the XML, but, naturally, it'll be gibrish...

Here's the code, that generates the POST data I'm trying to convert. But it should be OK...

Code: Select all

/**
* ...
* @author wvxvw
*/
package  org.wvxvw.phputils
{
    import flash.events.EventDispatcher;
    import flash.events.Event;
    import flash.events.IOErrorEvent;
    import flash.events.SecurityErrorEvent;
    import flash.net.URLLoader;
    import flash.net.URLRequest;
    import flash.net.URLLoaderDataFormat;
    import flash.net.URLRequestMethod;
    import flash.net.URLRequestHeader;
    import flash.utils.ByteArray;
    import flash.utils.Endian;
    
    public class XMLSender extends EventDispatcher
    {
        public static const US_ASCII:String = 'us-ascii';
        public static const UTF_8:String = 'utf-8';
        public static const ISO_8859_5:String = 'iso-8859-5';
        public static const KOI8_R:String = 'koi8-r';
        public static const WINDOWS_1251:String = 'windows-1251';
        
        private var ur:URLRequest;
        private var ul:URLLoader;
        private var ba:ByteArray;
        private var xml:XML;
        private var encodedString:String;
        
        public function XMLSender() 
        {
            super();
            ul = new URLLoader();
            ul.dataFormat = URLLoaderDataFormat.BINARY;
            ul.addEventListener(Event.COMPLETE, handleComplete);
            ul.addEventListener(IOErrorEvent.IO_ERROR, handleIOError);
            ul.addEventListener(SecurityErrorEvent.SECURITY_ERROR, handleSecurityError);
        }
        public function sendXML(source:Object, url:String, encoding:String = UTF_8, endian:String = Endian.LITTLE_ENDIAN):void
        {
            try
            {
                xml = new XML(source);
            } catch (e:Error) {
                trace('unable to convert to XML');
            }
            encodedString = xml.toXMLString();
            ur = new URLRequest(url);
            var h:URLRequestHeader = new URLRequestHeader('Content-Type', 'application/octet-stream');
            ur.method = URLRequestMethod.POST;
            ur.requestHeaders.push(h);
            ba = new ByteArray();
            ba.endian = endian;
            trace(encodedString);
            ba.writeMultiByte(encoding, US_ASCII);
            ba.position = 20;
            ba.writeMultiByte(encodedString, encoding);
            ba.position = 0;
            ur.data = ba;
            ul.load(ur);
        }
        public function handleComplete(evt:Event):void
        {
            trace('complete', ul.data);
        }
        public function handleIOError(evt:Event):void
        {
            trace('IO', evt);
        }
        public function handleSecurityError(evt:Event):void
        {
            trace('security', evt);
        }
    }
    
}
Post Reply