diff options
Diffstat (limited to 'includes/xml_parser.php')
-rw-r--r-- | includes/xml_parser.php | 427 |
1 files changed, 427 insertions, 0 deletions
diff --git a/includes/xml_parser.php b/includes/xml_parser.php new file mode 100644 index 0000000..15e02e0 --- /dev/null +++ b/includes/xml_parser.php | |||
@@ -0,0 +1,427 @@ | |||
1 | <?php | ||
2 | /* | ||
3 | * xml_parser.php | ||
4 | * | ||
5 | * @(#) $Header: /home/mlemos/cvsroot/xmlparser/xml_parser.php,v 1.19 2006/11/22 01:25:05 mlemos Exp $ | ||
6 | * | ||
7 | */ | ||
8 | |||
9 | /* | ||
10 | * Parser error numbers: | ||
11 | * | ||
12 | * 1 - Could not create the XML parser | ||
13 | * 2 - Could not parse data | ||
14 | * 3 - Could not read from input stream | ||
15 | * | ||
16 | */ | ||
17 | |||
/*
 * Global registry of handler objects, indexed by XML parser handle.
 * It is read by the procedural xml_parser_*_handler functions below and
 * is filled in by xml_parser_class::Parse when xml_set_object is not
 * available in the running PHP build.
 */
$xml_parser_handlers=array();
19 | |||
/*
 * Procedural start-tag callback.
 *
 * Looks up the handler object registered for $parser in the global
 * $xml_parser_handlers table and forwards the event to its StartElement
 * method. Nothing is forwarded once that handler has recorded an error.
 */
Function xml_parser_start_element_handler($parser,$name,$attrs)
{
	global $xml_parser_handlers;

	/* A non-empty error string means parsing has already failed. */
	if(strcmp($xml_parser_handlers[$parser]->error,""))
		return;
	$xml_parser_handlers[$parser]->StartElement($xml_parser_handlers[$parser],$name,$attrs);
}
27 | |||
/*
 * Procedural end-tag callback.
 *
 * Forwards the event to the EndElement method of the handler object
 * registered for $parser in the global $xml_parser_handlers table,
 * unless that handler has already recorded an error.
 */
Function xml_parser_end_element_handler($parser,$name)
{
	global $xml_parser_handlers;

	/* A non-empty error string means parsing has already failed. */
	if(strcmp($xml_parser_handlers[$parser]->error,""))
		return;
	$xml_parser_handlers[$parser]->EndElement($xml_parser_handlers[$parser],$name);
}
35 | |||
/*
 * Procedural character-data callback.
 *
 * Forwards the text chunk to the CharacterData method of the handler
 * object registered for $parser in the global $xml_parser_handlers
 * table, unless that handler has already recorded an error.
 */
Function xml_parser_character_data_handler($parser,$data)
{
	global $xml_parser_handlers;

	/* A non-empty error string means parsing has already failed. */
	if(strcmp($xml_parser_handlers[$parser]->error,""))
		return;
	$xml_parser_handlers[$parser]->CharacterData($xml_parser_handlers[$parser],$data);
}
43 | |||
/*
 * Builds the flat document structure from parser events.
 *
 * Every method receives the object that owns the results ($object) as
 * its first argument: results, errors and options are read from and
 * written to $object, while the current position in the tree is kept in
 * this handler's own $path member.
 *
 * Structure entries are keyed by a comma separated path of child
 * indexes ("0" for the root, "0,1" for its second child, ...). Element
 * entries are arrays with "Tag", "Elements" (child count) and, unless
 * simplified XML is enabled, "Attributes"; character data entries are
 * plain strings.
 */
class xml_parser_handler_class
{
	/* Parser handle used to query the current position. */
	var $xml_parser;

	/* Error state. */
	var $error_number=0;
	var $error="";
	var $error_code=0;
	var $error_line,$error_column,$error_byte_index;

	/* Parsed entries and, optionally, where each one was found. */
	var $structure=array();
	var $positions=array();

	/* Path of the element currently open ("" before the root element). */
	var $path="";

	/* Options. */
	var $store_positions=0;
	var $simplified_xml=0;
	var $fail_on_non_simplified_xml=0;

	/*
	 * Record an error on $object together with the line, column and
	 * byte index currently reported by $object's parser.
	 */
	Function SetError(&$object,$error_number,$error)
	{
		$handle=$object->xml_parser;
		$object->error_number=$error_number;
		$object->error=$error;
		$object->error_line=xml_get_current_line_number($handle);
		$object->error_column=xml_get_current_column_number($handle);
		$object->error_byte_index=xml_get_current_byte_index($handle);
	}

	/*
	 * Store $data under $path in $object's structure and, when
	 * positions are being stored, remember the current parser position
	 * for that path.
	 */
	Function SetElementData(&$object,$path,&$data)
	{
		$object->structure[$path]=$data;
		if(!$object->store_positions)
			return;
		$handle=$object->xml_parser;
		$object->positions[$path]=array(
			"Line"=>xml_get_current_line_number($handle),
			"Column"=>xml_get_current_column_number($handle),
			"Byte"=>xml_get_current_byte_index($handle)
		);
	}

	/*
	 * Open a new element: descend into it in $path and register its
	 * entry. With simplified XML, attributes are dropped, or reported
	 * as error 2 when fail_on_non_simplified_xml is set.
	 */
	Function StartElement(&$object,$name,&$attrs)
	{
		if(strlen($this->path)==0)
		{
			/* First element of the document. */
			$this->path="0";
		}
		else
		{
			/* Take the next free child index of the parent element. */
			$index=$object->structure[$this->path]["Elements"]++;
			$this->path.=",".$index;
		}
		$entry=array(
			"Tag"=>$name,
			"Elements"=>0
		);
		if(!$object->simplified_xml)
			$entry["Attributes"]=$attrs;
		elseif($object->fail_on_non_simplified_xml
		&& count($attrs)>0)
		{
			$this->SetError($object,2,"Simplified XML can not have attributes in tags");
			return;
		}
		$this->SetElementData($object,$this->path,$entry);
	}

	/*
	 * Close the current element by dropping the last component of
	 * $path.
	 */
	Function EndElement(&$object,$name)
	{
		$comma=strrpos($this->path,",");
		if($comma)
			$this->path=substr($this->path,0,$comma);
		else
			$this->path="";
	}

	/*
	 * Add character data to the current element. Data is appended to
	 * the previous child when that child is itself character data,
	 * otherwise a new child entry is created.
	 */
	Function CharacterData(&$object,$data)
	{
		$count=$object->structure[$this->path]["Elements"];
		if($count>0)
		{
			$last=$this->path.",".strval($count-1);
			if(GetType($object->structure[$last])=="string")
			{
				$object->structure[$last].=$data;
				return;
			}
		}
		$this->SetElementData($object,$this->path.",".$count,$data);
		$object->structure[$this->path]["Elements"]++;
	}
};
130 | |||
/*
 * XML parser front end.
 *
 * Feeds data to PHP's XML extension and collects the document into the
 * flat $structure array (see xml_parser_handler_class for its layout).
 * All parsing methods return an error message string, which is empty on
 * success. Error numbers:
 *
 *  1 - Could not create the XML parser
 *  2 - Could not parse data
 *  3 - Could not read from input stream
 */
class xml_parser_class
{
	/* Current parser handle, or 0 when no parse is in progress. */
	var $xml_parser=0;
	/* Handler object used while parsing with xml_set_object. */
	var $parser_handler;

	/* Error state; $error is "" while no error has happened. */
	var $error="";
	var $error_number=0;
	var $error_line=0;
	var $error_column=0;
	var $error_byte_index=0;
	var $error_code=0;

	/* Number of bytes requested per read by ParseStream. */
	var $stream_buffer_size=4096;

	/* Parsed entries and, when $store_positions is set, their positions. */
	var $structure=array();
	var $positions=array();

	/* Options, applied when the parser is created by Parse. */
	var $store_positions=0;
	var $case_folding=0;
	var $target_encoding="ISO-8859-1";
	var $simplified_xml=0;
	var $fail_on_non_simplified_xml=0;

	/* Start-tag callback used when handlers are bound with xml_set_object. */
	Function xml_parser_start_element_handler($parser,$name,$attrs)
	{
		if(!strcmp($this->error,""))
			$this->parser_handler->StartElement($this,$name,$attrs);
	}

	/* End-tag callback used when handlers are bound with xml_set_object. */
	Function xml_parser_end_element_handler($parser,$name)
	{
		if(!strcmp($this->error,""))
			$this->parser_handler->EndElement($this,$name);
	}

	/* Character-data callback used when handlers are bound with xml_set_object. */
	Function xml_parser_character_data_handler($parser,$data)
	{
		if(!strcmp($this->error,""))
			$this->parser_handler->CharacterData($this,$data);
	}

	/*
	 * Record an error together with an explicitly given position.
	 */
	Function SetErrorPosition($error_number,$error,$line,$column,$byte_index)
	{
		$this->error_number=$error_number;
		$this->error=$error;
		$this->error_line=$line;
		$this->error_column=$column;
		$this->error_byte_index=$byte_index;
	}

	/*
	 * Record an error at the parser's current position, or at line 1,
	 * column 1, byte 0 when there is no active parser.
	 */
	Function SetError($error_number,$error)
	{
		$this->error_number=$error_number;
		$this->error=$error;
		if($this->xml_parser)
		{
			$line=xml_get_current_line_number($this->xml_parser);
			$column=xml_get_current_column_number($this->xml_parser);
			$byte_index=xml_get_current_byte_index($this->xml_parser);
		}
		else
		{
			$line=$column=1;
			$byte_index=0;
		}
		$this->SetErrorPosition($error_number,$error,$line,$column,$byte_index);
	}

	/*
	 * Parse one chunk of XML data.
	 *
	 * $data        - the chunk to parse.
	 * $end_of_data - true when this is the last chunk; on success the
	 *                parser is then released and the results are final.
	 *
	 * The underlying parser is created on the first call. Returns the
	 * error message, or "" on success. Once an error is recorded every
	 * further call returns it without parsing.
	 */
	Function Parse($data,$end_of_data)
	{
		global $xml_parser_handlers;

		if(strcmp($this->error,""))
			return($this->error);
		if(!$this->xml_parser)
		{
			if(!function_exists("xml_parser_create"))
			{
				$this->SetError(1,"XML support is not available in this PHP configuration");
				return($this->error);
			}
			if(!($this->xml_parser=xml_parser_create()))
			{
				$this->SetError(1,"Could not create the XML parser");
				return($this->error);
			}
			xml_parser_set_option($this->xml_parser,XML_OPTION_CASE_FOLDING,$this->case_folding);
			xml_parser_set_option($this->xml_parser,XML_OPTION_TARGET_ENCODING,$this->target_encoding);
			if(function_exists("xml_set_object"))
			{
				/* Callbacks are dispatched to this object's methods. */
				xml_set_object($this->xml_parser,$this);
				$this->parser_handler=new xml_parser_handler_class;
				$this->structure=array();
				$this->positions=array();
			}
			else
			{
				/*
				 * Callbacks go to the global functions, which find the
				 * handler through the global registry; the handler then
				 * holds the options and collects the results itself.
				 */
				$xml_parser_handlers[$this->xml_parser]=new xml_parser_handler_class;
				$xml_parser_handlers[$this->xml_parser]->xml_parser=$this->xml_parser;
				$xml_parser_handlers[$this->xml_parser]->store_positions=$this->store_positions;
				$xml_parser_handlers[$this->xml_parser]->simplified_xml=$this->simplified_xml;
				$xml_parser_handlers[$this->xml_parser]->fail_on_non_simplified_xml=$this->fail_on_non_simplified_xml;
			}
			xml_set_element_handler($this->xml_parser,"xml_parser_start_element_handler","xml_parser_end_element_handler");
			xml_set_character_data_handler($this->xml_parser,"xml_parser_character_data_handler");
		}
		$parser_ok=xml_parse($this->xml_parser,$data,$end_of_data);
		if(!function_exists("xml_set_object"))
			$this->error=$xml_parser_handlers[$this->xml_parser]->error;
		if(!strcmp($this->error,""))
		{
			if($parser_ok)
			{
				if($end_of_data)
				{
					/* Document complete: collect results and release the parser. */
					if(function_exists("xml_set_object"))
						Unset($this->parser_handler);
					else
					{
						$this->structure=$xml_parser_handlers[$this->xml_parser]->structure;
						$this->positions=$xml_parser_handlers[$this->xml_parser]->positions;
						Unset($xml_parser_handlers[$this->xml_parser]);
					}
					xml_parser_free($this->xml_parser);
					$this->xml_parser=0;
				}
			}
			else
				$this->SetError(2,"Could not parse data: ".xml_error_string($this->error_code=xml_get_error_code($this->xml_parser)));
		}
		else
		{
			/* A handler reported the error: copy its details over. */
			if(!function_exists("xml_set_object"))
			{
				$this->error_number=$xml_parser_handlers[$this->xml_parser]->error_number;
				$this->error_code=$xml_parser_handlers[$this->xml_parser]->error_code;
				$this->error_line=$xml_parser_handlers[$this->xml_parser]->error_line;
				$this->error_column=$xml_parser_handlers[$this->xml_parser]->error_column;
				$this->error_byte_index=$xml_parser_handlers[$this->xml_parser]->error_byte_index;
			}
		}
		return($this->error);
	}

	/*
	 * Check that the character data entry at $path contains only
	 * spaces, tabs, line feeds and carriage returns.
	 *
	 * Returns "" when it does. Otherwise records error 2 and returns
	 * its message; the error position is computed by advancing from the
	 * position stored for $path (or from line 1, column 1, byte 0 when
	 * no position was stored) up to the offending character.
	 */
	Function VerifyWhiteSpace($path)
	{
		/*
		 * The stored positions belong to this object. Guard the lookup
		 * so that an unknown path falls back to the default position
		 * instead of reading undefined entries.
		 */
		if($this->store_positions
		&& IsSet($this->positions[$path]))
		{
			$line=$this->positions[$path]["Line"];
			$column=$this->positions[$path]["Column"];
			$byte_index=$this->positions[$path]["Byte"];
		}
		else
		{
			$line=$column=1;
			$byte_index=0;
		}
		if(!IsSet($this->structure[$path]))
		{
			$this->SetErrorPosition(2,"element path does not exist",$line,$column,$byte_index);
			return($this->error);
		}
		if(GetType($this->structure[$path])!="string")
		{
			$this->SetErrorPosition(2,"element is not data",$line,$column,$byte_index);
			return($this->error);
		}
		$data=$this->structure[$path];
		for($previous_return=0,$position=0;$position<strlen($data);$position++)
		{
			switch($data[$position])
			{
				case " ":
				case "\t":
					$column++;
					$byte_index++;
					$previous_return=0;
					break;
				case "\n":
					/* A line feed right after a carriage return is the same line break. */
					if(!$previous_return)
						$line++;
					$column=1;
					$byte_index++;
					$previous_return=0;
					break;
				case "\r":
					$line++;
					$column=1;
					$byte_index++;
					$previous_return=1;
					break;
				default:
					$this->SetErrorPosition(2,"data is not white space",$line,$column,$byte_index);
					return($this->error);
			}
		}
		return("");
	}

	/*
	 * Parse a whole document from an open stream, reading it in chunks
	 * of $stream_buffer_size bytes.
	 *
	 * Returns the error message, or "" on success. A read that yields
	 * no data before the end of the stream is reported as error 3.
	 */
	Function ParseStream($stream)
	{
		if(strcmp($this->error,""))
			return($this->error);
		do
		{
			$data=@fread($stream,$this->stream_buffer_size);
			/*
			 * Compare explicitly: a chunk consisting of the single
			 * character "0" is valid data, not a failed read.
			 */
			if($data===false
			|| $data==="")
			{
				if(!feof($stream))
				{
					$this->SetError(3,"Could not read from input stream".(IsSet($php_errormsg) ? ': '.$php_errormsg : ''));
					break;
				}
				/* Nothing left: still tell the parser the document ended. */
				$data="";
			}
			if(strcmp($this->Parse($data,feof($stream)),""))
				break;
		}
		while(!feof($stream));
		return($this->error);
	}

	/*
	 * Parse the XML document stored in $file.
	 *
	 * Returns the error message, or "" on success. A missing or
	 * unreadable file is only reported through the return value; it is
	 * not recorded in the object's error state.
	 */
	Function ParseFile($file)
	{
		if(!file_exists($file))
			return("the XML file to parse ($file) does not exist");
		if(!($definition=@fopen($file,"r")))
			return("could not open the XML file ($file)".(IsSet($php_errormsg) ? ': '.$php_errormsg : ''));
		$error=$this->ParseStream($definition);
		fclose($definition);
		return($error);
	}
};
358 | |||
/*
 * Parse an XML file, optionally using a cache of the parsed result.
 *
 * $parser                      - receives the parser object (either a
 *                                freshly created xml_parser_class or
 *                                the object restored from the cache).
 * $file                        - path of the XML file to parse.
 * $store_positions             - whether element positions are needed.
 * $cache                       - path of the cache file, or "" to
 *                                disable caching.
 * $case_folding, $target_encoding, $simplified_xml,
 * $fail_on_non_simplified_xml  - options copied to the new parser.
 *
 * The cache is used when it is not older than $file and the cached
 * object satisfies the requested options; otherwise the file is parsed
 * again and the cache is rewritten. Returns an error message, or "" on
 * success.
 */
Function XMLParseFile(&$parser,$file,$store_positions,$cache="",$case_folding=0,$target_encoding="ISO-8859-1",$simplified_xml=0,$fail_on_non_simplified_xml=0)
{
	if(!file_exists($file))
		return("the XML file to parse ($file) does not exist");
	if(strcmp($cache,""))
	{
		if(file_exists($cache)
		&& filemtime($file)<=filemtime($cache))
		{
			if(($cache_file=@fopen($cache,"r")))
			{
				if(function_exists("set_file_buffer"))
					set_file_buffer($cache_file,0);
				/*
				 * Do not ask fread for zero bytes: an empty cache file
				 * is reported as a read failure like any other.
				 */
				$cache_size=filesize($cache);
				$cache_contents=($cache_size>0 ? @fread($cache_file,$cache_size) : "");
				if(!$cache_contents)
					$error="could not read from the XML cache file $cache".(IsSet($php_errormsg) ? ': '.$php_errormsg : '');
				else
					$error="";
				fclose($cache_file);
				if(!strcmp($error,""))
				{
					/*
					 * NOTE(review): unserialize() must only be fed
					 * trusted data; the cache file location has to be
					 * writable by this application alone.
					 */
					if(GetType($parser=unserialize($cache_contents))=="object"
					&& IsSet($parser->structure))
					{
						if(!IsSet($parser->simplified_xml))
							$parser->simplified_xml=0;
						/*
						 * The cached object is usable when it is not
						 * simplified unless that was requested, and it
						 * has positions whenever they are requested.
						 */
						if(($simplified_xml
						|| !$parser->simplified_xml)
						&& (!$store_positions
						|| $parser->store_positions))
						{
							return("");
						}
					}
					else
						$error="it was not specified a valid cache object in XML file ($cache)";
				}
			}
			else
				$error="could not open cache XML file ($cache)".(IsSet($php_errormsg) ? ': '.$php_errormsg : '');
			if(strcmp($error,""))
				return($error);
		}
	}
	$parser=new xml_parser_class;
	$parser->store_positions=$store_positions;
	$parser->case_folding=$case_folding;
	$parser->target_encoding=$target_encoding;
	$parser->simplified_xml=$simplified_xml;
	$parser->fail_on_non_simplified_xml=$fail_on_non_simplified_xml;
	if(!strcmp($error=$parser->ParseFile($file),"")
	&& strcmp($cache,""))
	{
		if(($cache_file=@fopen($cache,"w")))
		{
			if(function_exists("set_file_buffer"))
				set_file_buffer($cache_file,0);
			/*
			 * Always close the file, even when writing failed, so the
			 * handle is not leaked and the file is closed before it is
			 * removed.
			 */
			$written=@fwrite($cache_file,serialize($parser));
			$closed=@fclose($cache_file);
			if(!$written
			|| !$closed)
				$error="could not write to the XML cache file ($cache)".(IsSet($php_errormsg) ? ': '.$php_errormsg : '');
			if(strcmp($error,""))
				unlink($cache);
		}
		else
			$error="could not open for writing to the cache file ($cache)".(IsSet($php_errormsg) ? ': '.$php_errormsg : '');
	}
	return($error);
}
426 | |||
427 | ?> \ No newline at end of file | ||