Wednesday, January 26, 2011

Xml Parsing using Java


Parse using SAX or DOM
DOM (Document Object Model) represents the XML document as a hierarchy (a tree of nodes) to simplify access. Since the whole document is held in memory, it is more resource-intensive.

SAX (Simple API for XML) creates events as it reads the document and calls callback methods that the programmer writes to handle them.

Both examples read this XML data file [howto.xml]

<?xml version="1.0"?>
<howto>
  <topic>
      <title>Java</title>
      <url>http://www.rgagnon/javahowto.htm</url>
  </topic>
    <topic>
      <title>PowerBuilder</title>
      <url>http://www.rgagnon/pbhowto.htm</url>
  </topic>
      <topic>
        <title>Javascript</title>
        <url>http://www.rgagnon/jshowto.htm</url>
  </topic>
      <topic>
        <title>VBScript</title>
        <url>http://www.rgagnon/vbshowto.htm</url>
  </topic>
</howto>

In both cases, the output is:

Title: Java
Url: http://www.rgagnon/javahowto.htm
Title: PowerBuilder
Url: http://www.rgagnon/pbhowto.htm
Title: Javascript
Url: http://www.rgagnon/jshowto.htm
Title: VBScript
Url: http://www.rgagnon/vbshowto.htm

[HowToListerSAX.java]

// jdk1.4.1
import java.io.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;

// using SAX
/**
 * Prints the title and URL of every topic found in {@code howto.xml},
 * using the event-driven SAX API.
 */
public class HowToListerSAX {
  /**
   * SAX callback handler that prints the text content of each
   * {@code title} and {@code url} element (tag names are matched
   * case-insensitively).
   *
   * <p>Text is buffered between the start and end tag and printed from
   * {@link #endElement}, because a SAX parser is allowed to deliver the
   * text of a single element through several {@link #characters} calls.
   */
  class HowToHandler extends DefaultHandler {
    /** True while the parser is inside a {@code title} element. */
    boolean title = false;
    /** True while the parser is inside a {@code url} element. */
    boolean url   = false;
    /** Text collected so far for the element currently being captured. */
    private final StringBuilder text = new StringBuilder();

    /**
     * Starts capturing text when a {@code title} or {@code url} element opens.
     *
     * @param nsURI        namespace URI of the element (unused)
     * @param strippedName local name of the element (unused)
     * @param tagName      qualified name of the element, used for matching
     * @param attributes   attributes of the element (unused)
     */
    @Override
    public void startElement(String nsURI, String strippedName,
                            String tagName, Attributes attributes)
       throws SAXException {
      if (tagName.equalsIgnoreCase("title")) {
        title = true;
        text.setLength(0);   // discard anything left from a previous element
      }
      if (tagName.equalsIgnoreCase("url")) {
        url = true;
        text.setLength(0);
      }
    }

    /**
     * Appends a chunk of character data to the buffer while inside a
     * captured element; text outside those elements is ignored.
     */
    @Override
    public void characters(char[] ch, int start, int length) {
      if (title || url) {
        text.append(ch, start, length);
      }
    }

    /**
     * Prints the buffered text when a captured element closes and clears
     * its flag, so that an empty element cannot leak its state into the
     * whitespace that follows it.
     *
     * @param nsURI        namespace URI of the element (unused)
     * @param strippedName local name of the element (unused)
     * @param tagName      qualified name of the element, used for matching
     */
    @Override
    public void endElement(String nsURI, String strippedName, String tagName)
       throws SAXException {
      if (title && tagName.equalsIgnoreCase("title")) {
        System.out.println("Title: " + text);
        title = false;
      }
      else if (url && tagName.equalsIgnoreCase("url")) {
        System.out.println("Url: " + text);
        url = false;
      }
    }
  }

  /**
   * Parses {@code howto.xml} (resolved as a system identifier by the
   * parser) and prints each topic's title and URL to standard output.
   *
   * @throws Exception if no SAX parser can be created or the document
   *                   cannot be read or parsed
   */
  public void list( ) throws Exception {
    // Ask JAXP for the platform's default parser instead of naming a
    // concrete implementation class: the Crimson parser that used to be
    // hard-coded here is not bundled with current JDKs.
    XMLReader parser =
       javax.xml.parsers.SAXParserFactory.newInstance()
          .newSAXParser()
          .getXMLReader();
    parser.setContentHandler(new HowToHandler( ));
    parser.parse("howto.xml");
  }

  /** Command-line entry point; arguments are ignored. */
  public static void main(String[] args) throws Exception {
    new HowToListerSAX().list( );
  }
}

[HowtoListerDOM.java]

// jdk1.4.1
import java.io.File;
import javax.xml.parsers.*;
import org.w3c.dom.*;

// using DOM
/**
 * Prints the title and URL of every topic found in {@code howto.xml},
 * using the in-memory DOM API.
 */
public class HowtoListerDOM {
 /**
  * Loads {@code howto.xml} from the working directory into a DOM tree and
  * prints one "Title:" line and one "Url:" line per {@code topic} element.
  * Any failure is reported by printing its stack trace.
  *
  * @param args command-line arguments (ignored)
  */
 public static void main(String[] args) {
   File file = new File("howto.xml");
   try {
     DocumentBuilder builder =
       DocumentBuilderFactory.newInstance().newDocumentBuilder();
     Document doc = builder.parse(file);

     NodeList nodes = doc.getElementsByTagName("topic");
     for (int i = 0; i < nodes.getLength(); i++) {
       Element topic = (Element) nodes.item(i);
       System.out.println("Title: " + firstDescendantText(topic, "title"));
       System.out.println("Url: " + firstDescendantText(topic, "url"));
     }
   }
   catch (Exception e) {
      e.printStackTrace();
   }
 }

 /**
  * Returns the character data of the first descendant of {@code parent}
  * named {@code tagName}, or "?" when there is no such element or it does
  * not start with character data.
  */
 private static String firstDescendantText(Element parent, String tagName) {
   NodeList matches = parent.getElementsByTagName(tagName);
   // item(0) is null when the list is empty; the callee handles that case.
   return getCharacterDataFromElement((Element) matches.item(0));
 }

 /**
  * Returns the data of the element's first child when that child is a
  * character-data node.
  *
  * @param e the element to read; may be {@code null}
  * @return the first child's character data, or "?" when {@code e} is
  *         {@code null}, has no children, or its first child is not a
  *         {@link CharacterData} node
  */
 public static String getCharacterDataFromElement(Element e) {
   if (e == null) {
     // A missing element is reported with the same placeholder as an
     // element without text, rather than failing with a NullPointerException.
     return "?";
   }
   Node child = e.getFirstChild();
   if (child instanceof CharacterData) {
     return ((CharacterData) child).getData();
   }
   return "?";
 }
}