Friday 31 May 2013

SAX Parser


SAX is the Simple API for XML, originally a Java-only API. A SAX parser can be used to parse XML documents in an efficient, event-driven way. A SAX parser uses callback methods (for example, those of org.xml.sax.helpers.DefaultHandler) to inform clients of the XML document structure. In Java, we can extend DefaultHandler and override a few of its methods to achieve XML parsing.
DefaultHandler provides, among others, the following callback methods:

1) startDocument() : This function executes at the start of the document.
2) startElement() : This function executes at the start of each and every tag where we can set some boolean variables to true.
3) endDocument() : This function executes at the end of the document.
4) endElement() : This function executes at the end of each and every tag where we can set some boolean variables to false.
5) characters() : This is an important function, as it receives the text between a start tag and its corresponding end tag.
Now, consider that we have to parse the following XML file.


<company>
           <employee>
                          <name>Azim</name>
                          <id>201205556</id>
                          <degree>M.Tech</degree>
                          <history>
                                      <name>Progress Software</name>
                          </history>
           </employee>
           <employee>
                         <name>Aditya</name>
                         <id>AK47</id>
                         <degree>B.Tech</degree>
                         <history>
                                     <name>Synechron</name>
                         </history>
            </employee>
            <employee>
                          <name>Abdul</name>
                          <id>40040</id>
                          <degree>B.E</degree>
                          <history>
                                     <name>Cognizant</name>
                                     <name>Wipro</name>
                          </history>
            </employee>
</company>


The following example demonstrates the use of DefaultHandler and various functionalities provided in Java.
import java.io.File;
import java.io.OutputStreamWriter;
import java.io.Writer;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;



/**
 * Minimal SAX example: parses an XML document of {@code <employee>} records
 * and prints each employee's name, id, degree and previous company names to
 * {@code System.out}.
 *
 * <p>The handler tracks its position in the document with a set of boolean
 * flags: {@link #startElement} raises the flag for the element being entered,
 * {@link #endElement} lowers it, and {@link #characters} consults the flags to
 * decide how to label the text it receives.
 *
 * <p>The flags are {@code static}, so they are shared by every instance of
 * this class; parsing two documents concurrently with two handlers would
 * make them interfere with each other.
 */
public class SAX extends DefaultHandler
{
    /** Path of the XML document parsed by {@link #main}. */
    public static String inputXMLFile = "/home/star/workspace1/PracticeSession/sample.xml";

    /** Writer over {@code System.out}, created in {@link #main}; nothing in this class writes to it. */
    public static Writer out;

    /** Element-tracking flags; each one is true while the parser is inside the matching element. */
    public static boolean empFlag,nameFlag,idFlag,historyFlag,degFlag,histNameFlag;

    /**
     * Parses {@link #inputXMLFile} with a new {@code SAX} handler.
     * Any failure is reported by printing its stack trace.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main (String argv [])
    {
        empFlag = nameFlag = idFlag = historyFlag = degFlag = histNameFlag = false;
        SAXParserFactory factory = SAXParserFactory.newInstance();
        try
        {
            // Setting up the output stream - in this case System.out with UTF8 encoding
            out = new OutputStreamWriter(System.out, "UTF8");
            // Getting a parser from the factory
            SAXParser saxParser = factory.newSAXParser();
            // Parsing the XML document using the parser
            saxParser.parse(new File(inputXMLFile), new SAX());
        }
        catch (Exception exception)
        {
            // Covers parser configuration, SAX and I/O failures; JVM Errors are left to propagate.
            exception.printStackTrace();
        }
    }

    /** Announces the start of parsing. */
    @Override
    public void startDocument() throws SAXException
    {
        System.out.println("Document Parsing Started:");
    }

    /** Announces the end of parsing. */
    @Override
    public void endDocument() throws SAXException
    {
        System.out.println("Document Parsing Completed Successfully:");
    }

    /**
     * Raises the flag that corresponds to the element being entered.
     * Element names are compared case-insensitively against {@code qName}.
     */
    @Override
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException
    {
        if (qName.equalsIgnoreCase("employee"))
        {
            empFlag = true;
        }
        else if (qName.equalsIgnoreCase("name") && historyFlag)   // <name> tag inside <history>
        {
            histNameFlag = true;
        }
        else if (qName.equalsIgnoreCase("name"))   // <name> tag directly under <employee>
        {
            nameFlag = true;
        }
        else if (qName.equalsIgnoreCase("id"))
        {
            idFlag = true;
        }
        else if (qName.equalsIgnoreCase("degree"))
        {
            // Without this branch degFlag is never raised and the degree is never printed.
            degFlag = true;
        }
        else if (qName.equalsIgnoreCase("history"))
        {
            historyFlag = true;
        }
    }

    /**
     * Lowers the flag that corresponds to the element being left, mirroring
     * the branches of {@link #startElement}.
     */
    @Override
    public void endElement(String namespaceURI, String localName, String qName) throws SAXException
    {
        if (qName.equalsIgnoreCase("employee"))
        {
            empFlag = false;
        }
        else if (qName.equalsIgnoreCase("name") && historyFlag)
        {
            histNameFlag = false;
        }
        else if (qName.equalsIgnoreCase("name"))
        {
            nameFlag = false;
        }
        else if (qName.equalsIgnoreCase("id"))
        {
            idFlag = false;
        }
        else if (qName.equalsIgnoreCase("degree"))
        {
            // Must be cleared, otherwise whitespace after </degree> would be printed as a degree.
            degFlag = false;
        }
        else if (qName.equalsIgnoreCase("history"))
        {
            historyFlag = false;
        }
    }

    /**
     * Prints the received text with a label chosen from the current flags.
     * Text received while no leaf flag is set (for example the whitespace
     * between elements) is ignored.
     *
     * <p>Note: the SAX contract allows a parser to deliver one element's text
     * in several calls; in that case the label is printed once per chunk.
     */
    @Override
    public void characters(char buffer [], int offset, int length) throws SAXException
    {
        String str = new String(buffer, offset, length);
        if (nameFlag && !historyFlag)
        {
            System.out.println("Employee Details:");
            System.out.println("Name: "+ str);
        }
        else if (histNameFlag)
        {
            System.out.println("Previous Company Name: "+ str);
        }
        else if (idFlag)
        {
            System.out.println("Id: "+ str);
        }
        else if (degFlag)
        {
            System.out.println("Degree: "+ str);
        }
    }
}

Output:
Document Parsing Started:
Employee Details:
Name: Azim
Id: 201205556
Previous Company Name: Progress Software
Employee Details:
Name: Aditya
Id: AK47
Previous Company Name: Synechron
Employee Details:
Name: Abdul
Id: 40040
Previous Company Name: Cognizant
Previous Company Name: Wipro
Document Parsing Completed Successfully: