解析XML——简单直接的来。
?
==================闲扯的话================
对于现在越来越轻量级,越来越讲究速度和接近用户的应用来说,xml确实有点复杂了。解析起来不仅耗内存,而且很复杂。这就好像花了几千块钱买了个MSOffice,但是80%的feature都用不着,还白白的耗着CPU和内存。
个人觉得,设置文件用XML其实挺好,因为设置文件一般并不太大,而且要求可读性强,还有很多乱七八糟的需求,可以利用XML的力量。昨天搞chrome的设置,发现chrome的设置文件也是使用的json,读起来也是轻松愉快。
前阵子做了个程序,需要解析豆瓣API调用返回的XML。真想说一句。。。豆瓣你别用XML了。。。至少,提供个json版的API调用吧。
(以上谨代表个人观点)
===================正文=================
解析豆瓣返回的xml,实在是不想用DOM这个重量级的玩意。DOM这个玩意,说它强大好还是说它官僚好呢。我倾向于使用SAX解析。但是现在面临的一个问题是,我需要根据xml节点的名字和属性值(一个或者多个)来决定当前的值是不是我想要的。这就麻烦一点点。第一反应是考虑xpath。后来觉得不如自己做一个得了,权当是按需定制一个轻量级的xpath。
首先定义XMLSearchUnit类,这个类的实例用来描述一个需要在XML中搜索的值,值可以是xml节点的值,或者是节点的属性。
package com.deepnighttwo.resourceresolver.douban.resolver.utils;
import java.util.HashMap;
import java.util.Map;
import org.xml.sax.Attributes;
/**
?*
?* Represent a search task. Target couldbe value of a node or attribute of the
?* node.
?*
?* @author mzang
?*/
public class XMLSearchUnit {
??? // attribute values to be matched during search
??? private Map<String, String> attributeMatchValidation = new HashMap<String, String>();
??? // if target is an attribute, then set this member to bethe attribute name.
??? // if it is null or empty, then meansthe target is node value.
??? private String expectedAttr;
???// xml path, formatis: /node_name/node_name/...
??? private String xmlPath;
??? publicXMLSearchUnit(String xmlPath) {
??????? this.xmlPath= xmlPath;
??? }
??? /**
???? * if current node meets the searchconditions or not. Meets means the path
???? * is correct and the attribute valueis matched.
???? *
???? * @param path
???? * @paramattributes
???? * @return
???? */
??? public boolean match(String path, Attributes attributes) {
??????? if(xmlPath.equals(path) == false) {
??????????? return false;
??????? }
??????? for(String key : attributeMatchValidation.keySet()) {
??????????? String exp =attributeMatchValidation.get(key);
??????????? String compare =attributes.getValue(key);
??????????? if(exp.equalsIgnoreCase(compare) == false) {
?????? ?????????return false;
??????????? }
??????? }
??????? return true;
??? }
??? publicMap<String, String> getAttributeMatchValidation() {
??????? returnattributeMatchValidation;
??? }
??? public void addAttributeValidation(String key, String value) {
?? ?????attributeMatchValidation.put(key, value);
??? }
??? publicString getXmlPath() {
??????? returnxmlPath;
??? }
??? public void setAttributeMatchValidation(
??????????? Map<String, String>attributeMatchValidation) {
??????? this.attributeMatchValidation= attributeMatchValidation;
??? }
??? publicString getExpectedAttr() {
??????? returnexpectedAttr;
??? }
??? /**
???? * if target is node value, then setexpectedAttr to null. if target is an
???? * attribute value, set it to be theattribute name.
???? *
???? * @paramexpectedAttr
???? */
??? public void setExpectedAttr(String expectedAttr) {
??????? this.expectedAttr= expectedAttr;
??? }
??? /**
???? * hash code can be cached if allproperties are not be be changed.
???? */
??? @Override
??? public int hashCode() {
??????? final int prime = 31;
??????? intresult = 1;
??????? result = prime
??????????????? * result
??????????????? +((attributeMatchValidation == null) ? 0
??????????????????????? :attributeMatchValidation.hashCode());
??????? result = prime * result
??????????????? + ((expectedAttr == null) ? 0 : expectedAttr.hashCode());
??????? result = prime * result +((xmlPath == null) ? 0 : xmlPath.hashCode());
??????? returnresult;
??? }
??? @Override
??? public boolean equals(Object obj) {
??????? if (this == obj)
??????????? return true;
??????? if (obj== null)
??????????? return false;
??????? if(getClass() != obj.getClass())
??????????? return false;
??????? XMLSearchUnit other =(XMLSearchUnit) obj;
??????? if (attributeMatchValidation== null) {
??????????? if(other.attributeMatchValidation != null)
??????????????? return false;
??????? } else if (!attributeMatchValidation
???????????????.equals(other.attributeMatchValidation))
??????????? return false;
??????? if(expectedAttr == null) {
??????????? if(other.expectedAttr != null)
??????????????? return false;
??????? } else if (!expectedAttr.equals(other.expectedAttr))
??????????? return false;
??????? if(xmlPath == null) {
??????????? if(other.xmlPath != null)
??????????????? return false;
??????? } else if (!xmlPath.equals(other.xmlPath))
??????????? return false;
??????? return true;
??? }
}
这个类比较简单。就是用一个hashmap保存待匹配的attribute键值对,用一个字符串表示期待的attribute name,用一个字符串表示期待的node path。
然后就是如何在SAX解析器里用这个类的实例去搜索了。
package com.deepnighttwo.resourceresolver.douban.resolver.utils;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
?*
?* SAXP parser working withXMLSearchUnit.
?*
?* @author mzang
?*/
public class DoubanSearchParser extendsDefaultHandler {
??? // create and initial search units
??? public static final XMLSearchUnit DETAILS_LINK_API_PATH = new XMLSearchUnit(
??????????? "/feed/entry/id");
??? public static final XMLSearchUnit DETAILS_CONTENT_PATH = new XMLSearchUnit(
??????????? "/entry/summary");
??? public static final XMLSearchUnit DETAILS_TITLE_PATH = new XMLSearchUnit(
??????????? "/entry/title");
??? public static final XMLSearchUnit DETAILS_CHINESE_NAME_PATH = new XMLSearchUnit(
???????????"/entry/db:attribute");
??? public static final XMLSearchUnit DETAILS_RATINGE_PATH = new XMLSearchUnit(
???????????"/entry/gd:rating");
??? public static final XMLSearchUnitDETAILS_RATINGE_RATER_COUNT_PATH = new XMLSearchUnit(
??????????? "/entry/gd:rating");
??? public static final XMLSearchUnit DETAILS_LINK_URL_PATH = new XMLSearchUnit(
???????????"/feed/entry/link");
??? static {
???????DETAILS_LINK_URL_PATH.addAttributeValidation("rel","alternate");
??????? DETAILS_LINK_URL_PATH.setExpectedAttr("href");
???????DETAILS_CHINESE_NAME_PATH.addAttributeValidation("lang","zh_CN");
???????DETAILS_CHINESE_NAME_PATH.addAttributeValidation("name","aka");
???????DETAILS_RATINGE_PATH.setExpectedAttr("average");
???????DETAILS_RATINGE_RATER_COUNT_PATH.setExpectedAttr("numRaters");
??? }
??? // a map to store the XMLSearchUnit and value
??? private Map<XMLSearchUnit, String> results = new HashMap<XMLSearchUnit, String>();
??? // a counter of search unit. if it is 0, then all searchunit finds a match
??? // value and the result of the XMLwill be skipped.
??? private int count = 0;
??? privateStringBuilder path = new StringBuilder();
??? private static final String pathSeparater = "/";
??? privateXMLSearchUnit[] searchUnits;
??? List<XMLSearchUnit> foundItems= new ArrayList<XMLSearchUnit>();
??? /**
???? * constructor, accept XML inputstream, 0 or more search unit instances.
???? *
???? * @param input
???? * @paramexpectedPath
???? * @return
???? */
?? ?public Map<XMLSearchUnit, String>parseResults(InputStream input,
??????????? XMLSearchUnit...expectedPath) {
??????? for(XMLSearchUnit search : expectedPath) {
??????????? results.put(search, null);
??????? }
??????? searchUnits = expectedPath;
???? ???count = expectedPath.length;
??????? XMLReader xmlReader = null;
??????? try {
??????????? SAXParserFactory spfactory =SAXParserFactory.newInstance();
??????????? spfactory.setValidating(false);
??????????? SAXParser saxParser =spfactory.newSAXParser();
??????????? xmlReader =saxParser.getXMLReader();
??????????? xmlReader.setContentHandler(this);
??????????? xmlReader.parse(new InputSource(input));
??????? } catch(Exception e) {
??????????? System.err.println(e);
??????????? System.exit(1);
?????? ?}
??????? returnresults;
??? }
??? private void addToPath(String addPath) {
???????path.append(pathSeparater).append(addPath.toLowerCase());
??? }
??? private void popPath() {
??????? int index= path.lastIndexOf(pathSeparater);
??????? // String removedPath = path.substring(index);
???????path.delete(index, path.length());
??? }
??? @Override
??? public void startElement(String uri, String localName, String qName,
??????????? Attributes attributes) throws SAXException {
??????? foundItems.clear();
???????if (count == 0) {
??????????? return;
??????? }
??????? // update path
???????addToPath(qName);
??????? List<XMLSearchUnit>foundAttrItems = null;
??????? // check if current node matches search units. if it is anode value
??????? // search, then store it in amember variable named foundItems because
??????? // the value of the node is knownonly when reaches the end of the
??????? // node.but for attribute search,it value is known here. So then are
??????? // put in a local variable listnamed foundAttrItems.
???????for (XMLSearchUnit unit : searchUnits) {
??????????? if(unit.match(path.toString(), attributes) == true) {
??????????????? if(unit.getExpectedAttr() == null) {
??????????????????? foundItems.add(unit);
??????????????? } else {
??????????????????? if(foundAttrItems == null) {
??????????????????????? foundAttrItems = new ArrayList<XMLSearchUnit>();
??????????????????? }
???????????????????foundAttrItems.add(unit);
??????????????? }
??????????? }
??????? }
??????? // if no attribute match, return.
???????if (foundAttrItems == null) {
??????????? return;
??????? }
??????? // fill search unit value using attribute value. updatecount.
???????for (XMLSearchUnit attrUnit : foundAttrItems) {
??????????? String attrValue =attributes.getValue(attrUnit.getExpectedAttr());
??????????? if(results.get(attrUnit) == null) {
??????????????? count--;
??????????? }
??????????? results.put(attrUnit,attrValue);
??????????? count--;
??????? }
??? }
??? /**
???? * if current node matches, the thenode value is useful, store it.
???? */
??? @Override
??? public void characters(char[] ch, intstart, int length)
??????????? throwsSAXException {
??????? if(count == 0) {
??????????? return;
??????? }
??????? if(foundItems.size() == 0) {
?????????? ?return;
??????? }
??????? for(XMLSearchUnit unit : foundItems) {
??????????? String content = new String(ch, start, length);
??????????? if(results.get(unit) == null) {
??????????????? count--;
??????????? }
??????????? results.put(unit, content);
??? ????}
??? }
??? @Override
??? public void endElement(String uri, String localName, String qName)
??????????? throwsSAXException {
??????? foundItems.clear();
??????? if(count == 0) {
??????????? return;
??????? }
??????? popPath();
??? }
}
?