Java开发之路

文章32 |   阅读 14713 |   点赞0

来源:https://blog.csdn.net/sunnyyoona

[Java开发之路](11)SAX解析XML文档

x33g5p2x  于2021-03-13 发布在 其他  
字(14.5k)|赞(0)|评价(0)|浏览(361)

1.简介

DOM解析功能强大,可增删改查,但解析时会将整个XML文档读入内存,因此更适用于较小的文档;

SAX解析是从头到尾逐行逐个元素解析,修改较为不便,但适用于只读的大文档;

SAX采用事件驱动的方式解析XML。套用网友的解释:如同在电影院看电影一样,从头到尾看一遍,不能回退(Dom可来来回回读取),在看电影的过程中,每遇到一个情节,都会调用大脑去接收处理这些信息。SAX也是相同的原理,每遇到一个元素节点,都会调用相应的方法来处理。在SAX的解析过程中,读取到文档开头、文档结尾,元素的开头和元素结尾都会调用相应方法,我们可以在这些方法中进行相应事件处理。

对应方法:

  1. public void startDocument() throws SAXException {
  2. }
  3. public void endDocument() throws SAXException {
  4. }
  5. public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
  6. }
  7. public void endElement(String uri, String localName, String qName) throws SAXException {
  8. }

我们还需要一个方法来处理元素开始标签与结束标签之间的文本节点(我们常把它误认为是元素节点本身的值)。注意:解析器可能把同一段连续文本拆成多次回调传入,因此应先累加,等到元素结束时再使用。

  1. public void characters(char[] ch, int start, int length) throws SAXException {
  2. }

2.解析

解析步骤:

(1)通过SAXParserFactory的静态方法newInstance()获取SAXParserFactory实例对象factory

  1. SAXParserFactory factory = SAXParserFactory.newInstance();

(2)通过SAXParserFactory实例的newSAXParser()方法返回SAXParser实例parser

  1. SAXParser parser = factory.newSAXParser();

(3)创建一个类继承DefaultHandler,重写其中的一些方法进行业务处理

  1. package com.qunar.handler;
  2. import org.xml.sax.Attributes;
  3. import org.xml.sax.SAXException;
  4. import org.xml.sax.helpers.DefaultHandler;
  /**
   * Skeleton SAX event handler.
   *
   * <p>Every callback is overridden so that application logic can be added to it
   * later. As written, the document-level callbacks are empty and the remaining
   * callbacks only delegate to {@link DefaultHandler}.
   */
  public class SAXParserHandler extends DefaultHandler {

      /** Marks the start of parsing. */
      @Override
      public void startDocument() throws SAXException {
      }

      /** Called for each opening tag encountered in the XML file. */
      @Override
      public void startElement(String uri, String localName, String qName, Attributes attributes)
              throws SAXException {
          super.startElement(uri, localName, qName, attributes);
      }

      /** Called with the character data found between tags. */
      @Override
      public void characters(char[] ch, int start, int length) throws SAXException {
          super.characters(ch, start, length);
      }

      /** Called for each closing tag encountered in the XML file. */
      @Override
      public void endElement(String uri, String localName, String qName) throws SAXException {
          super.endElement(uri, localName, qName);
      }

      /** Marks the end of parsing. */
      @Override
      public void endDocument() throws SAXException {
      }
  }

(4)创建Handler类对象实例

  1. // 定义SAXParserHandler对象
  2. SAXParserHandler handler = new SAXParserHandler();

(5)调用parser.parse(path, handler)解析XML文档,本文用到的示例文档bookstore.xml内容如下:

  1. <?xml version="1.0" encoding="utf-8"?><bookstore>
  2. <book category="Java">
  3. <title lang="chi">Java多线程编程核心技术</title>
  4. <author>高洪岩</author>
  5. <year>2015</year>
  6. <price>69.00</price>
  7. </book>
  8. <book category="C++">
  9. <title lang="en">Effective C++: 55 Specific Ways to Improve Your Programs and Designs</title>
  10. <author>Scott Meyers</author>
  11. <year>2006</year>
  12. <price>58.00</price>
  13. </book>
  14. <book category="Web">
  15. <title lang="en">Learning XML</title>
  16. <author>Erik T. Ray</author>
  17. <year>2016</year>
  18. <price>39.95</price>
  19. </book>
  20. </bookstore>

3.具体实例:

  1. package com.qunar.handler;
  2. import org.xml.sax.Attributes;
  3. import org.xml.sax.SAXException;
  4. import org.xml.sax.helpers.DefaultHandler;
  /**
   * SAX handler that prints the content of a bookstore document to standard
   * output while the document is being parsed.
   *
   * <p>For every {@code book} element it prints a start banner, all of the
   * element's attributes as {@code name:value} lines and an end banner. For every
   * other element except {@code bookstore} it prints {@code name:} followed by the
   * element's text.
   *
   * <p>Character data is buffered and printed at the next tag instead of inside
   * {@link #characters(char[], int, int)}, because a SAX parser is allowed to
   * deliver one contiguous run of text in several chunks; printing each chunk
   * separately would break a single value across several lines.
   */
  public class SAXParserHandler extends DefaultHandler {

      /** 1-based number of the most recently opened {@code book} element. */
      private int bookIndex = 0;

      /** Character data received since the last opening or closing tag. */
      private final StringBuilder text = new StringBuilder();

      /** Marks the start of parsing. */
      @Override
      public void startDocument() throws SAXException {
          System.out.println("SAX解析开始...");
      }

      /** Marks the end of parsing. */
      @Override
      public void endDocument() throws SAXException {
          System.out.println("SAX解析结束...");
      }

      /**
       * Handles an opening tag.
       *
       * @param qName      qualified name of the element being opened
       * @param attributes attributes attached to the element
       */
      @Override
      public void startElement(String uri, String localName, String qName, Attributes attributes)
              throws SAXException {
          super.startElement(uri, localName, qName, attributes);
          // Emit any text that preceded this tag before printing the tag itself.
          flushText();
          if ("book".equals(qName)) {
              ++bookIndex;
              System.out.println("开始解析第" + bookIndex + "本书...");
              // The attribute names and their count are not assumed to be known,
              // so iterate over all of them. When a name is known in advance,
              // attributes.getValue("category") returns its value directly.
              int size = attributes.getLength();
              for (int i = 0; i < size; ++i) {
                  System.out.println(attributes.getQName(i) + ":" + attributes.getValue(i));
              }
          } else if (!"bookstore".equals(qName)) {
              // No line break: the element's text is printed right after the label.
              System.out.print(qName + ":");
          }
      }

      /**
       * Handles a closing tag: prints the element's buffered text and, for a
       * {@code book} element, the end banner.
       *
       * @param qName qualified name of the element being closed
       */
      @Override
      public void endElement(String uri, String localName, String qName) throws SAXException {
          super.endElement(uri, localName, qName);
          flushText();
          if ("book".equals(qName)) {
              System.out.println("结束解析第" + bookIndex + "本书...");
          }
      }

      /** Buffers character data; it is printed when the next tag is reached. */
      @Override
      public void characters(char[] ch, int start, int length) throws SAXException {
          super.characters(ch, start, length);
          text.append(ch, start, length);
      }

      /** Prints the buffered text unless it is blank, then empties the buffer. */
      private void flushText() {
          String pending = text.toString();
          if (!pending.trim().equals("")) {
              System.out.println(pending);
          }
          text.setLength(0);
      }
  }
  1. package com.qunar.xml;
  2. import java.io.IOException;
  3. import javax.xml.parsers.ParserConfigurationException;
  4. import javax.xml.parsers.SAXParser;
  5. import javax.xml.parsers.SAXParserFactory;
  6. import org.xml.sax.SAXException;
  7. import com.qunar.handler.SAXParserHandler;
  8. /**
  9. * SAX方式解析XML文档
  10. * @author sjf0115
  11. *
  12. */
  13. public class SAXXMLCode {
  14. public static void main(String[] args) {
  15. String path = "D:\\bookstore.xml";
  16. try {
  17. // 通过SAXParserFactory的静态方法newInstance()方法获取SAXParserFactory实例对象factory
  18. SAXParserFactory factory = SAXParserFactory.newInstance();
  19. // 通过SAXParserFactory实例的newSAXParser()方法返回SAXParser实例parser
  20. SAXParser saxParser = factory.newSAXParser();
  21. // 定义SAXParserHandler对象
  22. SAXParserHandler handler = new SAXParserHandler();
  23. // 解析XML文档
  24. saxParser.parse(path, handler);
  25. } catch (ParserConfigurationException e) {
  26. e.printStackTrace();
  27. } catch (SAXException e) {
  28. e.printStackTrace();
  29. } catch (IOException e) {
  30. e.printStackTrace();
  31. }
  32. }
  33. }

运行结果:

<table cellspacing="0" cellpadding="0" style="border-collapse:collapse; border:1px solid rgb(187,187,187); width:1050px"><tbody><tr><td style="border-collapse:collapse; border:1px solid rgb(187,187,187); width:1049px"><br/><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">SAX解析开始...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">开始解析第1本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">category:Java</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">title:Java多线程编程核心技术</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">author:高洪岩</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">year:2015</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">price:69.00</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">结束解析第1本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">开始解析第2本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">category:C++</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">title:Effective C++: 55 Specific Ways to Improve Your Programs and Designs</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">author:Scott Meyers</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">year:2006</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">price:58.00</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">结束解析第2本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">开始解析第3本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">category:Web</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">title:Learning 
XML</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">author:Erik T. Ray</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">year:2016</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">price:39.95</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">结束解析第3本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">SAX解析结束...</span></div></td></tr></tbody></table>

4.解析并储存于对象中

  1. package com.qunar.bean;
  /**
   * Book entity: a plain bean with one string property per piece of book data.
   *
   * @author sjf0115
   */
  public class Book {

      private String category;
      private String title;
      private String author;
      private String year;
      private String price;
      private String lang;

      /** @return the category, or {@code null} if none was set */
      public String getCategory() {
          return category;
      }

      /** @param category the category to store */
      public void setCategory(String category) {
          this.category = category;
      }

      /** @return the title, or {@code null} if none was set */
      public String getTitle() {
          return title;
      }

      /** @param title the title to store */
      public void setTitle(String title) {
          this.title = title;
      }

      /** @return the author, or {@code null} if none was set */
      public String getAuthor() {
          return author;
      }

      /** @param author the author to store */
      public void setAuthor(String author) {
          this.author = author;
      }

      /** @return the year, or {@code null} if none was set */
      public String getYear() {
          return year;
      }

      /** @param year the year to store */
      public void setYear(String year) {
          this.year = year;
      }

      /** @return the price, or {@code null} if none was set */
      public String getPrice() {
          return price;
      }

      /** @param price the price to store */
      public void setPrice(String price) {
          this.price = price;
      }

      /** @return the language, or {@code null} if none was set */
      public String getLang() {
          return lang;
      }

      /** @param lang the language to store */
      public void setLang(String lang) {
          this.lang = lang;
      }

      /**
       * Renders all properties on one line as space-separated {@code name:value}
       * pairs, in the order category, lang, title, author, year, price.
       */
      @Override
      public String toString() {
          StringBuilder line = new StringBuilder();
          line.append("category:").append(category);
          line.append(" lang:").append(lang);
          line.append(" title:").append(title);
          line.append(" author:").append(author);
          line.append(" year:").append(year);
          line.append(" price:").append(price);
          return line.toString();
      }
  }
  1. package com.qunar.handler;
  2. import java.util.ArrayList;
  3. import java.util.List;
  4. import org.xml.sax.Attributes;
  5. import org.xml.sax.SAXException;
  6. import org.xml.sax.helpers.DefaultHandler;
  7. import com.qunar.bean.Book;
  8. public class SAXParserHandler extends DefaultHandler{
  9. private Book book;
  10. private int bookIndex = 0;
  11. // 节点文本内容
  12. private String text;
  13. private List<Book> bookList = new ArrayList<Book>();
  14. public List<Book> getBookList() {
  15. return bookList;
  16. }
  17. // 用来标示解析开始
  18. @Override
  19. public void startDocument() throws SAXException {
  20. System.out.println("SAX解析开始...");
  21. }
  22. // 用来标示解析结束
  23. @Override
  24. public void endDocument() throws SAXException {
  25. System.out.println("SAX解析结束...");
  26. }
  27. // 用来遍历XML文件的开始标签
  28. @Override
  29. public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
  30. // 调用DefaultHandler类的startElement方法
  31. super.startElement(uri, localName, qName, attributes);
  32. // 开始解析book元素节点
  33. if(qName.equals("book")){
  34. // 创建一个book对象
  35. book = new Book();
  36. ++ bookIndex;
  37. System.out.println("开始解析第" + bookIndex + "本书...");
  38. int size = attributes.getLength();
  39. for(int i = 0;i < size;++i){
  40. String attr = attributes.getQName(i);
  41. // 属性category
  42. if(attr.equals("category")){
  43. book.setCategory(attributes.getValue(i));
  44. }//if
  45. }//for
  46. }//if
  47. // 用于遍历title节点中的属性
  48. else if(qName.equals("title")){
  49. int size = attributes.getLength();
  50. for(int i = 0;i < size;++i){
  51. String attr = attributes.getQName(i);
  52. // 属性category
  53. if(attr.equals("lang")){
  54. book.setLang(attributes.getValue(i));
  55. }//if
  56. }//for
  57. }//else
  58. }
  59. // 用来遍历XML文件的结束标签
  60. @Override
  61. public void endElement(String uri, String localName, String qName) throws SAXException {
  62. super.endElement(uri, localName, qName);
  63. // 判断一本书是否解析完
  64. if(qName.equals("book")){
  65. bookList.add(book);
  66. book = null;
  67. System.out.println("结束解析第" + bookIndex + "本书...");
  68. }//if
  69. else if(qName.equals("title")){
  70. book.setTitle(text);
  71. }//else
  72. else if(qName.equals("author")){
  73. book.setAuthor(text);
  74. }//else
  75. else if(qName.equals("year")){
  76. book.setYear(text);
  77. }//else
  78. else if(qName.equals("price")){
  79. book.setPrice(text);
  80. }//else
  81. }
  82. // 文本值
  83. @Override
  84. public void characters(char[] ch, int start, int length) throws SAXException {
  85. super.characters(ch, start, length);
  86. text = new String(ch, start, length);
  87. }
  88. }
  1. package com.qunar.xml;
  2. import java.io.IOException;
  3. import java.util.List;
  4. import javax.xml.parsers.ParserConfigurationException;
  5. import javax.xml.parsers.SAXParser;
  6. import javax.xml.parsers.SAXParserFactory;
  7. import org.xml.sax.SAXException;
  8. import com.qunar.bean.Book;
  9. import com.qunar.handler.SAXParserHandler;
  10. /**
  11. * SAX方式解析XML文档
  12. * @author sjf0115
  13. *
  14. */
  15. public class SAXXMLCode {
  16. public static void main(String[] args) {
  17. String path = "D:\\bookstore.xml";
  18. try {
  19. // 通过SAXParserFactory的静态方法newInstance()方法获取SAXParserFactory实例对象factory
  20. SAXParserFactory factory = SAXParserFactory.newInstance();
  21. // 通过SAXParserFactory实例的newSAXParser()方法返回SAXParser实例parser
  22. SAXParser saxParser = factory.newSAXParser();
  23. // 定义SAXParserHandler对象
  24. SAXParserHandler handler = new SAXParserHandler();
  25. // 解析XML文档
  26. saxParser.parse(path, handler);
  27. // 得到遍历结果
  28. List<Book> bookList = handler.getBookList();
  29. System.out.println("遍历结果:");
  30. for (Book book : bookList) {
  31. System.out.println(book);
  32. }//for
  33. } catch (ParserConfigurationException e) {
  34. e.printStackTrace();
  35. } catch (SAXException e) {
  36. e.printStackTrace();
  37. } catch (IOException e) {
  38. e.printStackTrace();
  39. }
  40. }
  41. }

运行结果:

<table cellspacing="0" cellpadding="0" style="border-collapse:collapse; border:1px solid rgb(187,187,187); width:1050px"><tbody><tr><td style="border-collapse:collapse; border:1px solid rgb(187,187,187); width:1049px"><br/><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">SAX解析开始...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">开始解析第1本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">结束解析第1本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">开始解析第2本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">结束解析第2本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">开始解析第3本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">结束解析第3本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">SAX解析结束...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">遍历结果:</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">category:Java  lang:chi   title:Java多线程编程核心技术   author:高洪岩   year:2015   price:69.00</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">category:C++  lang:en   title:Effective C++: 55 Specific Ways to Improve Your Programs and Designs   author:Scott Meyers   year:2006   price:58.00</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">category:Web  lang:en   title:Learning XML   author:Erik T. Ray   year:2016   price:39.95</span></div></td></tr></tbody></table>

相关文章