org.apache.tika.detect.XmlRootExtractor类的使用及代码示例

x33g5p2x  于2022-02-03 转载在 其他  
字(5.9k)|赞(0)|评价(0)|浏览(114)

本文整理了Java中org.apache.tika.detect.XmlRootExtractor类的一些代码示例,展示了XmlRootExtractor类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。XmlRootExtractor类的具体详情如下:
包路径:org.apache.tika.detect.XmlRootExtractor
类名称:XmlRootExtractor

XmlRootExtractor介绍

[英]Utility class that uses a javax.xml.parsers.SAXParser to determine the namespace URI and local name of the root element of an XML file.
[中]一个实用工具类,使用 javax.xml.parsers.SAXParser 来确定 XML 文件根元素的命名空间 URI 和本地名称。

代码示例

代码示例来源:origin: apache/tika

/**
 * Detects whether the given stream is an encrypted prescription document.
 *
 * <p>The stream is wrapped in a {@code LookaheadInputStream} constructed with
 * a size of 1024, decrypted with an RSA cipher initialised from
 * {@code Pharmacy.getKey()}, and the root element of the decrypted XML is
 * inspected.
 *
 * @param stream the document stream to examine
 * @param metadata document metadata (not consulted by this method)
 * @return {@code application/x-prescription} when the decrypted root element
 *         is {@code prescription} in the {@code http://example.com/xpd}
 *         namespace, otherwise {@code MediaType.OCTET_STREAM}
 * @throws IOException if reading or closing the lookahead stream fails
 */
public MediaType detect(InputStream stream, Metadata metadata)
    throws IOException {
  Key privateKey = Pharmacy.getKey();
  MediaType detected = MediaType.OCTET_STREAM;

  try (InputStream peek = new LookaheadInputStream(stream, 1024)) {
    Cipher rsa = Cipher.getInstance("RSA");
    rsa.init(Cipher.DECRYPT_MODE, privateKey);

    InputStream plain = new CipherInputStream(peek, rsa);
    QName root = new XmlRootExtractor().extractRootElement(plain);

    boolean isPrescription = root != null
        && "http://example.com/xpd".equals(root.getNamespaceURI())
        && "prescription".equals(root.getLocalPart());
    if (isPrescription) {
      detected = MediaType.application("x-prescription");
    }
  } catch (GeneralSecurityException e) {
    // The cipher could not be set up; keep the default type.
  }
  return detected;
}
}

代码示例来源:origin: apache/tika

/**
 * Extracts the root element from XML held in a byte array by delegating to
 * the stream-based {@code extractRootElement} overload.
 *
 * @param data the bytes to inspect, wrapped in a {@link ByteArrayInputStream}
 * @return whatever the stream-based overload returns for those bytes
 */
public QName extractRootElement(byte[] data) {
  ByteArrayInputStream input = new ByteArrayInputStream(data);
  return extractRootElement(input);
}

代码示例来源:origin: apache/tika

/**
 * Builds a baseline of detection results by running the detector once over
 * each of the given files.
 *
 * @param detector the detector to run against every file
 * @param files the files to detect
 * @return a map from each successfully processed path to the media type the
 *         detector reported for it; files that raised an {@link IOException}
 *         are absent from the map
 */
private static ConcurrentHashMap<Path, MediaType> getBaselineDetection(Detector detector, Path[] files) {
  ConcurrentHashMap<Path, MediaType> baseline = new ConcurrentHashMap<>();
  for (Path f : files) {
    Metadata metadata = new Metadata();
    try (TikaInputStream tis = TikaInputStream.get(f, metadata)) {
      // One detection per file is enough: a second call stored the same key
      // again and only doubled the work.
      baseline.put(f, detector.detect(tis, metadata));
    } catch (IOException e) {
      // Best effort: report the failure and leave this file out of the baseline.
      e.printStackTrace();
    }
  }
  return baseline;
}

代码示例来源:origin: org.apache.tika/tika-core

/**
 * Extracts the root element from XML held in a byte array by delegating to
 * the stream-based {@code extractRootElement} overload.
 *
 * @param data the bytes to inspect, wrapped in a {@link ByteArrayInputStream}
 * @return whatever the stream-based overload returns for those bytes
 */
public QName extractRootElement(byte[] data) {
  ByteArrayInputStream input = new ByteArrayInputStream(data);
  return extractRootElement(input);
}

代码示例来源:origin: apache/tika

/**
 * Determines the iWork document type from the root element of the XML stream.
 *
 * @param stream the stream whose root element is inspected
 * @return the enum constant whose namespace and part both match the root
 *         element; {@code ENCRYPTED} when no root element could be extracted
 *         and reading from the stream raises
 *         {@code UnsupportedZipFeatureException}; otherwise {@code null}
 */
private static IWORKDocumentType detectType(InputStream stream) {
  QName root = new XmlRootExtractor().extractRootElement(stream);

  if (root == null) {
    // There was a problem with extracting the root type.
    // Password protected iWorks files are funny, but we can usually
    // spot them because they encrypt part of the zip stream.
    try {
      stream.read();
    } catch (UnsupportedZipFeatureException e) {
      // Compression field was likely encrypted
      return ENCRYPTED;
    } catch (Exception ignored) {
      // Best-effort probe only: any other failure means "unknown type".
    }
    return null;
  }

  String namespace = root.getNamespaceURI();
  String localPart = root.getLocalPart();
  for (IWORKDocumentType candidate : values()) {
    boolean sameNamespace = candidate.getNamespace().equals(namespace);
    if (sameNamespace && candidate.getPart().equals(localPart)) {
      return candidate;
    }
  }
  return null;
}
}

代码示例来源:origin: com.github.lafa.tikaNoExternal/tika-core

/**
 * Extracts the root element from XML held in a byte array by delegating to
 * the stream-based {@code extractRootElement} overload.
 *
 * @param data the bytes to inspect, wrapped in a {@link ByteArrayInputStream}
 * @return whatever the stream-based overload returns for those bytes
 */
public QName extractRootElement(byte[] data) {
  ByteArrayInputStream input = new ByteArrayInputStream(data);
  return extractRootElement(input);
}

代码示例来源:origin: apache/tika

XmlRootExtractor extractor = new XmlRootExtractor();
QName rootElement = extractor.extractRootElement(data);
if (rootElement != null) {
  for (MimeType type : xmls) {

代码示例来源:origin: ukwa/webarchive-discovery

/**
 * Records the XML root element of the stream in the metadata.
 *
 * <p>When a root element is found and its namespace URI is non-blank, the
 * {@code XML_ROOT_NS} metadata entry is set to
 * {@code <namespace>#<localPart>}, both lower-cased. Nothing is written
 * otherwise, and the content handler and parse context are not used.
 *
 * @param stream the document stream to inspect
 * @param handler unused by this parser
 * @param metadata receives the {@code XML_ROOT_NS} entry
 * @param context unused by this parser
 * @throws IOException declared by the overridden method
 * @throws SAXException declared by the overridden method
 * @throws TikaException declared by the overridden method
 */
@Override
public void parse(InputStream stream, ContentHandler handler,
    Metadata metadata, ParseContext context) throws IOException,
    SAXException, TikaException {

  QName qname = new XmlRootExtractor().extractRootElement( stream );
  if( qname == null ) {
    return;
  }

  String namespace = qname.getNamespaceURI();
  if( namespace == null || namespace.trim().isEmpty() ) {
    return;
  }

  // Lower-case with Locale.ROOT so the key does not depend on the JVM's
  // default locale (e.g. Turkish maps 'I' to a dotless 'ı').
  metadata.set( XML_ROOT_NS,
      namespace.toLowerCase( java.util.Locale.ROOT ) + "#"
      + qname.getLocalPart().toLowerCase( java.util.Locale.ROOT ) );
}

代码示例来源:origin: uk.bl.wa.discovery/warc-indexer

/**
 * Records the XML root element of the stream in the metadata.
 *
 * <p>When a root element is found and its namespace URI is non-blank, the
 * {@code XML_ROOT_NS} metadata entry is set to
 * {@code <namespace>#<localPart>}, both lower-cased. Nothing is written
 * otherwise, and the content handler and parse context are not used.
 *
 * @param stream the document stream to inspect
 * @param handler unused by this parser
 * @param metadata receives the {@code XML_ROOT_NS} entry
 * @param context unused by this parser
 * @throws IOException declared by the overridden method
 * @throws SAXException declared by the overridden method
 * @throws TikaException declared by the overridden method
 */
@Override
public void parse(InputStream stream, ContentHandler handler,
    Metadata metadata, ParseContext context) throws IOException,
    SAXException, TikaException {

  QName qname = new XmlRootExtractor().extractRootElement( stream );
  if( qname == null ) {
    return;
  }

  String namespace = qname.getNamespaceURI();
  if( namespace == null || namespace.trim().isEmpty() ) {
    return;
  }

  // Lower-case with Locale.ROOT so the key does not depend on the JVM's
  // default locale (e.g. Turkish maps 'I' to a dotless 'ı').
  metadata.set( XML_ROOT_NS,
      namespace.toLowerCase( java.util.Locale.ROOT ) + "#"
      + qname.getLocalPart().toLowerCase( java.util.Locale.ROOT ) );
}

代码示例来源:origin: org.apache.tika/tika-parsers

/**
 * Determines the iWork document type from the root element of the XML stream.
 *
 * @param stream the stream whose root element is inspected
 * @return the enum constant whose namespace and part both match the root
 *         element; {@code ENCRYPTED} when no root element could be extracted
 *         and reading from the stream raises
 *         {@code UnsupportedZipFeatureException}; otherwise {@code null}
 */
private static IWORKDocumentType detectType(InputStream stream) {
  QName root = new XmlRootExtractor().extractRootElement(stream);

  if (root == null) {
    // There was a problem with extracting the root type.
    // Password protected iWorks files are funny, but we can usually
    // spot them because they encrypt part of the zip stream.
    try {
      stream.read();
    } catch (UnsupportedZipFeatureException e) {
      // Compression field was likely encrypted
      return ENCRYPTED;
    } catch (Exception ignored) {
      // Best-effort probe only: any other failure means "unknown type".
    }
    return null;
  }

  String namespace = root.getNamespaceURI();
  String localPart = root.getLocalPart();
  for (IWORKDocumentType candidate : values()) {
    boolean sameNamespace = candidate.getNamespace().equals(namespace);
    if (sameNamespace && candidate.getPart().equals(localPart)) {
      return candidate;
    }
  }
  return null;
}
}

代码示例来源:origin: com.github.lafa.tikaNoExternal/tika-parsers

/**
 * Determines the iWork document type from the root element of the XML stream.
 *
 * @param stream the stream whose root element is inspected
 * @return the enum constant whose namespace and part both match the root
 *         element; {@code ENCRYPTED} when no root element could be extracted
 *         and reading from the stream raises
 *         {@code UnsupportedZipFeatureException}; otherwise {@code null}
 */
private static IWORKDocumentType detectType(InputStream stream) {
  QName root = new XmlRootExtractor().extractRootElement(stream);

  if (root == null) {
    // There was a problem with extracting the root type.
    // Password protected iWorks files are funny, but we can usually
    // spot them because they encrypt part of the zip stream.
    try {
      stream.read();
    } catch (UnsupportedZipFeatureException e) {
      // Compression field was likely encrypted
      return ENCRYPTED;
    } catch (Exception ignored) {
      // Best-effort probe only: any other failure means "unknown type".
    }
    return null;
  }

  String namespace = root.getNamespaceURI();
  String localPart = root.getLocalPart();
  for (IWORKDocumentType candidate : values()) {
    boolean sameNamespace = candidate.getNamespace().equals(namespace);
    if (sameNamespace && candidate.getPart().equals(localPart)) {
      return candidate;
    }
  }
  return null;
}
}

代码示例来源:origin: com.github.lafa.tikaNoExternal/tika-core

XmlRootExtractor extractor = new XmlRootExtractor();
QName rootElement = extractor.extractRootElement(data);
if (rootElement != null) {
  for (MimeType type : xmls) {

代码示例来源:origin: org.apache.tika/tika-core

XmlRootExtractor extractor = new XmlRootExtractor();
QName rootElement = extractor.extractRootElement(data);
if (rootElement != null) {
  for (MimeType type : xmls) {

相关文章