[英]Whitelists define what HTML (elements and attributes) to allow through the cleaner. Everything else is removed.
Start with one of the defaults: none(), simpleText(), basic(), basicWithImages(), or relaxed().
If you need to allow more through (please be careful!), tweak a base whitelist with: addTags(), addAttributes(), addEnforcedAttribute(), or addProtocols().
You can remove any setting from an existing whitelist with: removeTags(), removeAttributes(), removeEnforcedAttribute(), or removeProtocols().
The cleaner and these whitelists assume that you want to clean a <code>body</code> fragment of HTML (to add user supplied HTML into a templated page), and not to clean a full HTML document. If the latter is the case, either wrap the document HTML around the cleaned body HTML, or create a whitelist that allows <code>html</code> and <code>head</code> elements as appropriate.
If you are going to extend a whitelist, please be very careful. Make sure you understand what attributes may lead to XSS attack vectors. URL attributes are particularly vulnerable and require careful validation. See http://ha.ckers.org/xss.html for some XSS attack examples.
代码示例来源:origin: k9mail/k-9
public static String extractText(String html) {
return Jsoup.clean(html, Whitelist.none());
代码示例来源:origin: k9mail/k-9
HtmlSanitizer() {
Whitelist whitelist = Whitelist.relaxed()
.addTags("font", "hr", "ins", "del", "center", "map", "area")
.addAttributes("font", "color", "face", "size")
.addAttributes("table", "align", "background", "bgcolor", "border", "cellpadding", "cellspacing",
.addAttributes("tr", "align", "background", "bgcolor", "valign")
"align", "background", "bgcolor", "colspan", "headers", "height", "nowrap", "rowspan", "scope",
"sorted", "valign", "width")
"align", "background", "bgcolor", "colspan", "headers", "height", "nowrap", "rowspan", "scope",
"valign", "width")
.addAttributes("map", "name")
.addAttributes("area", "shape", "coords", "href", "alt")
.addProtocols("area", "href", "http", "https")
.addAttributes("img", "usemap")
.addAttributes(":all", "class", "style", "id", "dir")
.addProtocols("img", "src", "http", "https", "cid", "data")
.addProtocols("a", "href", "tel", "sip", "bitcoin", "ethereum", "rtsp");
cleaner = new Cleaner(whitelist);
headCleaner = new HeadCleaner();
代码示例来源:origin: org.jsoup/jsoup
This whitelist allows the same text tags as {@link #basic}, and also allows <code>img</code> tags, with appropriate
attributes, with <code>src</code> pointing to <code>http</code> or <code>https</code>.
@return whitelist
public static Whitelist basicWithImages() {
return basic()
.addAttributes("img", "align", "alt", "height", "src", "title", "width")
.addProtocols("img", "src", "http", "https")
代码示例来源:origin: org.jsoup/jsoup
This whitelist allows only simple text formatting: <code>b, em, i, strong, u</code>. All other HTML (tags and
attributes) will be removed.
@return whitelist
public static Whitelist simpleText() {
return new Whitelist()
.addTags("b", "em", "i", "strong", "u")
代码示例来源:origin: org.jsoup/jsoup
This whitelist allows a fuller range of text nodes: <code>a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li,
ol, p, pre, q, small, span, strike, strong, sub, sup, u, ul</code>, and appropriate attributes.
Links (<code>a</code> elements) can point to <code>http, https, ftp, mailto</code>, and have an enforced
<code>rel=nofollow</code> attribute.
Does not allow images.
@return whitelist
public static Whitelist basic() {
return new Whitelist()
"a", "b", "blockquote", "br", "cite", "code", "dd", "dl", "dt", "em",
"i", "li", "ol", "p", "pre", "q", "small", "span", "strike", "strong", "sub",
"sup", "u", "ul")
.addAttributes("a", "href")
.addAttributes("blockquote", "cite")
.addAttributes("q", "cite")
.addProtocols("a", "href", "ftp", "http", "https", "mailto")
.addProtocols("blockquote", "cite", "http", "https")
.addProtocols("cite", "cite", "http", "https")
.addEnforcedAttribute("a", "rel", "nofollow")
代码示例来源:origin: br.com.anteros/Anteros-Bean-Validation
public void initialize(SafeHtml safeHtmlAnnotation) {
switch ( safeHtmlAnnotation.whitelistType() ) {
case BASIC:
whitelist = Whitelist.basic();
whitelist = Whitelist.basicWithImages();
case NONE:
whitelist = Whitelist.none();
whitelist = Whitelist.relaxed();
whitelist = Whitelist.simpleText();
whitelist.addTags( safeHtmlAnnotation.additionalTags() );
for ( SafeHtml.Tag tag : safeHtmlAnnotation.additionalTagsWithAttributes() ) {
whitelist.addAttributes( tag.name(), tag.attributes() );
代码示例来源:origin: zhangyd-c/OneBlog
* 自定义的白名单
* @return
private static Whitelist custome() {
return Whitelist.none().addTags("p", "strong", "pre", "code", "span", "blockquote", "br").addAttributes("span", "class");
代码示例来源:origin: tomoya92/pybbs
content = Jsoup.clean(content, Whitelist.relaxed().addTags("code", "pre").addAttributes("code", "class"));
Document parse = Jsoup.parse(content);
Elements tableElements = parse.select("table");
代码示例来源:origin: com.eduworks/ew.levr.base
wl = Whitelist.none();
wl = Whitelist.basic();
for (int i = 0; i < allowAttributes.length(); i++) {
JSONObject attribute = allowAttributes.getJSONObject(i);
wl.addAttributes(attribute.getString("element"), attribute.getString("attribute"));
代码示例来源:origin: org.apache.myfaces.tobago/tobago-core
public void setProperties(final Properties configuration) {
unmodifiable = true;
for (final String key : configuration.stringPropertyNames()) {
if ("whitelist".equals(key)) {
whitelistName = configuration.getProperty(key);
if ("basic".equals(whitelistName)) {
whitelist = Whitelist.basic();
} else if ("basicWithImages".equals(whitelistName)) {
whitelist = Whitelist.basicWithImages();
} else if ("none".equals(whitelistName)) {
whitelist = Whitelist.none();
} else if ("relaxed".equals(whitelistName)) {
whitelist = Whitelist.relaxed();
} else if ("simpleText".equals(whitelistName)) {
whitelist = Whitelist.simpleText();
} else {
throw new TobagoConfigurationException(
"Unknown configuration value for 'whitelist' in tobago-config.xml found! value='" + whitelistName + "'");
} else {
throw new TobagoConfigurationException(
"Unknown configuration key in tobago-config.xml found! key='" + key + "'");
if (LOG.isInfoEnabled()) {
LOG.warn("Using whitelist '" + whitelistName + "' for sanitizing!");
代码示例来源:origin: FINRAOS/herd
Whitelist whitelist = new Whitelist();
whitelist.addTags(whitelistTag).addAttributes(whitelistTag, "class");
代码示例来源:origin: viritin/viritin
public Whitelist getWhitelist() {
if (whitelist == null) {
return Whitelist.relaxed();
return whitelist;
代码示例来源:origin: stackoverflow.com
String cleanXmlAndRemoveUnwantedTags(String textToEscape) {
Whitelist whitelist = Whitelist.none();
OutputSettings outputSettings = new OutputSettings()
String safe = Jsoup.clean(textToEscape, "", whitelist, outputSettings);
return safe;
代码示例来源:origin: IQSS/dataverse
* Wrapper around Jsoup clean method with the basic White list
* http://jsoup.org/cookbook/cleaning-html/whitelist-sanitizer
* @param unsafe
* @return
public static String sanitizeBasicHTML(String unsafe){
if (unsafe == null){
return null;
// basic includes: a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li, ol, p, pre, q, small, span, strike, strong, sub, sup, u, ul
//Whitelist wl = Whitelist.basic().addTags("img", "h1", "h2", "h3", "kbd", "hr", "s", "del");
Whitelist wl = Whitelist.basicWithImages().addTags( "h1", "h2", "h3", "kbd", "hr", "s", "del","map","area").addAttributes("img", "usemap")
.addAttributes("map", "name").addAttributes("area", "shape","coords","href","title","alt")
.addEnforcedAttribute("a", "target", "_blank");
return Jsoup.clean(unsafe, wl);
代码示例来源:origin: jungilhan/awesome-blogs-android
Whitelist whitelist = Whitelist.basicWithImages()
"h1", "h2", "h3", "h4", "h5", "h6",
"table", "tbody", "td", "tfoot", "th", "thead", "tr",
"span", "font",
"del", "strike", "s")
.addAttributes("th", "colspan", "align", "style")
.addAttributes("td", "colspan", "align", "style")
.addAttributes(":all", "title", "style");
if(options.preserveRelativeLinks) {
whitelist.addTags("abbr", "acronym");
whitelist.addAttributes("h"+i, "id");
if(!el.getAttributes().isEmpty()) {
whitelist.addAttributes(el.getTagName(), el.getAttributes().toArray(new String[el.getAttributes().size()]));
代码示例来源:origin: tomoya92/pybbs
public Topic insertTopic(String title, String content, String tags, User user, HttpSession session) {
Topic topic = new Topic();
topic.setTitle(Jsoup.clean(title, Whitelist.simpleText()));
topic.setInTime(new Date());
// 增加用户积分
user.setScore(user.getScore() + Integer.parseInt(systemConfigService.selectAllConfig().get("create_topic_score").toString()));
if (session != null) session.setAttribute("_user", user);
// 保存标签
List<Tag> tagList = tagService.insertTag(Jsoup.clean(tags, Whitelist.none()));
// 处理标签与话题的关联
topicTagService.insertTopicTag(topic.getId(), tagList);
// 索引话题
indexTopic(String.valueOf(topic.getId()), topic.getTitle(), topic.getContent());
return topic;
代码示例来源:origin: lucene4ir/lucene4ir
public TRECAquaintDocumentIndexer(String indexPath, String tokenFilterFile, boolean positional){
super(indexPath, tokenFilterFile, positional);
try {
whiteList = Whitelist.relaxed();
} catch (Exception e){
System.out.println(" caught a " + e.getClass() +
"\n with message: " + e.getMessage());
doc = new Document();
代码示例来源:origin: mkalus/segrada
public String toPlain(String markupText) {
// sane default
if (markupText == null || markupText.equals("")) return "";
// first clean to have valid html
String cleaned = Jsoup.clean(markupText, Whitelist.basic());
// then strip all html out
cleaned = Jsoup.clean(cleaned, Whitelist.none());
// unescape all entities
cleaned = Parser.unescapeEntities(cleaned, false);
// clean further
return super.toPlain(cleaned);
代码示例来源:origin: ManyDesigns/Portofino
* Returns the JSoup whitelist used to clean user-provided HTML in rich-text fields.
* @return the default implementation returns the "basic" whitelist ({@link Whitelist#basic()}).
protected Whitelist getWhitelist() {
return Whitelist.basic();
代码示例来源:origin: info.magnolia/magnolia-module-rssaggregator
* Format and sanitize description input.
* @param origDescription original description as delivered by feed fetcher.
* @param abbreviation gives the amount of characters used for abbreviation.
* @return sanitized HTML output string.
public static String formatDescription(String origDescription, Integer abbreviation) {
// replace Java linebreaks with HTML
//origDescription = StringUtils.replace(origDescription, "\n", "<br/>");
// check if abbreviation is wanted
if (abbreviation != null && abbreviation > 0) {
origDescription = StringUtils.abbreviate(origDescription, abbreviation);
// Sanitize HTML input
Whitelist whitelist = Whitelist.basicWithImages();
whitelist.addTags("h1", "h2", "h3", "h4", "h5", "h6", "div");
// table tags
whitelist.addTags("table", "tbody", "td", "tfoot", "th", "thead", "tr");
return Jsoup.clean(origDescription, whitelist);