Skip to content

Commit 3082a4f

Browse files
committed
In W3CDom, add an xmlns for undeclared attribute prefixes
Fixes #2087
1 parent 001752f commit 3082a4f

File tree

3 files changed

+61
-5
lines changed

3 files changed

+61
-5
lines changed

CHANGES.md

+3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
normalized. [#1496](https://github.com/jhy/jsoup/issues/1496).
1616
* When serializing to XML, characters that are invalid in XML 1.0 should be removed (not
1717
encoded). [#1743](https://github.com/jhy/jsoup/issues/1743).
18+
* When converting a Document to the W3C DOM in `W3CDom`, an element with an attribute in an undeclared namespace now
19+
gets a declaration of `xmlns:prefix="undefined"`. This allows subsequent serializations to XML via `W3CDom.asString()`
20+
to succeed. [#2087](https://github.com/jhy/jsoup/issues/2087).
1821

1922
## 1.19.1 (2025-03-04)
2023

src/main/java/org/jsoup/helper/W3CDom.java

+27-5
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,8 @@ public Node contextNode(Document wDoc) {
330330
}
331331

332332
/**
333-
* Serialize a W3C document to a String. The output format will be XML or HTML depending on the content of the doc.
333+
* Serialize a W3C document that was created by {@link #fromJsoup(org.jsoup.nodes.Element)} to a String.
334+
* The output format will be XML or HTML depending on the content of the doc.
334335
*
335336
* @param doc Document
336337
* @return Document as string
@@ -423,21 +424,42 @@ public void tail(org.jsoup.nodes.Node source, int depth) {
423424

424425
private void copyAttributes(org.jsoup.nodes.Node source, Element el) {
425426
for (Attribute attribute : source.attributes()) {
426-
// the W3C DOM has a different allowed set of characters than HTML5 (that Attribute.getValidKey return, partic does not allow ';'). So if we except when using HTML, go to more restricted XML
427427
try {
428428
String key = Attribute.getValidKey(attribute.getKey(), syntax);
429-
if (key != null) // null if couldn't be coerced to validity
429+
if (key != null) {
430430
el.setAttribute(key, attribute.getValue());
431+
addUndeclaredAttrNs(key, el);
432+
}
431433
} catch (DOMException e) {
432434
if (syntax != Syntax.xml) {
433435
String key = Attribute.getValidKey(attribute.getKey(), Syntax.xml);
434-
if (key != null)
435-
el.setAttribute(key, attribute.getValue()); // otherwise, will skip attribute
436+
if (key != null) {
437+
el.setAttribute(key, attribute.getValue());
438+
addUndeclaredAttrNs(key, el);
439+
}
436440
}
437441
}
438442
}
439443
}
440444

445+
/**
446+
Add a namespace declaration for an attribute with a prefix if it is not already present. Ensures that attributes
447+
with prefixes have the corresponding namespace declared; e.g. the attribute "v-bind:foo" gets another attribute
448+
"xmlns:v-bind='undefined'", so that the asString() transformation pass is valid.
449+
*/
450+
private void addUndeclaredAttrNs(String attrKey, Element wEl) {
451+
if (!namespaceAware) return;
452+
int pos = attrKey.indexOf(':');
453+
if (pos > 0) {
454+
String prefix = attrKey.substring(0, pos);
455+
if (!namespacesStack.peek().containsKey(prefix)) {
456+
wEl.setAttribute("xmlns:" + prefix, undefinedNs);
457+
namespacesStack.peek().put(prefix, undefinedNs);
458+
}
459+
}
460+
}
461+
private static final String undefinedNs = "undefined";
462+
441463
/**
442464
* Finds any namespaces defined in this element. Returns any tag prefix.
443465
*/

src/test/java/org/jsoup/helper/W3CDomTest.java

+31
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,37 @@ void doesNotExpandEntities(Parser parser) {
488488
assertTrue(string.contains("&lol1;"));
489489
}
490490

491+
@Test void undeclaredAttrNamespaceAsString() {
492+
// https://github.com/jhy/jsoup/issues/2087
493+
W3CDom w3CDom = new W3CDom();
494+
String html = "<html><body><div v-bind:class='test'></div></body></html>";
495+
org.jsoup.nodes.Document jdoc = Jsoup.parse(html);
496+
org.w3c.dom.Document w3CDoc = w3CDom.fromJsoup(jdoc);
497+
498+
String xml = w3CDom.asString(w3CDoc);
499+
assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?><html xmlns=\"http://www.w3.org/1999/xhtml\"><head/><body><div xmlns:v-bind=\"undefined\" v-bind:class=\"test\"/></body></html>", xml);
500+
}
501+
502+
@Test void declaredNamespaceIsUsed() {
503+
W3CDom w3CDom = new W3CDom();
504+
String html = "<html xmlns:v-bind=\"http://example.com\"><body><div v-bind:class='test'></div></body></html>";
505+
org.jsoup.nodes.Document jdoc = Jsoup.parse(html);
506+
org.w3c.dom.Document w3CDoc = w3CDom.fromJsoup(jdoc);
507+
508+
String xml = w3CDom.asString(w3CDoc);
509+
assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?><html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:v-bind=\"http://example.com\"><head/><body><div v-bind:class=\"test\"/></body></html>", xml);
510+
}
511+
512+
@Test void nestedElementsWithUndeclaredNamespace() {
513+
W3CDom w3CDom = new W3CDom();
514+
String html = "<html><body><div v-bind:class='test'><span v-bind:style='color:red'></span></div></body></html>";
515+
org.jsoup.nodes.Document jdoc = Jsoup.parse(html);
516+
org.w3c.dom.Document w3CDoc = w3CDom.fromJsoup(jdoc);
517+
518+
String xml = w3CDom.asString(w3CDoc);
519+
assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?><html xmlns=\"http://www.w3.org/1999/xhtml\"><head/><body><div xmlns:v-bind=\"undefined\" v-bind:class=\"test\"><span v-bind:style=\"color:red\"/></div></body></html>", xml);
520+
}
521+
491522
private static Stream<Arguments> parserProvider() {
492523
return Stream.of(
493524
Arguments.of(Parser.htmlParser()),

0 commit comments

Comments
 (0)