git » sdk » commit 3da8ec2

Fix unicode handling in XEP0393

author Stephen Paul Weber
2025-11-12 20:14:12 UTC
committer Stephen Paul Weber
2025-11-12 20:14:12 UTC
parent e6b81f145c0a9b59e919668eeac90e6386119a82

Fix unicode handling in XEP0393

StringTools.isSpace uses String indices, not UnicodeString indices.

The same goes for EReg, so fix the indices it returns afterwards to match UnicodeString.

borogove/Util.hx +27 -0
borogove/XEP0393.hx +16 -6

diff --git a/borogove/Util.hx b/borogove/Util.hx
index e276113..9ee1e10 100644
--- a/borogove/Util.hx
+++ b/borogove/Util.hx
@@ -167,4 +167,31 @@ class Util {
 		final b = bytesOfString(s);
 		o.writeBytes(b, 0, b.length);
 	}
+
+	/**
+		Convert a native String index (UTF-16 code units) to a UnicodeString index (code points).
+		Index 0 maps to 0 and `s.length` to the code point count; an index that lands inside a
+		surrogate pair maps to the position just after that pair. Throws "No matching index" otherwise.
+	**/
+	@:access(StringTools.utf16CodePointAt)
+	@:access(StringTools.MIN_SURROGATE_CODE_POINT)
+	static public function convertIndex(u: UnicodeString, index: Int) {
+		final s: String = u;
+		var unicodeOffset = 0;
+		var nativeOffset = 0;
+		while (nativeOffset < s.length) {
+			// Test before consuming, so index 0 and an index right after a surrogate pair both match
+			if (nativeOffset == index) return unicodeOffset;
+			final c = StringTools.utf16CodePointAt(s, nativeOffset++);
+			unicodeOffset++;
+			if (c >= StringTools.MIN_SURROGATE_CODE_POINT) {
+				// Pair occupies two code units; an index on its second unit resolves past the pair
+				if (nativeOffset == index) return unicodeOffset;
+				nativeOffset++;
+			}
+		}
+
+		if (nativeOffset == index) return unicodeOffset;
+		throw "No matching index";
+	}
 }
diff --git a/borogove/XEP0393.hx b/borogove/XEP0393.hx
index e8fc076..24c4910 100644
--- a/borogove/XEP0393.hx
+++ b/borogove/XEP0393.hx
@@ -2,6 +2,7 @@ package borogove;
 
 import borogove.Autolink;
 import borogove.Stanza;
+using borogove.Util;
 
 class XEP0393 {
 	public static function parse(styled: UnicodeString) {
@@ -90,15 +91,15 @@ class XEP0393 {
 	public static function parseSpans(styled: UnicodeString) {
 		final spans = [];
 		var start = 0;
-		var nextLink = null;
+		var nextLink: Null<{ span: Null<Node>, start: Int, end: Int }> = null;
 		final styledLength = styled.length;
 		while (start < styledLength) {
 			final char = styled.charAt(start);
-			if (StringTools.isSpace(styled, start + 1)) {
+			if (isSpace(styled, start + 1)) {
 				// The opening styling directive MUST NOT be followed by a whitespace character
 				spans.push(CData(new TextNode(styled.substr(start, 2))));
 				start += 2;
-			} else if (start != 0 && !StringTools.isSpace(styled, start - 1)) {
+			} else if (start != 0 && !isSpace(styled, start - 1)) {
 				// The opening styling directive MUST be located at the beginning of the parent block, after a whitespace character, or after a different opening styling directive.
 				spans.push(CData(new TextNode(char)));
 				start++;
@@ -122,6 +123,10 @@ class XEP0393 {
 			} else {
 				if (nextLink == null || start > nextLink.start) {
 					nextLink = Autolink.one(styled, start);
+					if (nextLink != null) {
+						nextLink.start = styled.convertIndex(nextLink.start);
+						nextLink.end = styled.convertIndex(nextLink.end);
+					}
 				}
 				if (nextLink != null && nextLink.start == start && nextLink.span != null) {
 					spans.push(nextLink.span);
@@ -135,10 +140,10 @@ class XEP0393 {
 		return spans;
 	}
 
-	public static function parseSpan(tagName: UnicodeString, marker: String, styled: String, start: Int) {
+	public static function parseSpan(tagName: String, marker: String, styled: UnicodeString, start: Int) {
 		var end = start + 1;
 		while (end < styled.length && styled.charAt(end) != marker) {
-			if (StringTools.isSpace(styled, end)) end++; // the closing styling directive MUST NOT be preceeded by a whitespace character
+			if (isSpace(styled, end)) end++; // the closing styling directive MUST NOT be preceeded by a whitespace character
 			end++;
 		}
 		if (end == start + 1) {
@@ -174,7 +179,7 @@ class XEP0393 {
 		var end = 1; // Skip leading >
 		var spaceAfter = 0;
 		while (end < styled.length) {
-			if (styled.charAt(end) != "\n" && StringTools.isSpace(styled, end)) end++;
+			if (styled.charAt(end) != "\n" && isSpace(styled, end)) end++;
 			while (end < styled.length && styled.charAt(end) != "\n") {
 				line += styled.charAt(end);
 				end++;
@@ -218,4 +223,9 @@ class XEP0393 {
 
 		return { block: new Stanza("pre").text(lines.join("")), rest: styled.substr(end) };
 	}
+
+	private static function isSpace(s: UnicodeString, pos: Int) {
+		// StringTools.isSpace takes a native String index, so fetch the character with UnicodeString.charAt and test it at index 0
+		return StringTools.isSpace(s.charAt(pos), 0);
+	}
 }