| author | Stephen Paul Weber
<singpolyma@singpolyma.net> 2025-11-12 20:14:12 UTC |
| committer | Stephen Paul Weber
<singpolyma@singpolyma.net> 2025-11-12 20:14:12 UTC |
| parent | e6b81f145c0a9b59e919668eeac90e6386119a82 |
| borogove/Util.hx | +27 | -0 |
| borogove/XEP0393.hx | +16 | -6 |
diff --git a/borogove/Util.hx b/borogove/Util.hx
index e276113..9ee1e10 100644
--- a/borogove/Util.hx
+++ b/borogove/Util.hx
@@ -167,4 +167,31 @@ class Util {
 		final b = bytesOfString(s);
 		o.writeBytes(b, 0, b.length);
 	}
+
+	/**
+		Convert a native String index (UTF-16 code units) to a UnicodeString index (code points).
+		index may be 0 up to the native length of u; throws "No matching index" if it lands inside a surrogate pair or outside that range.
+	**/
+	@:access(StringTools.utf16CodePointAt)
+	@:access(StringTools.MIN_SURROGATE_CODE_POINT)
+	static public function convertIndex(u: UnicodeString, index: Int) {
+		final s: String = u;
+		var unicodeOffset = 0;
+		var nativeOffset = 0;
+		while (nativeOffset < s.length) {
+			// Compare before consuming the code point so index 0 and an index right after a surrogate pair both match
+			if (nativeOffset == index) {
+				return unicodeOffset;
+			}
+			final c = StringTools.utf16CodePointAt(s, nativeOffset++);
+			if (c >= StringTools.MIN_SURROGATE_CODE_POINT) {
+				nativeOffset++; // code point took two native units, skip the second one as well
+			}
+			unicodeOffset++;
+		}
+		if (nativeOffset == index) { // index points just past the last character
+			return unicodeOffset;
+		}
+		throw "No matching index";
+	}
 }
diff --git a/borogove/XEP0393.hx b/borogove/XEP0393.hx
index e8fc076..24c4910 100644
--- a/borogove/XEP0393.hx
+++ b/borogove/XEP0393.hx
@@ -2,6 +2,7 @@ package borogove;
 
 import borogove.Autolink;
 import borogove.Stanza;
+using borogove.Util;
 
 class XEP0393 {
 	public static function parse(styled: UnicodeString) {
@@ -90,15 +91,15 @@ class XEP0393 {
 	public static function parseSpans(styled: UnicodeString) {
 		final spans = [];
 		var start = 0;
-		var nextLink = null;
+		var nextLink: Null<{ span: Null<Node>, start: Int, end: Int }> = null;
 		final styledLength = styled.length;
 		while (start < styledLength) {
 			final char = styled.charAt(start);
-			if (StringTools.isSpace(styled, start + 1)) {
+			if (isSpace(styled, start + 1)) {
 				// The opening styling directive MUST NOT be followed by a whitespace character
 				spans.push(CData(new TextNode(styled.substr(start, 2))));
 				start += 2;
-			} else if (start != 0 && !StringTools.isSpace(styled, start - 1)) {
+			} else if (start != 0 && !isSpace(styled, start - 1)) {
 				// The opening styling directive MUST be located at the beginning of the parent block, after a whitespace character, or after a different opening styling directive.
 				spans.push(CData(new TextNode(char)));
 				start++;
@@ -122,6 +123,10 @@ class XEP0393 {
 			} else {
 				if (nextLink == null || start > nextLink.start) {
 					nextLink = Autolink.one(styled, start);
+					if (nextLink != null) {
+						nextLink.start = styled.convertIndex(nextLink.start);
+						nextLink.end = styled.convertIndex(nextLink.end);
+					}
 				}
 				if (nextLink != null && nextLink.start == start && nextLink.span != null) {
 					spans.push(nextLink.span);
@@ -135,10 +140,10 @@ class XEP0393 {
 		return spans;
 	}
 
-	public static function parseSpan(tagName: UnicodeString, marker: String, styled: String, start: Int) {
+	public static function parseSpan(tagName: String, marker: String, styled: UnicodeString, start: Int) {
 		var end = start + 1;
 		while (end < styled.length && styled.charAt(end) != marker) {
-			if (StringTools.isSpace(styled, end)) end++; // the closing styling directive MUST NOT be preceeded by a whitespace character
+			if (isSpace(styled, end)) end++; // the closing styling directive MUST NOT be preceeded by a whitespace character
 			end++;
 		}
 		if (end == start + 1) {
@@ -174,7 +179,7 @@ class XEP0393 {
 		var end = 1; // Skip leading >
 		var spaceAfter = 0;
 		while (end < styled.length) {
-			if (styled.charAt(end) != "\n" && StringTools.isSpace(styled, end)) end++;
+			if (styled.charAt(end) != "\n" && isSpace(styled, end)) end++;
 			while (end < styled.length && styled.charAt(end) != "\n") {
 				line += styled.charAt(end);
 				end++;
@@ -218,4 +223,9 @@ class XEP0393 {
 
 		return { block: new Stanza("pre").text(lines.join("")), rest: styled.substr(end) };
 	}
+
+	private static function isSpace(s: UnicodeString, pos: Int) {
+		// The version in StringTools won't use UnicodeString-aware indices
+		return StringTools.isSpace(s.charAt(pos), 0);
+	}
 }