This is the equivalent Unicode char, nothing new here.
Ok.
Private Function UnicodeEscapedStringToUTF8(escapedString As String) As String
  // Convert the output of a macOS "defaults read whatever" to proper UTF-8.
  //
  // Unicode chars are encoded as \uhhhh, \Uhhhhhhhh and \ooo, where the digits
  // are the codepoint: h is a hex digit and o is an octal digit.
  //
  // Parameter:
  //   escapedString - text containing the escape sequences described above.
  // Returns:
  //   The text with every recognised escape replaced by the character it
  //   names. An escape whose codepoint cannot be converted is replaced by
  //   U+FFFD (replacement character). A doubled backslash "\\" is returned
  //   as a single "\".
  //
  // NOTE(review): each loop re-searches the whole string after every
  // replacement, so a decoded backslash followed by digits could be decoded
  // a second time (e.g. the result of "\134" followed by "101") - confirm
  // whether such input can occur.
  // NOTE(review): the patterns are matched with the RegEx default options;
  // if matching is case-insensitive by default, "\u" and "\U" are not
  // distinguished by the patterns - verify against the RegExOptions docs.

  // Hide escaped backslashes behind a placeholder so that the "\" of a "\\"
  // pair is not mistaken for the start of an escape sequence below.
  Const escapeDoubleBackslashes As String = &uFFF9+"\"+&uFFFb
  Var UtfString As String = escapedString.ReplaceAll("\\", escapeDoubleBackslashes)

  Var re As new RegEx
  Var match As RegExMatch

  re.SearchPattern = "\\U[0-9a-fA-F]{8,8}" // 32 bit Unicode (hex)
  match = re.Search(UtfString)
  Do until match = Nil
    Var found, code As String
    found = match.SubExpressionString(0)
    // Guarded like the 16 bit and octal cases below: eight hex digits can
    // name a value that is not a valid codepoint, and an uncaught exception
    // here would abort the whole conversion.
    Try
      code = Text.FromUnicodeCodepoint(Integer.FromHex(found.Right(8)))
    Catch
      code = &uFFFD // unknown codepoint
    End
    // Replace substitutes the first occurrence, which is the match just found.
    UtfString = UtfString.Replace(found, code)
    match = re.Search(UtfString)
  Loop

  re.SearchPattern = "\\u[0-9a-fA-F]{4,4}" // 16 bit Unicode (hex)
  match = re.Search(UtfString)
  Do until match = Nil
    Var found, code As String
    found = match.SubExpressionString(0)
    Try
      code = Text.FromUnicodeCodepoint(Integer.FromHex(found.Right(4)))
    Catch
      code = &uFFFD // unknown codepoint
    End
    UtfString = UtfString.Replace(found, code)
    match = re.Search(UtfString)
  Loop

  re.SearchPattern = "\\[0-7]{3,3}" // 8 bit Unicode (octal)
  match = re.Search(UtfString)
  Do until match = Nil
    Var found, code As String
    found = match.SubExpressionString(0)
    Try
      code = Text.FromUnicodeCodepoint(Integer.FromOctal(found.Right(3)))
    Catch
      code = &uFFFD // unknown codepoint
    End
    UtfString = UtfString.Replace(found, code)
    match = re.Search(UtfString)
  Loop

  // Restore the protected backslashes as single literal backslashes.
  Return UtfString.ReplaceAll(escapeDoubleBackslashes, "\")
End Function
Wahooo!!!
Thank you very much. It was really too complex for me and I would never have done it alone. RESPECT!
1 Like