How do I convert a string with Unicode escapes to Unicode?

This is the equivalent Unicode char, nothing new here.

Ok.

Private Function UnicodeEscapedStringToUTF8(escapedString As String) As String
  
  // Convert the output of a macOS "defaults read whatever" to proper UTF-8.
  //
  // Escapes handled (h = hex digit, o = octal digit, value = Unicode codepoint):
  //   \Uhhhhhhhh   32 bit codepoint
  //   \uhhhh       16 bit codepoint
  //   \ooo         8 bit codepoint
  //   \\           literal backslash
  //
  // Parameter escapedString: the text containing the escapes.
  // Returns: the text with every escape replaced by the character it denotes.
  //          An escape whose codepoint cannot be converted becomes U+FFFD.
  
  // Placeholder that shields a literal backslash from the escape patterns
  // below; it is turned back into a plain "\" just before returning.
  Const escapeDoubleBackslashes As String = &uFFF9+"\"+&uFFFb
  
  Var UtfString As String = escapedString.ReplaceAll("\\", escapeDoubleBackslashes)
  
  // One entry per pass, longest escape first. digitCounts(i) is the number of
  // digits that follow the prefix in patterns(i).
  Var patterns() As String = Array( _
    "\\U[0-9a-fA-F]{8,8}", _
    "\\u[0-9a-fA-F]{4,4}", _
    "\\[0-7]{3,3}")
  Var digitCounts() As Integer = Array(8, 4, 3)
  Const octalPass As Integer = 2
  
  Var re As New RegEx
  // RegEx ignores case by default, which would let the \U pattern swallow
  // a \uhhhh escape that happens to be followed by four more hex digits.
  re.Options.CaseSensitive = True
  
  For passIndex As Integer = 0 To patterns.LastIndex
    re.SearchPattern = patterns(passIndex)
    
    Var match As RegExMatch = re.Search(UtfString)
    
    While match <> Nil
      Var found As String = match.SubExpressionString(0)
      Var digits As String = found.Right(digitCounts(passIndex))
      Var code As String
      
      Try
        If passIndex = octalPass Then
          code = Text.FromUnicodeCodepoint(Integer.FromOctal(digits))
        Else
          code = Text.FromUnicodeCodepoint(Integer.FromHex(digits))
        End If
      Catch
        code = &uFFFD // unknown codepoint
      End Try
      
      // An escape that decodes to a backslash (e.g. \134 or \u005c) must not
      // combine with the characters after it into a new escape, so protect it
      // the same way as a literal "\\".
      If code = "\" Then
        code = escapeDoubleBackslashes
      End If
      
      // Byte-wise (case-sensitive) replace: found is the first case-sensitive
      // match, so the replacement must not hit an earlier occurrence that
      // differs only in the case of the u/U prefix.
      UtfString = UtfString.ReplaceBytes(found, code)
      match = re.Search(UtfString)
    Wend
  Next
  
  Return UtfString.ReplaceAll(escapeDoubleBackslashes, "\")
  
End Function

Wahooo!!!
Thank you very much. It was really too complex for me and I would never have done it alone. RESPECT!

1 Like