This question is a followup to Does this C code advance by 32 bits at a time or only 8 bits?
This time I’m posting the complete code of both routines as my previous questions have frequently been referred to as not having enough code present.
The error is (Apparently) in FromPuntcode’s calculation of the variable “n”. As far as I can tell all of my functions are functionally identical to their C counterparts but obviously there is a mistake somewhere as my code does not calculate the correct results.
The complete original C code
#define punycode_uint uint32_t
#define punycode_success IDN2_OK
#define punycode_overflow IDN2_PUNYCODE_OVERFLOW
#define punycode_big_output IDN2_PUNYCODE_BIG_OUTPUT
#define punycode_bad_input IDN2_PUNYCODE_BAD_INPUT
/**********************************************************/
/* Implementation (would normally go in its own .c file): */
#include <stdio.h>
#include <string.h>
int punycode_decode (size_t input_length,
const char input[],
size_t * output_length,
uint32_t output[]
);
/*** Bootstring parameters for Punycode ***/
enum { base = 36, tmin = 1, tmax = 26, skew = 38, damp = 700,
initial_bias = 72, initial_n = 0x80, delimiter = 0x2D };
/* basic(cp) tests whether cp is a basic code point: */
#define basic(cp) ((cp >= 'a' && cp <= 'z') || (cp >= '0' && cp <='9') || (cp >= 'A' && cp <='Z') || cp == '-' || cp == '_')
/* decode_digit(cp) returns the numeric value of a basic code */
/* point (for use in representing integers) in the range 0 to */
/* base-1, or base if cp does not represent a value. */
static unsigned decode_digit(int cp)
{
if (cp >= 'a' && cp <= 'z')
return cp - 'a';
if (cp >= '0' && cp <= '9')
return cp - '0' + 26;
if (cp >= 'A' && cp <= 'Z')
return cp - 'A';
return 0;
}
/*** Platform-specific constants ***/
/* maxint is the maximum value of a punycode_uint variable: */
static const punycode_uint maxint = -1;
/* Because maxint is unsigned, -1 becomes the maximum value. */
/*** Bias adaptation function ***/
static punycode_uint adapt(
punycode_uint Delta, punycode_uint numpoints, int firsttime )
{
punycode_uint k;
Delta = firsttime ? Delta / damp : Delta >> 1;
/* Delta >> 1 is a faster way of doing Delta / 2 */
Delta += Delta / numpoints;
for (k = 0; Delta > ((base - tmin) * tmax) / 2; k += base) {
Delta /= base - tmin;
}
return k + (base - tmin + 1) * Delta / (Delta + skew);
}
/*** Main decode function ***/
int punycode_decode(
size_t input_length,
const char input[],
size_t *output_length,
punycode_uint output[])
{
punycode_uint n, out = 0, i, max_out, bias, oldi, w, k, digit, t;
size_t b = 0, j, in;
if (!input_length) return punycode_bad_input;
/* Check that all chars are basic */
for (j = 0; j < input_length; ++j) {
if (!basic(input[j])) return punycode_bad_input;
if (input[j] == delimiter) b = j;
}
max_out = *output_length > maxint ? maxint : (punycode_uint) *output_length;
if (input[b] == delimiter) {
/* do not accept leading or trailing delimiter
* - leading delim must be omitted if there is no ASCII char in u-label
* - trailing delim means there where no non-ASCII chars in u-label
*/
if (!b || b == input_length - 1) return punycode_bad_input;
if (b >= max_out) return punycode_big_output;
/* Check that all chars before last delimiter are basic chars */
/* and copy the first b code points to the output. */
for (j = 0; j < b; j++)
output[out++] = input[j];
b += 1; /* advance to non-basic char encoding */
}
/* Initialize the state: */
n = initial_n;
i = 0;
bias = initial_bias;
/* Main decoding loop: Start just after the last delimiter if any */
/* basic code points were copied; start at the beginning otherwise. */
for (in = b; in < input_length; ++out) {
/* in is the index of the next ASCII code point to be consumed, */
/* and out is the number of code points in the output array. */
/* Decode a generalized variable-length integer into Delta, */
/* which gets added to i. The overflow checking is easier */
/* if we increase i as we go, then subtract off its starting */
/* value at the end to obtain Delta. */
for (oldi = i, w = 1, k = base; ; k += base) {
if (in >= input_length) return punycode_bad_input;
digit = decode_digit(input[in++]);
if (digit >= base) return punycode_bad_input;
if (digit > (maxint - i) / w) return punycode_overflow;
i += digit * w;
t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
k >= bias + tmax ? tmax : k - bias;
if (digit < t) break;
if (w > maxint / (base - t)) return punycode_overflow;
w *= (base - t);
}
bias = adapt(i - oldi, out + 1, oldi == 0);
/* i was supposed to wrap around from out+1 to 0, */
/* incrementing n each time, so we'll fix that now: */
if (i / (out + 1) > maxint - n) return punycode_overflow;
n += i / (out + 1);
if (n > 0x10FFFF || (n >= 0xD800 && n <= 0xDBFF)) return punycode_bad_input;
i %= (out + 1);
/* Insert n at position i of the output: */
/* not needed for Punycode: */
/* if (basic(n)) return punycode_bad_input; */
if (out >= max_out) return punycode_big_output;
memmove(output + i + 1, output + i, (out - i) * sizeof *output);
output[i++] = n;
}
*output_length = (size_t) out;
/* cannot overflow because out <= old value of *output_length */
return punycode_success;
}
My (Attempted) XOJO translation
FromPunycode(punycode As Text, correctResult As Text, allowEmptyString As Boolean = False) As Text
If punycode = "" Then
If allowEmptyString Then
Return ""
Else
#If DebugBuild
Break
#EndIf
Raise New IllegalInputException
End If
End If
If MaxUInt32 = 0 Then
Initialize
End If
Var result As Text = ""
Var basicCount, bias, n, j, k, t, i, w, oldi, digit As UInt64
Var realLength As UInt32 = punycode.Length
Var inputLength As UInt32 = realLength - 1
For j = 0 To inputLength
Var ascii As UInt32 = Asc(punycode.Mid(j, 1))
if Not IsBasicCodepoint(ascii, True) Then
#If DebugBuild
Break
#EndIf
Raise New IllegalInputException
ElseIf ascii = DELIMITER Then
basicCount = j
End If
Next
Var outputCount As UInt32
If Asc(punycode.Mid(basicCount, 1)) = DELIMITER Then
If basicCount = 0 Or basicCount = realLength Then
#If DebugBuild
Break
#EndIf
Raise New IllegalInputException
Else
For j = 0 To basicCount - 1
result = result + punycode.Mid(j, 1)
Next
outputCount = basicCount
End If
End If
n = INITIAL_N
bias = INITIAL_BIAS
i = 0
Var inCount As UInt32 = basicCount
While (inCount < realLength)
oldi = i
k = BASE
w = 1
Do
If inCount > inputLength Then
#If DebugBuild
Break
#EndIf
Raise New IllegalInputException
End If
digit = DecodeDigit(punycode.Mid(inCount, 1))
inCount = inCount + 1
If digit >= BASE Then
#If DebugBuild
Break
#EndIf
Raise New IllegalInputException
ElseIf digit > ((MaxUInt32 - i) \ w) Then
#If DebugBuild
Break
#EndIf
Raise New OverflowException
End If
i = i + (digit * w)
If k <= bias Then
t = T_MINIMUM
ElseIf k >= (bias + T_MAXIMUM) Then
t = T_MAXIMUM
Else
t = k - bias
End If
If digit < t Then
Exit Do
End If
If w > (MaxUInt32 \ (BASE - t)) Then
#If DebugBuild
Break
#EndIf
Raise New OverflowException
End If
w = w * (BASE - t)
k = k + BASE
Loop
Var op1 As UInt32 = outputCount + 1
bias = AdaptForBias(i - oldi, op1, oldi = 0)
if ((i \ op1) > (MaxUInt32 - n)) Then
#If DebugBuild
Break
#EndIf
Raise New OverflowException
End If
n = n + (i \ op1)
If n > &h10FFFF Or (n > &hD7FF And n < &hDC00) Then
#If DebugBuild
Break
#EndIf
Raise New IllegalInputException
End If
i = i Mod op1
result = result + Encodings.UTF8.Chr(n).ToText
i = i + 1
#If DebugBuild
If result.Mid(outputCount, 1) = correctResult.Mid(outputCount, 1) Then
MessageBox("Good through character #" + Str(outputCount+1))
Else
MessageBox("Character #" + Str(outputCount+1) + " INCORRECT!")_
+ EndofLine + "Difference:" + Str(Asc(correctresult.Mid(outputCount, 1)) - Asc(result.Mid(outputCount, 1)))
Quit(-1)
End If
#EndIf
outputCount = outputCount + 1
Wend
Return result
Initialize
Var mb As Xojo.Core.MutableMemoryBlock = _
New Xojo.Core.MutableMemoryBlock(4)
mb.Int32Value(0) = -1
MaxUInt32 = mb.UInt32Value(0)
IsBasicCodepoint(codepoint As UInt32, decoding As Boolean) As Boolean
Var result As Boolean
If decoding Then
result = ((codepoint >= Asc("A") And _
codepoint <= Asc("Z")) Or _
(codepoint >= Asc("a") And _
codepoint <= Asc("z")) Or _
(codepoint >= Asc("0") And _
codepoint <= Asc("9")) Or _
codepoint = Asc("-") Or _
codepoint = Asc("_"))
Else
result = codepoint < 128
End If
Return result
AdaptForBias(dlt As UInt32, numberOfPoints As UInt32, firstCall As Boolean) As UInt32
Var result As UInt32
Var cntr As UInt32 = 0
If firstCall Then
dlt = dlt \ DAMP
Else
dlt = dlt \ 2
End If
dlt = dlt + (dlt \ numberOfPoints)
While dlt > ((BASE - T_MINIMUM) * T_MAXIMUM) \ 2
dlt = dlt \ (BASE - T_MINIMUM)
cntr = cntr + BASE
Wend
result = cntr + ((BASE - T_MINIMUM + 1) * dlt) \ (dlt + SKEW)
Return result
DecodeDigit(char As Text) As UInt32
Var result As UInt32
Var codepoint As UInt32 = Asc(char)
If (codepoint >= Asc("a") And codepoint <= Asc("z")) Then
result = codepoint - Asc("a")
Elseif (codepoint >= Asc("A") And codepoint <= Asc("Z")) Then
result = codepoint - Asc("A")
Elseif (codepoint >= Asc("0") And codepoint <= Asc("9")) Then
result = codepoint - Asc("0") + 26
Else
result = 0
End If
Return result
CONSTANTS
Private Const BASE as Number = 36
Private Const DAMP as Number = 700
Private Const DELIMITER as Number = &h2D
Private Const INITIAL_BIAS as Number = 72
Private Const INITIAL_N as Number = 128
Private Const SKEW as Number = 38
Private Const T_MAXIMUM as Number = 26
Private Const T_MINIMUM as Number = 1
The test values as translated to Xojo syntax The Constructor’s parameters are: length As UInt32, inputData() As UInt64, expected_output As Text
There are a total of 19 test cases. I’ve only supplied the first two.
Me.TestCases.Append(New Punycode_Data(_
17, Array(&h0644, &h064A, &h0647, &h0645, &h0627, &h0628, _
&h062A, &h0643, &h0644, &h0645, &h0648, &h0634, &h0639, _
&h0631, &h0628, &h064A, &h061F), “egbpdaj6bu4bxfgehfvwxn”))
Me.TestCases.Append(New Punycode_Data(_
9, Array(&h4ED6, &h4EEC, &h4E3A, &h4EC0, &h4E48, &h4E0D, _
&h8BF4, &h4E2D, &h6587), “ihqwcrb4cv8a8dqg056pqjye”))