diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-09-10 17:16:52 -0400 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-09-10 17:16:52 -0400 |
commit | 1d82e3affd42c2336702af4a644baa8eec249ead (patch) | |
tree | aeea397863a7d2014bd960a9bd3ba090f841bcce /source/encoding.c | |
parent | 0882d1020d75bbddc8e8fbe30aed435e8814988a (diff) | |
download | gen3uploader-1d82e3affd42c2336702af4a644baa8eec249ead.tar.gz gen3uploader-1d82e3affd42c2336702af4a644baa8eec249ead.tar.bz2 gen3uploader-1d82e3affd42c2336702af4a644baa8eec249ead.zip |
Increased stability and added support for non-English names
The GameCube side of the program now can convert from the proprietary character set to UTF-8. This is useful for representing names of Pokémon and players in a neutral way. The proprietary character set is mostly compatible between the six languages supported by the games (as in, the hiragana and katakana characters unique to Japanese occupy spaces not used by the other languages for names, as do the letters with umlauts unique to German). However, six codepoints differ between the Japanese and non-Japanese character sets, and an additional two differ even amongst the non-Japanese sets. Because of this, the function that converts to UTF-8 takes a language as a parameter, and uses the correct characters for that language. From there, the behavior of this function differs slightly from that of the games. In the non-Japanese games, the Japanese encoding is used if the Pokémon in question originated in a Japanese game, and the non-Japanese encoding (disregarding the regional differences in the two codepoints mentioned earlier) otherwise. In the Japanese games, the Japanese encoding is used regardless of the Pokémon's origin. The decoding function I wrote always uses the character set corresponding to the language of the Pokémon's origin, because that most accurately represents the name given to it, and will not change just because the Pokémon was traded to a different game. The character set used for the name of the player is the one corresponding to the language of the cartridge. Additionally, a number of changes were made to the communication protocol between the GameCube and the GBA that appear to have dramatically increased stability. The most significant of these is likely that the transfer delay was increased tenfold. This causes the multiboot image to take slightly longer to download to the GBA, but the difference is not large enough to outweigh the benefits of the increased stability.
Diffstat (limited to 'source/encoding.c')
-rw-r--r-- | source/encoding.c | 398 |
1 files changed, 390 insertions, 8 deletions
diff --git a/source/encoding.c b/source/encoding.c index 0be1e0b..0a44800 100644 --- a/source/encoding.c +++ b/source/encoding.c | |||
@@ -9,15 +9,397 @@ | |||
9 | */ | 9 | */ |
10 | #include "encoding.h" | 10 | #include "encoding.h" |
11 | 11 | ||
/*
 * Lookup table from a gen 3 character code (used directly as the array index)
 * to the UTF-8 string for that character. Entries are listed in index order;
 * the comment above each row gives the codes that row covers. A null entry
 * marks a code that is either unused or whose character varies by language
 * and is therefore not stored in this table.
 */
const char* charmap[] = {
  // 0x00: space
  " ",

  // 0x01-0x05: hiragana a, i, u, e, o
  "\xe3\x81\x82", "\xe3\x81\x84", "\xe3\x81\x86", "\xe3\x81\x88", "\xe3\x81\x8a",
  // 0x06-0x0a: hiragana ka, ki, ku, ke, ko
  "\xe3\x81\x8b", "\xe3\x81\x8d", "\xe3\x81\x8f", "\xe3\x81\x91", "\xe3\x81\x93",
  // 0x0b-0x0f: hiragana sa, si, su, se, so
  "\xe3\x81\x95", "\xe3\x81\x97", "\xe3\x81\x99", "\xe3\x81\x9b", "\xe3\x81\x9d",
  // 0x10-0x14: hiragana ta, ti, tu, te, to
  "\xe3\x81\x9f", "\xe3\x81\xa1", "\xe3\x81\xa4", "\xe3\x81\xa6", "\xe3\x81\xa8",
  // 0x15-0x19: hiragana na, ni, nu, ne, no
  "\xe3\x81\xaa", "\xe3\x81\xab", "\xe3\x81\xac", "\xe3\x81\xad", "\xe3\x81\xae",
  // 0x1a-0x1e: hiragana ha, hi, hu, he, ho
  "\xe3\x81\xaf", "\xe3\x81\xb2", "\xe3\x81\xb5", "\xe3\x81\xb8", "\xe3\x81\xbb",
  // 0x1f-0x23: hiragana ma, mi, mu, me, mo
  "\xe3\x81\xbe", "\xe3\x81\xbf", "\xe3\x82\x80", "\xe3\x82\x81", "\xe3\x82\x82",
  // 0x24-0x26: hiragana ya, yu, yo
  "\xe3\x82\x84", "\xe3\x82\x86", "\xe3\x82\x88",
  // 0x27-0x2b: hiragana ra, ri, ru, re, ro
  "\xe3\x82\x89", "\xe3\x82\x8a", "\xe3\x82\x8b", "\xe3\x82\x8c", "\xe3\x82\x8d",
  // 0x2c-0x2e: hiragana wa, wo, n
  "\xe3\x82\x8f", "\xe3\x82\x92", "\xe3\x82\x93",
  // 0x2f-0x33: hiragana small a, i, u, e, o
  "\xe3\x81\x81", "\xe3\x81\x83", "\xe3\x81\x85", "\xe3\x81\x87", "\xe3\x81\x89",
  // 0x34-0x36: hiragana small ya, yu, yo
  "\xe3\x82\x83", "\xe3\x82\x85", "\xe3\x82\x87",
  // 0x37-0x3b: hiragana ga, gi, gu, ge, go
  "\xe3\x81\x8c", "\xe3\x81\x8e", "\xe3\x81\x90", "\xe3\x81\x92", "\xe3\x81\x94",
  // 0x3c-0x40: hiragana za, zi, zu, ze, zo
  "\xe3\x81\x96", "\xe3\x81\x98", "\xe3\x81\x9a", "\xe3\x81\x9c", "\xe3\x81\x9e",
  // 0x41-0x45: hiragana da, di, du, de, do
  "\xe3\x81\xa0", "\xe3\x81\xa2", "\xe3\x81\xa5", "\xe3\x81\xa7", "\xe3\x81\xa9",
  // 0x46-0x4a: hiragana ba, bi, bu, be, bo
  "\xe3\x81\xb0", "\xe3\x81\xb3", "\xe3\x81\xb6", "\xe3\x81\xb9", "\xe3\x81\xbc",
  // 0x4b-0x4f: hiragana pa, pi, pu, pe, po
  "\xe3\x81\xb1", "\xe3\x81\xb4", "\xe3\x81\xb7", "\xe3\x81\xba", "\xe3\x81\xbd",
  // 0x50: hiragana small tu
  "\xe3\x81\xa3",

  // 0x51-0x55: katakana a, i, u, e, o
  "\xe3\x82\xa2", "\xe3\x82\xa4", "\xe3\x82\xa6", "\xe3\x82\xa8", "\xe3\x82\xaa",
  // 0x56-0x5a: katakana ka, ki, ku, ke, ko
  "\xe3\x82\xab", "\xe3\x82\xad", "\xe3\x82\xaf", "\xe3\x82\xb1", "\xe3\x82\xb3",
  // 0x5b-0x5f: katakana sa, si, su, se, so
  "\xe3\x82\xb5", "\xe3\x82\xb7", "\xe3\x82\xb9", "\xe3\x82\xbb", "\xe3\x82\xbd",
  // 0x60-0x64: katakana ta, ti, tu, te, to
  "\xe3\x82\xbf", "\xe3\x83\x81", "\xe3\x83\x84", "\xe3\x83\x86", "\xe3\x83\x88",
  // 0x65-0x69: katakana na, ni, nu, ne, no
  "\xe3\x83\x8a", "\xe3\x83\x8b", "\xe3\x83\x8c", "\xe3\x83\x8d", "\xe3\x83\x8e",
  // 0x6a-0x6e: katakana ha, hi, hu, he, ho
  "\xe3\x83\x8f", "\xe3\x83\x92", "\xe3\x83\x95", "\xe3\x83\x98", "\xe3\x83\x9b",
  // 0x6f-0x73: katakana ma, mi, mu, me, mo
  "\xe3\x83\x9e", "\xe3\x83\x9f", "\xe3\x83\xa0", "\xe3\x83\xa1", "\xe3\x83\xa2",
  // 0x74-0x76: katakana ya, yu, yo
  "\xe3\x83\xa4", "\xe3\x83\xa6", "\xe3\x83\xa8",
  // 0x77-0x7b: katakana ra, ri, ru, re, ro
  "\xe3\x83\xa9", "\xe3\x83\xaa", "\xe3\x83\xab", "\xe3\x83\xac", "\xe3\x83\xad",
  // 0x7c-0x7e: katakana wa, wo, n
  "\xe3\x83\xaf", "\xe3\x83\xb2", "\xe3\x83\xb3",
  // 0x7f-0x83: katakana small a, i, u, e, o
  "\xe3\x82\xa1", "\xe3\x82\xa3", "\xe3\x82\xa5", "\xe3\x82\xa7", "\xe3\x82\xa9",
  // 0x84-0x86: katakana small ya, yu, yo
  "\xe3\x83\xa3", "\xe3\x83\xa5", "\xe3\x83\xa7",
  // 0x87-0x8b: katakana ga, gi, gu, ge, go
  "\xe3\x82\xac", "\xe3\x82\xae", "\xe3\x82\xb0", "\xe3\x82\xb2", "\xe3\x82\xb4",
  // 0x8c-0x90: katakana za, zi, zu, ze, zo
  "\xe3\x82\xb6", "\xe3\x82\xb8", "\xe3\x82\xba", "\xe3\x82\xbc", "\xe3\x82\xbe",
  // 0x91-0x95: katakana da, di, du, de, do
  "\xe3\x83\x80", "\xe3\x83\x82", "\xe3\x83\x85", "\xe3\x83\x87", "\xe3\x83\x89",
  // 0x96-0x9a: katakana ba, bi, bu, be, bo
  "\xe3\x83\x90", "\xe3\x83\x93", "\xe3\x83\x96", "\xe3\x83\x99", "\xe3\x83\x9c",
  // 0x9b-0x9f: katakana pa, pi, pu, pe, po
  "\xe3\x83\x91", "\xe3\x83\x94", "\xe3\x83\x97", "\xe3\x83\x9a", "\xe3\x83\x9d",
  // 0xa0: katakana small tu
  "\xe3\x83\x83",

  // 0xa1-0xaa: arabic numerals zero through nine
  "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",

  // 0xab-0xae: exclamation mark, question mark, period, hyphen [all vary]
  0, 0, 0, 0,
  // 0xaf-0xb0: interpunct, ellipsis
  "\xe3\x83\xbb", "\xe2\x80\xa6",
  // 0xb1-0xb4: left/right double, left/right single quotation marks [all vary]
  0, 0, 0, 0,
  // 0xb5-0xb6: mars and venus astrological signs
  "\xe2\x99\x82", "\xe2\x99\x80",
  // 0xb7-0xba: unused, comma, unused, forward slash
  0, ",", 0, "/",

  // 0xbb-0xd4: uppercase latin letters a through z
  "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
  "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
  // 0xd5-0xee: lowercase latin letters a through z
  "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
  "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",

  // 0xef-0xf0: unused
  0, 0,
  // 0xf1-0xf3: uppercase latin letters a, o, u with diaeresis
  "\xc3\x84", "\xc3\x96", "\xc3\x9c",
  // 0xf4-0xf6: lowercase latin letters a, o, u with diaeresis
  "\xc3\xa4", "\xc3\xb6", "\xc3\xbc"
};
19 | 261 | ||
20 | char debugGen3Decode(u8 val) | 262 | /** |
263 | * Converts a string encoded with the propietary gen 3 character encoding into | ||
264 | * a UTF-8 encoded string. The function will read the input buffer until either | ||
265 | * the max length has been reached, or an 0xFF has been found. The output buffer | ||
266 | * must be at least one plus three times the max length in size. | ||
267 | */ | ||
268 | void decodePokemonCharset( | ||
269 | const u8* input, | ||
270 | int maxLength, | ||
271 | char* output, | ||
272 | enum PokemonLanguage language) | ||
21 | { | 273 | { |
22 | return charmap[val - 0xA0]; | 274 | for (int i=0; i<maxLength; i++) |
275 | { | ||
276 | char cur = *input; | ||
277 | input++; | ||
278 | |||
279 | if (cur == 0xff) | ||
280 | { | ||
281 | break; | ||
282 | } | ||
283 | |||
284 | const char* next = 0; | ||
285 | |||
286 | if (cur == 0xab) | ||
287 | { | ||
288 | // fullwidth exclamation mark | ||
289 | if (language == Japanese) | ||
290 | { | ||
291 | next = "\xef\xbc\x81"; | ||
292 | } | ||
293 | // exclamation mark | ||
294 | else { | ||
295 | next = "!"; | ||
296 | } | ||
297 | } else if (cur == 0xac) | ||
298 | { | ||
299 | // fullwidth question mark | ||
300 | if (language == Japanese) | ||
301 | { | ||
302 | next = "\xef\xbc\x9f"; | ||
303 | } | ||
304 | // question mark | ||
305 | else { | ||
306 | next = "?"; | ||
307 | } | ||
308 | } else if (cur == 0xad) | ||
309 | { | ||
310 | // ideographic full stop | ||
311 | if (language == Japanese) | ||
312 | { | ||
313 | next = "\xe3\x80\x82"; | ||
314 | } | ||
315 | // period | ||
316 | else { | ||
317 | next = "."; | ||
318 | } | ||
319 | } else if (cur == 0xae) | ||
320 | { | ||
321 | // katakana-hiragana prolonged sound mark | ||
322 | if (language == Japanese) | ||
323 | { | ||
324 | next = "\xe3\x83\xbc"; | ||
325 | } | ||
326 | // hyphen | ||
327 | else { | ||
328 | next = "-"; | ||
329 | } | ||
330 | } else if (cur == 0xb1) | ||
331 | { | ||
332 | // left white corner bracket | ||
333 | if (language == Japanese) | ||
334 | { | ||
335 | next = "\xe3\x80\x83"; | ||
336 | } | ||
337 | // double low-9 quotation mark | ||
338 | else if (language == German) | ||
339 | { | ||
340 | next = "\xe2\x80\x9e"; | ||
341 | } | ||
342 | // left double angle quotation mark | ||
343 | else if (language == French) | ||
344 | { | ||
345 | next = "\xc2\xab"; | ||
346 | } | ||
347 | // left double quotation mark | ||
348 | else { | ||
349 | next = "\xe2\x80\x9c"; | ||
350 | } | ||
351 | } else if (cur == 0xb2) | ||
352 | { | ||
353 | // right white corner bracket | ||
354 | if (language == Japanese) | ||
355 | { | ||
356 | next = "\xe3\x80\x8f"; | ||
357 | } | ||
358 | // left double quotation mark | ||
359 | else if (language == German) | ||
360 | { | ||
361 | next = "\xe2\x80\x9c"; | ||
362 | } | ||
363 | // right double angle quotation mark | ||
364 | else if (language == French) | ||
365 | { | ||
366 | next = "\xc2\xbb"; | ||
367 | } | ||
368 | // right double quotation mark | ||
369 | else { | ||
370 | next = "\xe2\x80\x9d"; | ||
371 | } | ||
372 | } else if (cur == 0xb3) | ||
373 | { | ||
374 | // left corner bracket | ||
375 | if (language == Japanese) | ||
376 | { | ||
377 | next = "\xe3\x80\x8c"; | ||
378 | } | ||
379 | // left single quotation mark | ||
380 | else { | ||
381 | next = "\xe2\x80\x98"; | ||
382 | } | ||
383 | } else if (cur == 0xb4) | ||
384 | { | ||
385 | // right corner bracket | ||
386 | if (language == Japanese) | ||
387 | { | ||
388 | next = "\xe3\x80\x8d"; | ||
389 | } | ||
390 | // right single quotation mark | ||
391 | else { | ||
392 | next = "\xe2\x80\x99"; | ||
393 | } | ||
394 | } else { | ||
395 | next = charmap[(int)cur]; | ||
396 | } | ||
397 | |||
398 | for (; *next != 0; next++) | ||
399 | { | ||
400 | *output++ = *next; | ||
401 | } | ||
402 | } | ||
403 | |||
404 | *output = 0; | ||
23 | } | 405 | } |