Subversion Repositories SvarDOS

Rev

Rev 1291 | Rev 1295 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
597 mateuszvis 1
/*
1247 mateusz.vi 2
 * Copyright (C) 2021-2023 Mateusz Viste
597 mateuszvis 3
 *
4
 * usage: tlumacz en fr pl etc
5
 *
601 mateuszvis 6
 * computes an out.lng file that contains all language resources.
597 mateuszvis 7
 *
8
 */
9
 
10
 
11
#include <stdio.h>
12
#include <stdlib.h>
13
#include <string.h>
1290 bernd.boec 14
#include <ctype.h>
597 mateuszvis 15
 
1248 mateusz.vi 16
#include "svarlang.h"
597 mateuszvis 17
 
1290 bernd.boec 18
#define STRINGS_CAP 65000   /* string storage size in characters */
1293 mateusz.vi 19
#define DICT_CAP    10000   /* dictionary size in elements */
597 mateuszvis 20
 
21
/* reads a single line from fd into dst (fgets() semantics: at most dstsz - 1
 * bytes are consumed per call) and returns the length of the resulting string,
 * or 0xffff when fgets() reports EOF or a read error.
 * the line is cut at the first CR or LF and trailing spaces are removed, hence
 * a line made only of spaces yields an empty string (length 0).
 * a line that does not fit in dst is delivered in pieces by successive calls. */
static unsigned short readl(char *dst, size_t dstsz, FILE *fd) {
  unsigned short i, len = 0;

  if (fgets(dst, (int)dstsz, fd) == NULL) return(0xffff); /* EOF */

  /* walk up to the first CR/LF, remembering where the last non-space char
   * ends (len stays 0 as long as only spaces have been seen) */
  for (i = 0; (dst[i] != 0) && (dst[i] != '\r') && (dst[i] != '\n'); i++) {
    if (dst[i] != ' ') len = (unsigned short)(i + 1);
  }

  dst[len] = 0; /* drops the line terminator and any trailing spaces */

  return(len);
}
37
 
38
 
1114 mateusz.vi 39
/* parses a line in format "[?]1.50:somestring".
 * on success fills *id with (major << 8) | minor and returns a pointer to the
 * string part (the byte right after the ':'). returns NULL on error, in which
 * case *id is left untouched.
 * both numbers must have at least one digit and must fit in a byte (0..255),
 * since they are packed together into a single 16-bit id. */
static const char *parseline(unsigned short *id, const char *s) {
  unsigned int part[2];
  int p, ndigits;

  /* strings prefixed by '?' are flagged as "dirty": ignore this flag here */
  if (*s == '?') s++;

  /* part[0] must be terminated by '.', part[1] by ':' */
  for (p = 0; p < 2; p++) {
    part[p] = 0;
    for (ndigits = 0; (*s >= '0') && (*s <= '9'); s++, ndigits++) {
      part[p] = (part[p] * 10) + (unsigned int)(*s - '0');
      if (part[p] > 255) return(NULL); /* would not fit in the id */
    }
    if (ndigits == 0) return(NULL);
    if (*s != ((p == 0) ? '.' : ':')) return(NULL);
    s++; /* skip the separator */
  }

  *id = (unsigned short)((part[0] << 8) | part[1]);

  return(s);
}
73
 
74
 
639 mateusz.vi 75
/* replaces, in place, backslash escape sequences by the byte they stand for:
 * \e -> ESC (0x1B), \n -> LF, \r -> CR, \t -> TAB. any other escaped char is
 * kept as-is without its backslash (so "\\" gives a single backslash), and a
 * lone backslash at the very end of the string is dropped.
 * returns the new length of the string. */
static unsigned short unesc_string(char *linebuff) {
  const char *src = linebuff;
  unsigned short outlen = 0;
  char c;

  while (*src != 0) {
    c = *src++;
    if (c == '\\') {
      if (*src == 0) break; /* trailing backslash: nothing left to escape */
      c = *src++;
      if (c == 'e') {
        c = 0x1B; /* ESC code, using hex because '\e' is not ANSI C */
      } else if (c == 'n') {
        c = '\n';
      } else if (c == 'r') {
        c = '\r';
      } else if (c == 't') {
        c = '\t';
      }
    }
    linebuff[outlen++] = c;
  }

  linebuff[outlen] = 0;
  return(outlen);
}
100
 
1290 bernd.boec 101
/* a single dictionary record, byte-packed so the two fields are contiguous */
#pragma pack(1)
typedef struct dict_entry {
  unsigned short id;      /* string id, as computed by parseline(): (major << 8) | minor */
  unsigned short offset;  /* position of the string, in bytes, from the start of the strings buffer */
} dict_entry_t;
#pragma pack()
639 mateusz.vi 107
 
1290 bernd.boec 108
typedef struct svl_lang {
109
  char id[2];
110
  unsigned short num_strings;
111
 
112
  dict_entry_t *dict;
113
  size_t dict_cap;
114
 
115
  char *strings;
116
  char *strings_end;
117
  size_t strings_cap;
118
 
119
} svl_lang_t;
120
 
121
 
122
static svl_lang_t * svl_lang_new(char langid[2], size_t dict_cap, size_t strings_cap)
123
{
124
  svl_lang_t *l;
125
 
126
  l = malloc(sizeof(svl_lang_t));
127
  if (!l) return NULL;
128
 
129
  l->id[0] = (char)toupper(langid[0]);
130
  l->id[1] = (char)toupper(langid[1]);
131
 
132
  l->dict = malloc(dict_cap * sizeof(dict_entry_t));
133
  if (!l->dict) {
134
    return NULL;
135
  }
136
  l->dict_cap = dict_cap;
137
 
138
  l->num_strings = 0;
139
  l->strings = l->strings_end = malloc(strings_cap);
140
  if (!l->strings) {
141
    free(l->dict);
142
    return NULL;
143
  }
144
  l->strings_cap = strings_cap;
145
  return l;
146
}
147
 
148
 
149
/* compacts the dict and string buffer */
150
static void svl_compact_lang(svl_lang_t *l)
151
{
152
  size_t bytes;
153
  bytes = l->strings_end - l->strings;
154
  if (bytes < l->strings_cap) {
155
    l->strings = l->strings_end = realloc(l->strings, bytes);
156
    l->strings_end += bytes;
157
    l->strings_cap = bytes;
158
  }
159
  l->dict_cap = l->num_strings;
160
  l->dict = realloc(l->dict, l->dict_cap * sizeof(dict_entry_t));
161
}
162
 
163
 
164
/* releases a language block obtained from svl_lang_new(): its dictionary, its
 * strings buffer and the block itself. the pointer must not be used
 * afterwards. passing NULL is allowed and does nothing. */
static void svl_lang_free(svl_lang_t *l)
{
  if (l == NULL) return;

  free(l->dict);
  free(l->strings);
  free(l);
}
178
 
179
 
180
/* returns how many bytes of the strings buffer of l are in use */
static size_t svl_strings_bytes(svl_lang_t *l)
{
  const char *first = l->strings;
  const char *past_last = l->strings_end;

  return past_last - first;
}
184
 
185
 
186
/* returns the size, in bytes, of the used part of the dictionary of l */
static size_t svl_dict_bytes(svl_lang_t *l)
{
  size_t used_bytes = sizeof(dict_entry_t);

  used_bytes *= l->num_strings;
  return used_bytes;
}
190
 
191
 
192
/* adds string s under the given id to language l: the string (with its NUL
 * terminator) is appended to the strings buffer and a dictionary entry is
 * inserted so that the dictionary stays sorted by ascending id.
 * does not check for duplicated ids, this is up to the caller.
 * returns 1 on success, 0 if the string or its dictionary entry does not fit
 * (l is left unchanged then). */
static int svl_add_str(svl_lang_t *l, unsigned short id, const char *s)
{
  size_t len = strlen(s) + 1;
  size_t used = (size_t)(l->strings_end - l->strings);
  size_t cursor;

  /* room left in the strings buffer? (written so it cannot overflow) */
  if ((len > l->strings_cap) || (used > l->strings_cap - len)) return 0;

  /* room left in the dictionary? dict_cap is a number of entries, not bytes */
  if (l->num_strings >= l->dict_cap) return 0;

  /* offsets and the entry counter are stored as 16-bit values */
  if ((used > 0xFFFFu) || (l->num_strings == 0xFFFFu)) return 0;

  /* find dictionary insert position, search backwards in assumption
     that in translation files, strings are generally ordered ascending */
  for (cursor = l->num_strings; cursor > 0 && l->dict[cursor-1].id > id; cursor--);

  /* shift the entries with a higher id one slot up to make room */
  memmove(&(l->dict[cursor+1]), &(l->dict[cursor]), sizeof(dict_entry_t)*(l->num_strings - cursor));
  l->dict[cursor].id = id;
  l->dict[cursor].offset = (unsigned short)used;

  memcpy(l->strings_end, s, len);
  l->strings_end += len;
  l->num_strings++;

  return 1;
}
216
 
217
 
218
/* looks up id in the dictionary of l (binary search, the dictionary being
 * kept sorted by ascending id by svl_add_str()).
 * returns 1 if the id is present, 0 otherwise. */
static int svl_find(svl_lang_t *l, unsigned short id)
{
  /* search the half-open range [lo, hi): with unsigned indexes this avoids
   * the "x - 1" wrap-around that happens when the id is lower than entry 0 */
  size_t lo = 0, hi = l->num_strings, mid;
  unsigned short v;

  while (lo < hi) {
    mid = lo + ((hi - lo) >> 1);
    v = l->dict[mid].id;
    if (id == v) return 1;
    if (id > v) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }

  return 0;
}
234
 
1061 mateusz.vi 235
/* opens a CATS-style file and compiles it into a ressources lang block
236
 * returns 0 on error, or the size of the generated data block otherwise */
1290 bernd.boec 237
static unsigned short svl_lang_from_cats_file(svl_lang_t *l, svl_lang_t *refl) {
238
  unsigned short linelen;
597 mateuszvis 239
  FILE *fd;
1290 bernd.boec 240
  char fname[] = "xx.txt";
623 mateuszvis 241
  static char linebuf[8192];
1114 mateusz.vi 242
  const char *ptr;
1290 bernd.boec 243
  unsigned short id, maxid=0, maxid_line, linecount;
244
  int i;
597 mateuszvis 245
 
1290 bernd.boec 246
  fname[strlen(fname) - 6] = (char)tolower( l->id[0] );
247
  fname[strlen(fname) - 5] = (char)tolower( l->id[1] );
597 mateuszvis 248
 
249
  fd = fopen(fname, "rb");
250
  if (fd == NULL) {
251
    printf("ERROR: FAILED TO OPEN '%s'\r\n", fname);
252
    return(0);
253
  }
254
 
255
  for (linecount = 1;; linecount++) {
256
    linelen = readl(linebuf, sizeof(linebuf), fd);
257
    if (linelen == 0xffff) break; /* EOF */
258
    if ((linelen == 0) || (linebuf[0] == '#')) continue;
259
 
639 mateusz.vi 260
    /* convert escaped chars to actual bytes (\n -> newline, etc) */
261
    linelen = unesc_string(linebuf);
262
 
597 mateuszvis 263
    /* read id and get ptr to actual string ("1.15:string") */
264
    ptr = parseline(&id, linebuf);
1272 mateusz.vi 265
 
266
    /* handle malformed lines */
597 mateuszvis 267
    if (ptr == NULL) {
1272 mateusz.vi 268
      printf("WARNING: %s[#%u] is malformed (linelen = %u):\r\n", fname, linecount, linelen);
623 mateuszvis 269
      puts(linebuf);
1272 mateusz.vi 270
      continue;
597 mateuszvis 271
    }
1272 mateusz.vi 272
 
273
    /* ignore empty strings (but emit a warning) */
274
    if (ptr[0] == 0) {
1271 bernd.boec 275
      printf("WARNING: %s[#%u] ignoring empty string %u.%u\r\n", fname, linecount, id >> 8, id & 0xff);
276
      continue;
277
    }
597 mateuszvis 278
 
1114 mateusz.vi 279
    /* warn about dirty lines */
280
    if (linebuf[0] == '?') {
281
      printf("WARNING: %s[#%u] string id %u.%u is flagged as 'dirty'\r\n", fname, linecount, id >> 8, id & 0xff);
282
    }
283
 
1290 bernd.boec 284
    /* add the string contained in current line, if conditions are met */
285
    if (!svl_find(l, id)) {
286
      if (refl == NULL || svl_find(refl, id)) {
287
        if (!svl_add_str(l, id, ptr)) {
288
          printf("ERROR: %s[#%u] output size limit exceeded\r\n", fname, linecount);
289
          fclose(fd);
290
          return 0;
291
        }
292
        if (id >= maxid) {
293
          maxid = id;
294
          maxid_line = linecount;
295
        }
296
        else {
1293 mateusz.vi 297
          printf("WARNING:%s[#%u] file unsorted - line %u has higher id %u.%u\r\n", fname, linecount, maxid_line, maxid >> 8, maxid & 0xff);
1290 bernd.boec 298
        }
299
      }
300
      else {
301
        printf("WARNING: %s[#%u] has an invalid id (%u.%u not present in ref lang)\r\n", fname, linecount, id >> 8, id & 0xff);
302
      }
623 mateuszvis 303
    }
1290 bernd.boec 304
    else {
1293 mateusz.vi 305
      printf("WARNING: %s[#%u] has a duplicated id (%u.%u)\r\n", fname, linecount, id >> 8, id & 0xff);
597 mateuszvis 306
    }
307
  }
308
 
309
  fclose(fd);
310
 
1290 bernd.boec 311
  /* if reflang provided, pull missing strings from it */
312
  if (refl != NULL) {
313
    for (i = 0; i < refl->num_strings; i++) {
314
      id = refl->dict[i].id;
315
      if (!svl_find(l, id)) {
597 mateuszvis 316
        printf("WARNING: %s is missing string %u.%u (pulled from ref lang)\r\n", fname, id >> 8, id & 0xff);
1291 bernd.boec 317
        if (!svl_add_str(l, id, refl->strings + refl->dict[i].offset)) {
1290 bernd.boec 318
          printf("ERROR: %s[#%u] output size limit exceeded\r\n", fname, linecount);
319
          return 0;
320
        }
597 mateuszvis 321
      }
322
    }
323
  }
324
 
1290 bernd.boec 325
  return(svl_strings_bytes(l));
326
}
597 mateuszvis 327
 
1290 bernd.boec 328
 
329
/* writes the file header to fd: the 4-byte signature "SvL\x1a" followed by
 * the number of strings as a 16-bit value, least significant byte first
 * (written byte by byte so the result does not depend on the byte order of
 * the machine running this tool).
 * returns 1 on success, 0 on write error. */
static int svl_write_header(unsigned short num_strings, FILE *fd)
{
  unsigned char hdr[6] = { 'S', 'v', 'L', 0x1a, 0, 0 };

  hdr[4] = (unsigned char)(num_strings & 0xff);
  hdr[5] = (unsigned char)((num_strings >> 8) & 0xff);

  return (fwrite(hdr, 1, sizeof(hdr), fd) == sizeof(hdr));
}
334
 
335
 
1290 bernd.boec 336
/* writes language l to fd: the 2-byte lang id, the size of the strings block
 * (16 bits), the dictionary (for each entry: 16-bit id, then 16-bit offset)
 * and finally the strings block itself. all 16-bit values are written least
 * significant byte first, regardless of the byte order of the machine running
 * this tool.
 * returns 1 on success, 0 on write error or if the strings block is too big
 * for its size to be stored on 16 bits. */
static int svl_write_lang(svl_lang_t *l, FILE *fd)
{
  size_t strings_bytes = svl_strings_bytes(l);
  unsigned char rec[4];
  unsigned short i;

  if (strings_bytes > 0xFFFFu) return 0;

  /* lang id + size of the strings block */
  rec[0] = (unsigned char)l->id[0];
  rec[1] = (unsigned char)l->id[1];
  rec[2] = (unsigned char)(strings_bytes & 0xff);
  rec[3] = (unsigned char)((strings_bytes >> 8) & 0xff);
  if (fwrite(rec, 1, sizeof(rec), fd) != sizeof(rec)) return 0;

  /* dictionary, one 4-byte record per string */
  for (i = 0; i < l->num_strings; i++) {
    rec[0] = (unsigned char)(l->dict[i].id & 0xff);
    rec[1] = (unsigned char)((l->dict[i].id >> 8) & 0xff);
    rec[2] = (unsigned char)(l->dict[i].offset & 0xff);
    rec[3] = (unsigned char)((l->dict[i].offset >> 8) & 0xff);
    if (fwrite(rec, 1, sizeof(rec), fd) != sizeof(rec)) return 0;
  }

  /* strings */
  return (fwrite(l->strings, 1, strings_bytes, fd) == strings_bytes);
}
345
 
346
 
347
/* dumps language l as C source code into file fn. the generated file defines:
 *   svarlang_memsz         size of the svarlang_mem buffer
 *   svarlang_string_count  number of strings of l
 *   svarlang_mem           buffer initialized with the strings block of l
 *   svarlang_dict          dictionary of l, as (id, offset) pairs
 * the buffer size is biggest_langsz plus a 5% margin, capped to 65535 since
 * it is stored in an unsigned short.
 * returns 1 on success, 0 if l is NULL, if the file could not be created or
 * if writing to it failed. */
static int svl_write_c_source(svl_lang_t *l, const char *fn, unsigned short biggest_langsz)
{
  FILE *fd;
  unsigned int i;
  unsigned int strings_bytes;
  unsigned int nextnlat = 0;
  unsigned long allocsz;
  int ok = 1;

  if (l == NULL) return 0;

  strings_bytes = (unsigned short)svl_strings_bytes(l);

  fd = fopen(fn, "wb");
  if (fd == NULL) {
    puts("ERROR: FAILED TO OPEN OR CREATE DEFLANG.C");
    return 0;
  }

  /* computed on a long so that the margin cannot wrap around 16 bits */
  allocsz = (unsigned long)biggest_langsz + (biggest_langsz / 20);
  if (allocsz > 0xFFFFul) allocsz = 0xFFFFul;

  printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (5%% safety margin)\n", biggest_langsz,
         (unsigned int)allocsz);
  fprintf(fd, "/* THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY) */\r\n");
  fprintf(fd, "const unsigned short svarlang_memsz = %uu;\r\n", (unsigned int)allocsz);
  fprintf(fd, "const unsigned short svarlang_string_count = %uu;\r\n\r\n", (unsigned int)l->num_strings);
  fprintf(fd, "char svarlang_mem[%u] = {\r\n", (unsigned int)allocsz);

  for (i = 0; i < strings_bytes; i++) {
    /* go through unsigned char, otherwise bytes >= 0x80 get sign-extended
     * and come out as 0xffffffXX on platforms where char is signed */
    fprintf(fd, "0x%02x", (unsigned int)(unsigned char)l->strings[i]);

    if (i + 1 < strings_bytes) fprintf(fd, ",");

    /* break the line after each string, or after 16 bytes at most */
    nextnlat++;
    if (l->strings[i] == '\0' || nextnlat == 16) {
      fprintf(fd, "\r\n");
      nextnlat = 0;
    }
  }
  fprintf(fd, "};\r\n\r\n");

  fprintf(fd, "unsigned short svarlang_dict[%u] = {\r\n", (unsigned int)l->num_strings * 2u);
  for (i = 0; i < l->num_strings; i++) {
    fprintf(fd, "0x%04x,0x%04x", (unsigned int)l->dict[i].id, (unsigned int)l->dict[i].offset);
    if (i + 1 < l->num_strings) fprintf(fd, ",");
    fprintf(fd, "\r\n");
  }
  fprintf(fd, "};\r\n");

  /* fprintf() returns a negative value on failure (never 0 here), so rely on
   * the stream error flag and on fclose() to detect any failed write */
  if (ferror(fd)) ok = 0;
  if (fclose(fd) != 0) ok = 0;

  return ok;
}
397
 
398
 
597 mateuszvis 399
int main(int argc, char **argv) {
400
  FILE *fd;
401
  int ecode = 0;
1290 bernd.boec 402
  svl_lang_t *lang, *reflang = NULL;
403
 
404
  int i;
1061 mateusz.vi 405
  unsigned short biggest_langsz = 0;
597 mateuszvis 406
 
407
  if (argc < 2) {
1247 mateusz.vi 408
    puts("tlumacz ver " SVARLANGVER " - this tool is part of the SvarLANG project.");
409
    puts("converts a set of CATS-style translations in files EN.TXT, PL.TXT, etc");
410
    puts("into a single resource file (OUT.LNG).");
411
    puts("");
412
    puts("usage: tlumacz en fr pl ...");
597 mateuszvis 413
    return(1);
414
  }
415
 
601 mateuszvis 416
  fd = fopen("out.lng", "wb");
597 mateuszvis 417
  if (fd == NULL) {
1250 mateusz.vi 418
    puts("ERR: failed to open or create OUT.LNG");
597 mateuszvis 419
    return(1);
420
  }
421
 
422
  /* write lang blocks */
423
  for (i = 1; i < argc; i++) {
424
    unsigned short sz;
425
    char id[3];
426
 
427
    if (strlen(argv[i]) != 2) {
428
      printf("INVALID LANG SPECIFIED: %s\r\n", argv[i]);
429
      ecode = 1;
430
      break;
431
    }
432
    id[0] = argv[i][0];
433
    id[1] = argv[i][1];
434
    id[2] = 0;
435
 
1290 bernd.boec 436
    if ((lang = svl_lang_new(id, DICT_CAP, STRINGS_CAP)) == NULL) {
437
      printf("OUT OF MEMORY\r\n");
438
      return(1);
439
    }
440
 
441
    sz = svl_lang_from_cats_file(lang, reflang);
597 mateuszvis 442
    if (sz == 0) {
443
      printf("ERROR COMPUTING LANG '%s'\r\n", id);
444
      ecode = 1;
445
      break;
446
    } else {
447
      printf("computed %s lang block of %u bytes\r\n", id, sz);
1061 mateusz.vi 448
      if (sz > biggest_langsz) biggest_langsz = sz;
597 mateuszvis 449
    }
1290 bernd.boec 450
    svl_compact_lang(lang);
451
 
452
    /* write header if first (reference) language */
453
    if (i == 1) {
454
      if (!svl_write_header(lang->num_strings, fd)) {
455
        printf("ERROR WRITING TO OUTPUT FILE\r\n");
456
        ecode = 1;
457
        break;
458
      }
459
    }
1293 mateusz.vi 460
 
1290 bernd.boec 461
    /* write lang ID to file, followed string table size, and then
462
       the dictionary and string table for current language */
463
    if (!svl_write_lang(lang, fd)) {
597 mateuszvis 464
      printf("ERROR WRITING TO OUTPUT FILE\r\n");
465
      ecode = 1;
466
      break;
467
    }
1290 bernd.boec 468
 
1061 mateusz.vi 469
    /* remember reference data for other languages */
597 mateuszvis 470
    if (i == 1) {
1290 bernd.boec 471
      reflang = lang;
597 mateuszvis 472
    }
1290 bernd.boec 473
    else {
474
      svl_lang_free(lang);
475
      lang = NULL;
476
    }
597 mateuszvis 477
  }
478
 
1061 mateusz.vi 479
  /* compute the deflang.c file containing a dump of the reference block */
1290 bernd.boec 480
  if (!svl_write_c_source(reflang, "deflang.c", biggest_langsz)) {
1061 mateusz.vi 481
    puts("ERROR: FAILED TO OPEN OR CREATE DEFLANG.C");
482
    ecode = 1;
483
  }
484
 
1290 bernd.boec 485
  /* clean up */
486
  if (reflang) {
487
    svl_lang_free(reflang);
488
    reflang = NULL;
489
  }
490
 
597 mateuszvis 491
  return(ecode);
492
}