Subversion Repositories SvarDOS

Rev

Rev 1295 | Rev 2014 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
597 mateuszvis 1
/*
1247 mateusz.vi 2
 * Copyright (C) 2021-2023 Mateusz Viste
597 mateuszvis 3
 *
1295 mateusz.vi 4
 * Dictionary-based lookups contributed by Bernd Boeckmann, 2023
5
 *
597 mateuszvis 6
 * usage: tlumacz en fr pl etc
7
 *
1295 mateusz.vi 8
 * computes an out.lng file that contains all language resources.
597 mateuszvis 9
 *
10
 */
11
 
12
 
13
#include <stdio.h>
14
#include <stdlib.h>
15
#include <string.h>
1290 bernd.boec 16
#include <ctype.h>
597 mateuszvis 17
 
1248 mateusz.vi 18
#include "svarlang.h"
597 mateuszvis 19
 
1290 bernd.boec 20
/* capacity limits handed to svl_lang_new() for every language being compiled */
#define STRINGS_CAP 65000   /* size of the strings storage, in bytes */
#define DICT_CAP    10000   /* maximum number of dictionary entries */
597 mateuszvis 22
 
23
/* reads a single line from fd into dst (fgets() limits it to dstsz-1 bytes).
 * the line is cut at the first CR or LF and trailing spaces are removed, so a
 * line made only of spaces comes back as an empty string.
 * returns the length of the resulting string, or 0xffff on EOF/read error */
static unsigned short readl(char *dst, size_t dstsz, FILE *fd) {
  unsigned short l, trimmedlen = 0;

  if (fgets(dst, (int)dstsz, fd) == NULL) return(0xffff); /* EOF */

  /* walk up to the first CR/LF/NUL while remembering where the last
   * non-space character ends (index + 1, hence 0 = "only spaces so far") */
  for (l = 0; (dst[l] != 0) && (dst[l] != '\r') && (dst[l] != '\n'); l++) {
    if (dst[l] != ' ') trimmedlen = l + 1;
  }

  dst[trimmedlen] = 0; /* drops the end of line and any trailing spaces */

  return(trimmedlen);
}
39
 
40
 
1114 mateusz.vi 41
/* parses a line in format "[?]1.50:somestring".
 * both numeric parts must contain at least one digit and fit in a byte
 * (0..255), since they get packed into a 16-bit id as (first << 8) | second.
 * an optional leading '?' ("dirty" flag) is skipped.
 * on success fills id and returns a pointer to the string part (right after
 * the colon, possibly an empty string); returns NULL on a malformed line, in
 * which case id is left untouched */
static const char *parseline(unsigned short *id, const char *s) {
  unsigned long major = 0, minor = 0;
  int ndigits;

  /* strings prefixed by '?' are flagged as "dirty": ignore this flag here */
  if (*s == '?') s++;

  /* first number, must be followed by a dot */
  for (ndigits = 0; (*s >= '0') && (*s <= '9'); s++, ndigits++) {
    major = (major * 10) + (unsigned long)(*s - '0');
    if (major > 255) return(NULL); /* would not fit in the id's high byte */
  }
  if ((ndigits == 0) || (*s != '.')) return(NULL);
  s++;

  /* second number, must be followed by a colon */
  for (ndigits = 0; (*s >= '0') && (*s <= '9'); s++, ndigits++) {
    minor = (minor * 10) + (unsigned long)(*s - '0');
    if (minor > 255) return(NULL); /* would spill into the id's high byte */
  }
  if ((ndigits == 0) || (*s != ':')) return(NULL);

  *id = (unsigned short)((major << 8) | minor);

  return(s + 1);
}
75
 
76
 
639 mateusz.vi 77
/* converts escape sequences into actual bytes, in place and in a single pass:
 *   \e -> ESC (0x1B)   \n -> LF   \r -> CR   \t -> TAB
 * for any other escaped character the backslash is dropped and the character
 * kept as-is (so "\\" yields one backslash). a lone backslash at the very end
 * of the string is dropped.
 * returns the new length of the string. */
static unsigned short unesc_string(char *linebuff) {
  unsigned short rd = 0, wr = 0; /* read and write cursors, wr <= rd */

  while (linebuff[rd] != 0) {
    char c = linebuff[rd++];

    if (c == '\\') {
      c = linebuff[rd];
      if (c == 0) break; /* trailing backslash with nothing to escape */
      rd++;
      switch (c) {
        case 'e':
          c = 0x1B; /* ESC code, using hex because '\e' is not ANSI C */
          break;
        case 'n':
          c = '\n';
          break;
        case 'r':
          c = '\r';
          break;
        case 't':
          c = '\t';
          break;
        default: /* unknown escape: keep the character itself */
          break;
      }
    }

    linebuff[wr++] = c;
  }

  linebuff[wr] = 0;
  return(wr);
}
102
 
1290 bernd.boec 103
/* one dictionary slot: associates a string id with the position of its text
 * inside the strings block. byte-packed so the structure carries no padding
 * (svl_write_lang() dumps the dictionary array to the output file as-is) */
#pragma pack(1)
struct dict_entry {
  unsigned short id;     /* string id */
  unsigned short offset; /* offset of the string within the strings block */
};
#pragma pack()
639 mateusz.vi 109
 
1296 mateusz.vi 110
/* in-memory representation of one language: a dictionary of (id, offset)
 * entries kept sorted by id, plus the storage that holds the NUL-terminated
 * strings themselves */
struct svl_lang {
  char id[2];                 /* two-letter language code (no terminator) */
  unsigned short num_strings; /* count of dictionary entries in use */

  struct dict_entry *dict;    /* dictionary array */
  size_t dict_cap;            /* count of entries allocated for dict */

  char *strings;              /* start of the strings storage */
  char *strings_end;          /* first unused byte of the strings storage */
  size_t strings_cap;         /* bytes allocated for the strings storage */
};
1290 bernd.boec 122
 
123
 
1296 mateusz.vi 124
static struct svl_lang *svl_lang_new(const char langid[2], size_t dict_cap, size_t strings_cap) {
125
  struct svl_lang *l;
1290 bernd.boec 126
 
1296 mateusz.vi 127
  l = malloc(sizeof(struct svl_lang));
1295 mateusz.vi 128
  if (!l) return(NULL);
1290 bernd.boec 129
 
130
  l->id[0] = (char)toupper(langid[0]);
131
  l->id[1] = (char)toupper(langid[1]);
132
 
1296 mateusz.vi 133
  l->dict = malloc(dict_cap * sizeof(struct dict_entry));
1295 mateusz.vi 134
  if (!l->dict) return(NULL);
135
 
1290 bernd.boec 136
  l->dict_cap = dict_cap;
137
 
138
  l->num_strings = 0;
139
  l->strings = l->strings_end = malloc(strings_cap);
140
  if (!l->strings) {
141
    free(l->dict);
1295 mateusz.vi 142
    return(NULL);
1290 bernd.boec 143
  }
144
  l->strings_cap = strings_cap;
1295 mateusz.vi 145
 
146
  return(l);
1290 bernd.boec 147
}
148
 
149
 
150
/* compacts the dict and string buffer */
1296 mateusz.vi 151
static void svl_compact_lang(struct svl_lang *l) {
1290 bernd.boec 152
  size_t bytes;
153
  bytes = l->strings_end - l->strings;
154
  if (bytes < l->strings_cap) {
155
    l->strings = l->strings_end = realloc(l->strings, bytes);
156
    l->strings_end += bytes;
157
    l->strings_cap = bytes;
158
  }
159
  l->dict_cap = l->num_strings;
1296 mateusz.vi 160
  l->dict = realloc(l->dict, l->dict_cap * sizeof(struct dict_entry));
1290 bernd.boec 161
}
162
 
163
 
1296 mateusz.vi 164
static void svl_lang_free(struct svl_lang *l) {
1290 bernd.boec 165
  l->num_strings = 0;
166
  if (l->dict) {
167
    free(l->dict);
168
    l->dict = NULL;
169
  }
170
  if (l->strings) {
171
    free(l->strings);
172
    l->strings = l->strings_end = NULL;
173
  }
174
  l->dict_cap = 0;
175
  l->strings_cap = 0;
176
}
177
 
178
 
1296 mateusz.vi 179
static size_t svl_strings_bytes(const struct svl_lang *l) {
1295 mateusz.vi 180
  return(l->strings_end - l->strings);
1290 bernd.boec 181
}
182
 
183
 
1296 mateusz.vi 184
static size_t svl_dict_bytes(const struct svl_lang *l) {
185
  return(l->num_strings * sizeof(struct dict_entry));
1290 bernd.boec 186
}
187
 
188
 
1296 mateusz.vi 189
static int svl_add_str(struct svl_lang *l, unsigned short id, const char *s) {
1290 bernd.boec 190
  size_t len = strlen(s) + 1;
191
  size_t cursor;
192
 
1296 mateusz.vi 193
  if ((l->strings_cap < svl_strings_bytes(l) + len) || (l->dict_cap < (l->num_strings + 1) * sizeof(struct dict_entry))) {
1295 mateusz.vi 194
    return(0);
1290 bernd.boec 195
  }
1293 mateusz.vi 196
 
1290 bernd.boec 197
  /* find dictionary insert position, search backwards in assumption
198
     that in translation files, strings are generally ordered ascending */
199
  for (cursor = l->num_strings; cursor > 0 && l->dict[cursor-1].id > id; cursor--);
200
 
1296 mateusz.vi 201
  memmove(&(l->dict[cursor+1]), &(l->dict[cursor]), sizeof(struct dict_entry) * (l->num_strings - cursor));
1290 bernd.boec 202
  l->dict[cursor].id = id;
203
  l->dict[cursor].offset = l->strings_end - l->strings;
204
 
205
  memcpy(l->strings_end, s, len);
206
  l->strings_end += len;
207
  l->num_strings++;
208
 
1295 mateusz.vi 209
  return(1);
1290 bernd.boec 210
}
211
 
212
 
1296 mateusz.vi 213
static int svl_find(const struct svl_lang *l, unsigned short id) {
1295 mateusz.vi 214
  size_t left = 0, right = l->num_strings - 1, x;
215
  unsigned short v;
1290 bernd.boec 216
 
1295 mateusz.vi 217
  if (l->num_strings == 0) return(0);
1290 bernd.boec 218
 
1295 mateusz.vi 219
  while (left <= right ) {
220
    x = left + ( (right - left ) >> 2 );
221
    v = l->dict[x].id;
1296 mateusz.vi 222
    if ( id == v ) return(1); /* found! */
223
 
224
    if (id > v) {
1295 mateusz.vi 225
      left = x + 1;
226
    } else {
227
      right = x - 1;
228
    }
229
  }
230
  return(0);
1290 bernd.boec 231
}
232
 
1295 mateusz.vi 233
 
1061 mateusz.vi 234
/* opens a CATS-style file and compiles it into a ressources lang block
235
 * returns 0 on error, or the size of the generated data block otherwise */
1296 mateusz.vi 236
static unsigned short svl_lang_from_cats_file(struct svl_lang *l, struct svl_lang *refl) {
1290 bernd.boec 237
  unsigned short linelen;
597 mateuszvis 238
  FILE *fd;
1290 bernd.boec 239
  char fname[] = "xx.txt";
623 mateuszvis 240
  static char linebuf[8192];
1114 mateusz.vi 241
  const char *ptr;
1290 bernd.boec 242
  unsigned short id, maxid=0, maxid_line, linecount;
243
  int i;
597 mateuszvis 244
 
1290 bernd.boec 245
  fname[strlen(fname) - 6] = (char)tolower( l->id[0] );
246
  fname[strlen(fname) - 5] = (char)tolower( l->id[1] );
597 mateuszvis 247
 
248
  fd = fopen(fname, "rb");
249
  if (fd == NULL) {
250
    printf("ERROR: FAILED TO OPEN '%s'\r\n", fname);
251
    return(0);
252
  }
253
 
254
  for (linecount = 1;; linecount++) {
255
    linelen = readl(linebuf, sizeof(linebuf), fd);
256
    if (linelen == 0xffff) break; /* EOF */
257
    if ((linelen == 0) || (linebuf[0] == '#')) continue;
258
 
639 mateusz.vi 259
    /* convert escaped chars to actual bytes (\n -> newline, etc) */
260
    linelen = unesc_string(linebuf);
261
 
597 mateuszvis 262
    /* read id and get ptr to actual string ("1.15:string") */
263
    ptr = parseline(&id, linebuf);
1272 mateusz.vi 264
 
265
    /* handle malformed lines */
597 mateuszvis 266
    if (ptr == NULL) {
1272 mateusz.vi 267
      printf("WARNING: %s[#%u] is malformed (linelen = %u):\r\n", fname, linecount, linelen);
623 mateuszvis 268
      puts(linebuf);
1272 mateusz.vi 269
      continue;
597 mateuszvis 270
    }
1272 mateusz.vi 271
 
272
    /* ignore empty strings (but emit a warning) */
273
    if (ptr[0] == 0) {
1271 bernd.boec 274
      printf("WARNING: %s[#%u] ignoring empty string %u.%u\r\n", fname, linecount, id >> 8, id & 0xff);
275
      continue;
276
    }
597 mateuszvis 277
 
1114 mateusz.vi 278
    /* warn about dirty lines */
279
    if (linebuf[0] == '?') {
280
      printf("WARNING: %s[#%u] string id %u.%u is flagged as 'dirty'\r\n", fname, linecount, id >> 8, id & 0xff);
281
    }
282
 
1290 bernd.boec 283
    /* add the string contained in current line, if conditions are met */
284
    if (!svl_find(l, id)) {
1295 mateusz.vi 285
      if ((refl == NULL) || (svl_find(refl, id))) {
1290 bernd.boec 286
        if (!svl_add_str(l, id, ptr)) {
287
          printf("ERROR: %s[#%u] output size limit exceeded\r\n", fname, linecount);
288
          fclose(fd);
1295 mateusz.vi 289
          return(0);
1290 bernd.boec 290
        }
291
        if (id >= maxid) {
292
          maxid = id;
293
          maxid_line = linecount;
1295 mateusz.vi 294
        } else {
1293 mateusz.vi 295
          printf("WARNING:%s[#%u] file unsorted - line %u has higher id %u.%u\r\n", fname, linecount, maxid_line, maxid >> 8, maxid & 0xff);
1290 bernd.boec 296
        }
1295 mateusz.vi 297
      } else {
1290 bernd.boec 298
        printf("WARNING: %s[#%u] has an invalid id (%u.%u not present in ref lang)\r\n", fname, linecount, id >> 8, id & 0xff);
299
      }
1295 mateusz.vi 300
    } else {
1293 mateusz.vi 301
      printf("WARNING: %s[#%u] has a duplicated id (%u.%u)\r\n", fname, linecount, id >> 8, id & 0xff);
597 mateuszvis 302
    }
303
  }
304
 
305
  fclose(fd);
306
 
1290 bernd.boec 307
  /* if reflang provided, pull missing strings from it */
308
  if (refl != NULL) {
309
    for (i = 0; i < refl->num_strings; i++) {
310
      id = refl->dict[i].id;
311
      if (!svl_find(l, id)) {
597 mateuszvis 312
        printf("WARNING: %s is missing string %u.%u (pulled from ref lang)\r\n", fname, id >> 8, id & 0xff);
1291 bernd.boec 313
        if (!svl_add_str(l, id, refl->strings + refl->dict[i].offset)) {
1290 bernd.boec 314
          printf("ERROR: %s[#%u] output size limit exceeded\r\n", fname, linecount);
1295 mateusz.vi 315
          return(0);
1290 bernd.boec 316
        }
597 mateuszvis 317
      }
318
    }
319
  }
320
 
1290 bernd.boec 321
  return(svl_strings_bytes(l));
322
}
597 mateuszvis 323
 
1290 bernd.boec 324
 
1295 mateusz.vi 325
/* writes the file header to fd: the 4-byte signature "SvL" + 0x1A followed by
 * num_strings as a 2-byte value in the host's byte order.
 * returns 1 on success, 0 if a write came up short */
static int svl_write_header(unsigned short num_strings, FILE *fd) {
  static const char sig[4] = {'S', 'v', 'L', 0x1a};

  if (fwrite(sig, 1, 4, fd) != 4) return(0);
  if (fwrite(&num_strings, 1, 2, fd) != 2) return(0);

  return(1);
}
328
 
329
 
1296 mateusz.vi 330
static int svl_write_lang(const struct svl_lang *l, FILE *fd) {
1290 bernd.boec 331
  unsigned short strings_bytes = svl_strings_bytes(l);
597 mateuszvis 332
 
1295 mateusz.vi 333
  return((fwrite(&l->id, 1, 2, fd) == 2) &&
1290 bernd.boec 334
         (fwrite(&strings_bytes, 1, 2, fd) == 2) &&
335
         (fwrite(l->dict, 1, svl_dict_bytes(l), fd) == svl_dict_bytes(l)) &&
1295 mateusz.vi 336
         (fwrite(l->strings, 1, svl_strings_bytes(l), fd) == svl_strings_bytes(l)));
1290 bernd.boec 337
}
338
 
339
 
1296 mateusz.vi 340
static int svl_write_c_source(const struct svl_lang *l, const char *fn, unsigned short biggest_langsz) {
1290 bernd.boec 341
  FILE *fd;
342
  int i;
343
  unsigned short strings_bytes = svl_strings_bytes(l);
344
  unsigned short nextnlat = 0;
1295 mateusz.vi 345
  unsigned short allocsz;
1290 bernd.boec 346
 
347
  fd = fopen(fn, "wb");
348
  if (fd == NULL) {
349
    puts("ERROR: FAILED TO OPEN OR CREATE DEFLANG.C");
1295 mateusz.vi 350
    return(0);
351
  }
1290 bernd.boec 352
 
1295 mateusz.vi 353
  allocsz = biggest_langsz + (biggest_langsz / 20);
354
  printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (5%% safety margin)\n", biggest_langsz, allocsz);
355
  fprintf(fd, "/* THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY) */\r\n");
356
  fprintf(fd, "const unsigned short svarlang_memsz = %uu;\r\n", allocsz);
357
  fprintf(fd, "const unsigned short svarlang_string_count = %uu;\r\n\r\n", l->num_strings);
358
  fprintf(fd, "char svarlang_mem[%u] = {\r\n", allocsz);
359
 
360
  for (i = 0; i < strings_bytes; i++) {
361
    if (!fprintf(fd, "0x%02x", l->strings[i])) {
362
      fclose(fd);
363
      return(0);
1290 bernd.boec 364
    }
365
 
1295 mateusz.vi 366
    if (i + 1 < strings_bytes) fprintf(fd, ",");
367
    nextnlat++;
368
    if (l->strings[i] == '\0' || nextnlat == 16) {
1290 bernd.boec 369
      fprintf(fd, "\r\n");
1295 mateusz.vi 370
      nextnlat = 0;
1290 bernd.boec 371
    }
1295 mateusz.vi 372
  }
373
  fprintf(fd, "};\r\n\r\n");
1290 bernd.boec 374
 
1295 mateusz.vi 375
  fprintf(fd, "unsigned short svarlang_dict[%u] = {\r\n", l->num_strings * 2);
376
  for (i = 0; i < l->num_strings; i++) {
377
    if (!fprintf(fd, "0x%04x,0x%04x", l->dict[i].id, l->dict[i].offset)) {
378
      fclose(fd);
379
      return(0);
380
    }
381
    if (i + 1 < l->num_strings) fprintf(fd, ",");
382
    fprintf(fd, "\r\n");
1290 bernd.boec 383
  }
1295 mateusz.vi 384
  fprintf(fd, "};\r\n");
1290 bernd.boec 385
 
1295 mateusz.vi 386
  fclose(fd);
387
 
388
  return(1);
1290 bernd.boec 389
}
390
 
391
 
597 mateuszvis 392
int main(int argc, char **argv) {
393
  FILE *fd;
394
  int ecode = 0;
1290 bernd.boec 395
  int i;
1061 mateusz.vi 396
  unsigned short biggest_langsz = 0;
1296 mateusz.vi 397
  struct svl_lang *lang, *reflang = NULL;
597 mateuszvis 398
 
399
  if (argc < 2) {
1247 mateusz.vi 400
    puts("tlumacz ver " SVARLANGVER " - this tool is part of the SvarLANG project.");
401
    puts("converts a set of CATS-style translations in files EN.TXT, PL.TXT, etc");
402
    puts("into a single resource file (OUT.LNG).");
403
    puts("");
404
    puts("usage: tlumacz en fr pl ...");
597 mateuszvis 405
    return(1);
406
  }
407
 
601 mateuszvis 408
  fd = fopen("out.lng", "wb");
597 mateuszvis 409
  if (fd == NULL) {
1250 mateusz.vi 410
    puts("ERR: failed to open or create OUT.LNG");
597 mateuszvis 411
    return(1);
412
  }
413
 
414
  /* write lang blocks */
415
  for (i = 1; i < argc; i++) {
416
    unsigned short sz;
417
    char id[3];
418
 
419
    if (strlen(argv[i]) != 2) {
420
      printf("INVALID LANG SPECIFIED: %s\r\n", argv[i]);
421
      ecode = 1;
422
      break;
423
    }
424
    id[0] = argv[i][0];
425
    id[1] = argv[i][1];
426
    id[2] = 0;
427
 
1290 bernd.boec 428
    if ((lang = svl_lang_new(id, DICT_CAP, STRINGS_CAP)) == NULL) {
429
      printf("OUT OF MEMORY\r\n");
430
      return(1);
431
    }
432
 
433
    sz = svl_lang_from_cats_file(lang, reflang);
597 mateuszvis 434
    if (sz == 0) {
435
      printf("ERROR COMPUTING LANG '%s'\r\n", id);
436
      ecode = 1;
437
      break;
438
    } else {
439
      printf("computed %s lang block of %u bytes\r\n", id, sz);
1061 mateusz.vi 440
      if (sz > biggest_langsz) biggest_langsz = sz;
597 mateuszvis 441
    }
1290 bernd.boec 442
    svl_compact_lang(lang);
443
 
444
    /* write header if first (reference) language */
445
    if (i == 1) {
446
      if (!svl_write_header(lang->num_strings, fd)) {
447
        printf("ERROR WRITING TO OUTPUT FILE\r\n");
448
        ecode = 1;
449
        break;
450
      }
451
    }
1293 mateusz.vi 452
 
1290 bernd.boec 453
    /* write lang ID to file, followed string table size, and then
454
       the dictionary and string table for current language */
455
    if (!svl_write_lang(lang, fd)) {
597 mateuszvis 456
      printf("ERROR WRITING TO OUTPUT FILE\r\n");
457
      ecode = 1;
458
      break;
459
    }
1290 bernd.boec 460
 
1061 mateusz.vi 461
    /* remember reference data for other languages */
597 mateuszvis 462
    if (i == 1) {
1290 bernd.boec 463
      reflang = lang;
1295 mateusz.vi 464
    } else {
1290 bernd.boec 465
      svl_lang_free(lang);
466
      lang = NULL;
467
    }
597 mateuszvis 468
  }
469
 
1061 mateusz.vi 470
  /* compute the deflang.c file containing a dump of the reference block */
1290 bernd.boec 471
  if (!svl_write_c_source(reflang, "deflang.c", biggest_langsz)) {
1061 mateusz.vi 472
    puts("ERROR: FAILED TO OPEN OR CREATE DEFLANG.C");
473
    ecode = 1;
474
  }
475
 
1290 bernd.boec 476
  /* clean up */
477
  if (reflang) {
478
    svl_lang_free(reflang);
479
    reflang = NULL;
480
  }
481
 
597 mateuszvis 482
  return(ecode);
483
}