Subversion Repositories SvarDOS

Rev

Rev 2102 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
597 mateuszvis 1
/*
2083 mateusz.vi 2
 * Copyright (C) 2021-2024 Mateusz Viste
597 mateuszvis 3
 *
1295 mateusz.vi 4
 * Dictionary-based lookups contributed by Bernd Boeckmann, 2023
5
 *
597 mateuszvis 6
 * usage: tlumacz en fr pl etc
7
 *
2083 mateusz.vi 8
 * computes:
9
 * OUT.LNG -> contains all language resources.
10
 * OUTC.LNG -> same as OUT.LNG but with compressed strings (slower to load).
597 mateuszvis 11
 *
2083 mateusz.vi 12
 * === COMPRESSION ===========================================================
13
 * The compression scheme is very simple. It is applied only to strings (ie.
2102 mateusz.vi 14
 * not the dictionary) and it is basically a stream of 16-bit words (tokens).
15
 *
16
 * Token format is LLLL OOOO OOOO OOOO, where:
17
 * OOOO OOOO OOOO is the back reference offset (number of bytes-1 to rewind)
18
 * LLLL is the number of bytes (-1) that have to be copied from the offset.
19
 *
20
 * However, if LLLL is zero then the token's format is different:
21
 * 0000 RRRR BBBB BBBB
22
 *
23
 * The above form occurs when uncompressible data is encountered:
24
 * BBBB BBBB is the literal value of a byte to be copied
25
 * RRRR is the number of RAW (uncompressible) WORDS that follow (possibly 0)
26
 *
2083 mateusz.vi 27
 * where each WORD value contains the following bits "LLLL OOOO OOOO OOOO":
28
 *
29
 * OOOO OOOO OOOO = a backreference offset ("look that many bytes back")
30
 * LLLL = the number of bytes to copy from the backreference
31
 *
32
 * To recognize a compressed lang block one has to look at the id of the block
33
 * (16-bit language id). If its highest bit is set (0x8000) then the lang block
34
 * is compressed.
597 mateuszvis 35
 */
36
 
37
 
38
#include <stdio.h>
39
#include <stdlib.h>
40
#include <string.h>
1290 bernd.boec 41
#include <ctype.h>
597 mateuszvis 42
 
1248 mateusz.vi 43
#include "svarlang.h"
597 mateuszvis 44
 
1290 bernd.boec 45
#define STRINGS_CAP 65000   /* string storage size in characters */
1293 mateusz.vi 46
#define DICT_CAP    10000   /* dictionary size in elements */
597 mateuszvis 47
 
2014 bernd.boec 48
/* output format of the generated DEFLANG source file */
enum {
  C_OUTPUT = 0,     /* C source (deflang.c) */
  ASM_OUTPUT = 1,   /* assembly include using the "public" directive */
  NASM_OUTPUT = 2   /* assembly include using the "global" directive */
};
53
 
54
 
597 mateuszvis 55
/* reads a single line from fd into dst (a buffer of dstsz bytes).
 * the line is cut at the first CR or LF and trailing spaces are removed, so
 * a line made only of spaces comes back as an empty string.
 * returns the length of the resulting string, or 0xffff when fgets() could
 * not read anything (EOF or read error). */
static unsigned short readl(char *dst, size_t dstsz, FILE *fd) {
  unsigned short i;
  unsigned short len = 0; /* index right after the last non-space character */

  if (fgets(dst, (int)dstsz, fd) == NULL) return(0xffff); /* EOF */

  /* scan up to the first CR or LF (or the end of what fgets() returned) */
  for (i = 0; (dst[i] != 0) && (dst[i] != '\r') && (dst[i] != '\n'); i++) {
    if (dst[i] != ' ') len = i + 1;
  }

  /* cut here: drops the line terminator as well as any trailing spaces */
  dst[len] = 0;

  return(len);
}
71
 
72
 
1114 mateusz.vi 73
/* parses a line in format "[?]1.50:somestring".
 * an optional leading '?' (the "dirty" flag) is skipped. both numbers must
 * contain at least one digit and must fit in a byte (0..255), because the id
 * is packed as (major << 8) | minor.
 * on success fills *id and returns a pointer to the string part (the byte
 * right after the ':'). returns NULL on malformed input, in which case *id is
 * left untouched. */
static const char *parseline(unsigned short *id, const char *s) {
  unsigned short part[2];
  int n;

  /* strings prefixed by '?' are flagged as "dirty": ignore this flag here */
  if (*s == '?') s++;

  /* part[0] is terminated by '.', part[1] by ':' */
  for (n = 0; n < 2; n++) {
    unsigned short v = 0;

    if ((*s < '0') || (*s > '9')) return(NULL); /* need at least one digit */
    for (; (*s >= '0') && (*s <= '9'); s++) {
      v = (unsigned short)((v * 10) + (*s - '0'));
      if (v > 255) return(NULL); /* would not fit in its half of the id */
    }

    if (*s != ((n == 0) ? '.' : ':')) return(NULL);
    s++; /* skip the separator */
    part[n] = v;
  }

  *id = (unsigned short)((part[0] << 8) | part[1]);

  return(s);
}
107
 
108
 
639 mateusz.vi 109
/* converts escape sequences like "\n" or "\t" into actual bytes, in place,
 * and returns the new length of the string.
 * recognized escapes: \e (ESC, 0x1B), \n, \r and \t. a backslash followed by
 * any other character yields that character as-is (so "\\" gives a single
 * backslash). a lone backslash at the very end of the string is dropped.
 * done in a single pass: the write index never overtakes the read index. */
static unsigned short unesc_string(char *linebuff) {
  unsigned short rd = 0; /* next byte to read */
  unsigned short wr = 0; /* next byte to write */

  while (linebuff[rd] != 0) {
    char c = linebuff[rd++];

    if (c == '\\') {
      c = linebuff[rd];
      if (c == 0) break; /* trailing backslash with nothing to escape */
      rd++;
      switch (c) {
        case 'e':
          c = 0x1B; /* ESC code, using hex because '\e' is not ANSI C */
          break;
        case 'n':
          c = '\n';
          break;
        case 'r':
          c = '\r';
          break;
        case 't':
          c = '\t';
          break;
        default: /* unknown escape: keep the escaped character itself */
          break;
      }
    }

    linebuff[wr++] = c;
  }

  linebuff[wr] = 0;
  return(wr);
}
134
 
1290 bernd.boec 135
/* one dictionary record: maps a string id to the position of that string
 * inside the strings buffer. packed to 1-byte alignment because arrays of
 * this struct are written to the LNG file exactly as they sit in memory. */
#pragma pack(1)
struct dict_entry {
  unsigned short id;      /* string id, (major << 8) | minor */
  unsigned short offset;  /* byte offset of the string in the strings buffer */
};
#pragma pack()
639 mateusz.vi 141
 
1296 mateusz.vi 142
/* in-memory representation of one language: a dictionary (kept sorted by id)
 * plus a buffer holding all strings one after another, each NUL-terminated */
struct svl_lang {
  char id[2];                 /* two-letter language code (upper case) */
  unsigned short num_strings; /* number of entries currently in dict */

  struct dict_entry *dict;    /* dictionary entries */
  size_t dict_cap;            /* capacity of dict, in elements */

  char *strings;              /* start of the strings buffer */
  char *strings_end;          /* first unused byte of the strings buffer */
  size_t strings_cap;         /* capacity of the strings buffer, in bytes */
};
1290 bernd.boec 154
 
155
 
1296 mateusz.vi 156
static struct svl_lang *svl_lang_new(const char langid[2], size_t dict_cap, size_t strings_cap) {
157
  struct svl_lang *l;
1290 bernd.boec 158
 
1296 mateusz.vi 159
  l = malloc(sizeof(struct svl_lang));
1295 mateusz.vi 160
  if (!l) return(NULL);
1290 bernd.boec 161
 
162
  l->id[0] = (char)toupper(langid[0]);
163
  l->id[1] = (char)toupper(langid[1]);
164
 
1296 mateusz.vi 165
  l->dict = malloc(dict_cap * sizeof(struct dict_entry));
1295 mateusz.vi 166
  if (!l->dict) return(NULL);
167
 
1290 bernd.boec 168
  l->dict_cap = dict_cap;
169
 
170
  l->num_strings = 0;
171
  l->strings = l->strings_end = malloc(strings_cap);
172
  if (!l->strings) {
173
    free(l->dict);
1295 mateusz.vi 174
    return(NULL);
1290 bernd.boec 175
  }
176
  l->strings_cap = strings_cap;
1295 mateusz.vi 177
 
178
  return(l);
1290 bernd.boec 179
}
180
 
181
 
182
/* compacts the dict and string buffer */
1296 mateusz.vi 183
static void svl_compact_lang(struct svl_lang *l) {
1290 bernd.boec 184
  size_t bytes;
185
  bytes = l->strings_end - l->strings;
186
  if (bytes < l->strings_cap) {
187
    l->strings = l->strings_end = realloc(l->strings, bytes);
188
    l->strings_end += bytes;
189
    l->strings_cap = bytes;
190
  }
191
  l->dict_cap = l->num_strings;
1296 mateusz.vi 192
  l->dict = realloc(l->dict, l->dict_cap * sizeof(struct dict_entry));
1290 bernd.boec 193
}
194
 
195
 
1296 mateusz.vi 196
static void svl_lang_free(struct svl_lang *l) {
1290 bernd.boec 197
  l->num_strings = 0;
198
  if (l->dict) {
199
    free(l->dict);
200
    l->dict = NULL;
201
  }
202
  if (l->strings) {
203
    free(l->strings);
204
    l->strings = l->strings_end = NULL;
205
  }
206
  l->dict_cap = 0;
207
  l->strings_cap = 0;
208
}
209
 
210
 
1296 mateusz.vi 211
static size_t svl_strings_bytes(const struct svl_lang *l) {
1295 mateusz.vi 212
  return(l->strings_end - l->strings);
1290 bernd.boec 213
}
214
 
215
 
1296 mateusz.vi 216
static size_t svl_dict_bytes(const struct svl_lang *l) {
217
  return(l->num_strings * sizeof(struct dict_entry));
1290 bernd.boec 218
}
219
 
220
 
1296 mateusz.vi 221
static int svl_add_str(struct svl_lang *l, unsigned short id, const char *s) {
1290 bernd.boec 222
  size_t len = strlen(s) + 1;
223
  size_t cursor;
224
 
1296 mateusz.vi 225
  if ((l->strings_cap < svl_strings_bytes(l) + len) || (l->dict_cap < (l->num_strings + 1) * sizeof(struct dict_entry))) {
1295 mateusz.vi 226
    return(0);
1290 bernd.boec 227
  }
1293 mateusz.vi 228
 
1290 bernd.boec 229
  /* find dictionary insert position, search backwards in assumption
230
     that in translation files, strings are generally ordered ascending */
231
  for (cursor = l->num_strings; cursor > 0 && l->dict[cursor-1].id > id; cursor--);
232
 
1296 mateusz.vi 233
  memmove(&(l->dict[cursor+1]), &(l->dict[cursor]), sizeof(struct dict_entry) * (l->num_strings - cursor));
1290 bernd.boec 234
  l->dict[cursor].id = id;
235
  l->dict[cursor].offset = l->strings_end - l->strings;
236
 
237
  memcpy(l->strings_end, s, len);
238
  l->strings_end += len;
239
  l->num_strings++;
240
 
1295 mateusz.vi 241
  return(1);
1290 bernd.boec 242
}
243
 
244
 
1296 mateusz.vi 245
static int svl_find(const struct svl_lang *l, unsigned short id) {
1295 mateusz.vi 246
  size_t left = 0, right = l->num_strings - 1, x;
247
  unsigned short v;
1290 bernd.boec 248
 
1295 mateusz.vi 249
  if (l->num_strings == 0) return(0);
1290 bernd.boec 250
 
1295 mateusz.vi 251
  while (left <= right ) {
252
    x = left + ( (right - left ) >> 2 );
253
    v = l->dict[x].id;
1296 mateusz.vi 254
    if ( id == v ) return(1); /* found! */
255
 
256
    if (id > v) {
1295 mateusz.vi 257
      left = x + 1;
258
    } else {
259
      right = x - 1;
260
    }
261
  }
262
  return(0);
1290 bernd.boec 263
}
264
 
1295 mateusz.vi 265
 
1061 mateusz.vi 266
/* opens a CATS-style file and compiles it into a ressources lang block
267
 * returns 0 on error, or the size of the generated data block otherwise */
1296 mateusz.vi 268
static unsigned short svl_lang_from_cats_file(struct svl_lang *l, struct svl_lang *refl) {
1290 bernd.boec 269
  unsigned short linelen;
597 mateuszvis 270
  FILE *fd;
1290 bernd.boec 271
  char fname[] = "xx.txt";
623 mateuszvis 272
  static char linebuf[8192];
1114 mateusz.vi 273
  const char *ptr;
1290 bernd.boec 274
  unsigned short id, maxid=0, maxid_line, linecount;
275
  int i;
597 mateuszvis 276
 
1290 bernd.boec 277
  fname[strlen(fname) - 6] = (char)tolower( l->id[0] );
278
  fname[strlen(fname) - 5] = (char)tolower( l->id[1] );
597 mateuszvis 279
 
280
  fd = fopen(fname, "rb");
281
  if (fd == NULL) {
282
    printf("ERROR: FAILED TO OPEN '%s'\r\n", fname);
283
    return(0);
284
  }
285
 
286
  for (linecount = 1;; linecount++) {
287
    linelen = readl(linebuf, sizeof(linebuf), fd);
288
    if (linelen == 0xffff) break; /* EOF */
289
    if ((linelen == 0) || (linebuf[0] == '#')) continue;
290
 
639 mateusz.vi 291
    /* convert escaped chars to actual bytes (\n -> newline, etc) */
292
    linelen = unesc_string(linebuf);
293
 
597 mateuszvis 294
    /* read id and get ptr to actual string ("1.15:string") */
295
    ptr = parseline(&id, linebuf);
1272 mateusz.vi 296
 
297
    /* handle malformed lines */
597 mateuszvis 298
    if (ptr == NULL) {
1272 mateusz.vi 299
      printf("WARNING: %s[#%u] is malformed (linelen = %u):\r\n", fname, linecount, linelen);
623 mateuszvis 300
      puts(linebuf);
1272 mateusz.vi 301
      continue;
597 mateuszvis 302
    }
1272 mateusz.vi 303
 
304
    /* ignore empty strings (but emit a warning) */
305
    if (ptr[0] == 0) {
1271 bernd.boec 306
      printf("WARNING: %s[#%u] ignoring empty string %u.%u\r\n", fname, linecount, id >> 8, id & 0xff);
307
      continue;
308
    }
597 mateuszvis 309
 
1114 mateusz.vi 310
    /* warn about dirty lines */
311
    if (linebuf[0] == '?') {
312
      printf("WARNING: %s[#%u] string id %u.%u is flagged as 'dirty'\r\n", fname, linecount, id >> 8, id & 0xff);
313
    }
314
 
1290 bernd.boec 315
    /* add the string contained in current line, if conditions are met */
316
    if (!svl_find(l, id)) {
1295 mateusz.vi 317
      if ((refl == NULL) || (svl_find(refl, id))) {
1290 bernd.boec 318
        if (!svl_add_str(l, id, ptr)) {
2014 bernd.boec 319
          fprintf(stderr, "ERROR: %s[#%u] output size limit exceeded\r\n", fname, linecount);
1290 bernd.boec 320
          fclose(fd);
1295 mateusz.vi 321
          return(0);
1290 bernd.boec 322
        }
323
        if (id >= maxid) {
324
          maxid = id;
325
          maxid_line = linecount;
1295 mateusz.vi 326
        } else {
1293 mateusz.vi 327
          printf("WARNING:%s[#%u] file unsorted - line %u has higher id %u.%u\r\n", fname, linecount, maxid_line, maxid >> 8, maxid & 0xff);
1290 bernd.boec 328
        }
1295 mateusz.vi 329
      } else {
1290 bernd.boec 330
        printf("WARNING: %s[#%u] has an invalid id (%u.%u not present in ref lang)\r\n", fname, linecount, id >> 8, id & 0xff);
331
      }
1295 mateusz.vi 332
    } else {
1293 mateusz.vi 333
      printf("WARNING: %s[#%u] has a duplicated id (%u.%u)\r\n", fname, linecount, id >> 8, id & 0xff);
597 mateuszvis 334
    }
335
  }
336
 
337
  fclose(fd);
338
 
1290 bernd.boec 339
  /* if reflang provided, pull missing strings from it */
340
  if (refl != NULL) {
341
    for (i = 0; i < refl->num_strings; i++) {
342
      id = refl->dict[i].id;
343
      if (!svl_find(l, id)) {
597 mateuszvis 344
        printf("WARNING: %s is missing string %u.%u (pulled from ref lang)\r\n", fname, id >> 8, id & 0xff);
1291 bernd.boec 345
        if (!svl_add_str(l, id, refl->strings + refl->dict[i].offset)) {
2014 bernd.boec 346
          fprintf(stderr, "ERROR: %s[#%u] output size limit exceeded\r\n", fname, linecount);
1295 mateusz.vi 347
          return(0);
1290 bernd.boec 348
        }
597 mateuszvis 349
      }
350
    }
351
  }
352
 
1290 bernd.boec 353
  return(svl_strings_bytes(l));
354
}
597 mateuszvis 355
 
1290 bernd.boec 356
 
1295 mateusz.vi 357
/* writes the LNG file header to fd: the 4-byte signature "SvL" + 0x1A,
 * followed by num_strings as a 2-byte value in the byte order of the machine
 * running this tool. returns 1 on success, 0 on write error. */
static int svl_write_header(unsigned short num_strings, FILE *fd) {
  static const char sig[4] = {'S', 'v', 'L', 0x1a};

  if (fwrite(sig, 1, 4, fd) != 4) return(0);
  if (fwrite(&num_strings, 1, 2, fd) != 2) return(0);

  return(1);
}
360
 
361
 
2102 mateusz.vi 362
 
363
/* writes the qlen literal bytes of q to *dst as literal tokens and advances
 * *dst past what has been written.
 * each token holds one literal byte in its low 8 bits and, in bits 8+, the
 * count of raw words that follow it; these raw words carry the next literal
 * bytes, two per word. when a single byte is left over after the raw words
 * it gets a token of its own.
 * returns the amount of "compressed" bytes written */
static unsigned short mvcomp_litqueue_dump(unsigned short **dst, const unsigned char *q, unsigned short qlen) {
  unsigned short written = 0;

  while (qlen != 0) {
    unsigned short rawwords;

    /* the token consumes one byte, the remaining pairs go as raw words */
    qlen--;
    rawwords = qlen / 2;

    **dst = (rawwords << 8) | q[0];
    (*dst)++;
    q++;
    written += 2;

    if (rawwords != 0) {
      memcpy(*dst, q, rawwords * 2);
      *dst += rawwords;
      q += rawwords * 2;
      written += rawwords * 2;
      qlen -= rawwords * 2;
    }
    /* qlen is now 0 or 1: a leftover byte gets its own token next round */
  }

  return(written);
}
394
 
395
 
2083 mateusz.vi 396
/* mvcomp applies the MV-COMPRESSION algorithm to the len bytes at src and
 * stores the result in dstbuf. returns the compressed size, in bytes.
 * at every position the longest possible match (16 bytes down to 2, or down
 * to 3 while literals are pending) is searched in the up to 4096 bytes that
 * have already been processed. a match is emitted as a back reference token,
 * anything else is queued as a literal.
 * no size limit applies to dstbuf: providing a buffer that is large enough
 * is up to the caller. */
static unsigned short mvcomp(char *dstbuf, const char *src, unsigned short len) {
  unsigned short *dst = (void *)dstbuf;
  unsigned short complen = 0;
  unsigned short bytesprocessed = 0;
  unsigned char litqueue[32];
  unsigned char litqueuelen = 0;

  while (len > 0) {
    /* look for a minimum match of 2 bytes, unless I have some pending literal
     * bytes awaiting, in which case I am going through a new data pattern and
     * it is more efficient to wait for a 3-bytes match before breaking the
     * literal string */
    const unsigned short shortest = (litqueuelen != 0) ? 3 : 2;
    /* how far back a match may start */
    const unsigned short window = (bytesprocessed < 4096) ? bytesprocessed : 4096;
    unsigned short trylen = (len < 16) ? len : 16;
    unsigned short back = 0;
    int hit = 0;

    /* try the longest candidate first, then shorter and shorter ones */
    while ((hit == 0) && (trylen >= shortest)) {
      /* start at -trylen and try to match something moving backward */
      for (back = trylen; back <= window; back++) {
        if (memcmp(src, src - back, trylen) == 0) {
          hit = 1;
          break;
        }
      }
      if (hit == 0) trylen--;
    }

    if (hit == 0) {
      /* no match found, write a literal byte to queue */
      litqueue[litqueuelen++] = *src;
      src++;
      bytesprocessed++;
      len--;

      /* dump literal queue to dst if max length reached */
      if (litqueuelen == 31) {
        complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
        litqueuelen = 0;
      }
      continue;
    }

    /* found a match of trylen bytes at -back: literals that are still
     * awaiting have to be written out first */
    if (litqueuelen != 0) {
      complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
      litqueuelen = 0;
    }

    /* back reference token: LLLL OOOO OOOO OOOO */
    *dst = ((trylen - 1) << 12) | (back - 1);
    dst++;
    complen += 2;

    src += trylen;
    bytesprocessed += trylen;
    len -= trylen;
  }

  /* flush literals that are still awaiting */
  if (litqueuelen != 0) {
    complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
    litqueuelen = 0;
  }

  return(complen);
}
471
 
472
 
473
/* write the language block (id, dict, strings) into the LNG file.
474
 * strings are compressed if compflag != 0 */
475
static int svl_write_lang(const struct svl_lang *l, FILE *fd, int compflag) {
1290 bernd.boec 476
  unsigned short strings_bytes = svl_strings_bytes(l);
2083 mateusz.vi 477
  unsigned short langid = *((unsigned short *)(&l->id));
478
  const char *stringsptr = l->strings;
597 mateuszvis 479
 
2083 mateusz.vi 480
  /* if compressed then do the magic */
481
  if (compflag) {
482
    static char compstrings[65000];
2089 mateusz.vi 483
    unsigned short comp_bytes;
484
    comp_bytes = mvcomp(compstrings, l->strings, strings_bytes);
485
    if (comp_bytes < strings_bytes) {
486
      printf("lang %c%c mvcomp-ressed (%u bytes -> %u bytes)\n", l->id[0], l->id[1], strings_bytes, comp_bytes);
487
      langid |= 0x8000; /* LNG langblock flag that means "this lang is compressed" */
488
      strings_bytes = comp_bytes;
489
      stringsptr = compstrings;
490
    } else {
491
      printf("lang %c%c left UNCOMPRESSED (uncomp=%u bytes ; mvcomp=%u bytes)\n", l->id[0], l->id[1], strings_bytes, comp_bytes);
492
    }
2083 mateusz.vi 493
  }
494
 
495
  return((fwrite(&langid, 1, 2, fd) == 2) &&
1290 bernd.boec 496
         (fwrite(&strings_bytes, 1, 2, fd) == 2) &&
497
         (fwrite(l->dict, 1, svl_dict_bytes(l), fd) == svl_dict_bytes(l)) &&
2083 mateusz.vi 498
         (fwrite(stringsptr, 1, strings_bytes, fd) == strings_bytes));
1290 bernd.boec 499
}
500
 
501
 
1296 mateusz.vi 502
static int svl_write_c_source(const struct svl_lang *l, const char *fn, unsigned short biggest_langsz) {
1290 bernd.boec 503
  FILE *fd;
504
  int i;
505
  unsigned short strings_bytes = svl_strings_bytes(l);
506
  unsigned short nextnlat = 0;
1295 mateusz.vi 507
  unsigned short allocsz;
1290 bernd.boec 508
 
509
  fd = fopen(fn, "wb");
510
  if (fd == NULL) {
1295 mateusz.vi 511
    return(0);
512
  }
1290 bernd.boec 513
 
1295 mateusz.vi 514
  allocsz = biggest_langsz + (biggest_langsz / 20);
515
  printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (5%% safety margin)\n", biggest_langsz, allocsz);
516
  fprintf(fd, "/* THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY) */\r\n");
517
  fprintf(fd, "const unsigned short svarlang_memsz = %uu;\r\n", allocsz);
518
  fprintf(fd, "const unsigned short svarlang_string_count = %uu;\r\n\r\n", l->num_strings);
519
  fprintf(fd, "char svarlang_mem[%u] = {\r\n", allocsz);
520
 
521
  for (i = 0; i < strings_bytes; i++) {
522
    if (!fprintf(fd, "0x%02x", l->strings[i])) {
523
      fclose(fd);
524
      return(0);
1290 bernd.boec 525
    }
526
 
1295 mateusz.vi 527
    if (i + 1 < strings_bytes) fprintf(fd, ",");
528
    nextnlat++;
529
    if (l->strings[i] == '\0' || nextnlat == 16) {
1290 bernd.boec 530
      fprintf(fd, "\r\n");
1295 mateusz.vi 531
      nextnlat = 0;
1290 bernd.boec 532
    }
1295 mateusz.vi 533
  }
534
  fprintf(fd, "};\r\n\r\n");
1290 bernd.boec 535
 
1295 mateusz.vi 536
  fprintf(fd, "unsigned short svarlang_dict[%u] = {\r\n", l->num_strings * 2);
537
  for (i = 0; i < l->num_strings; i++) {
538
    if (!fprintf(fd, "0x%04x,0x%04x", l->dict[i].id, l->dict[i].offset)) {
539
      fclose(fd);
540
      return(0);
541
    }
542
    if (i + 1 < l->num_strings) fprintf(fd, ",");
543
    fprintf(fd, "\r\n");
1290 bernd.boec 544
  }
1295 mateusz.vi 545
  fprintf(fd, "};\r\n");
1290 bernd.boec 546
 
1295 mateusz.vi 547
  fclose(fd);
548
 
549
  return(1);
1290 bernd.boec 550
}
551
 
552
 
2014 bernd.boec 553
static int svl_write_asm_source(const struct svl_lang *l, const char *fn, unsigned short biggest_langsz, int format) {
554
  FILE *fd;
555
  int i;
556
  unsigned short strings_bytes = svl_strings_bytes(l);
557
  unsigned short nextnlat = 0;
558
  unsigned short allocsz;
559
 
560
  const char *public = (format == ASM_OUTPUT) ? "public" : "global";
561
 
562
  fd = fopen(fn, "wb");
563
  if (fd == NULL) {
564
    return(0);
565
  }
566
 
567
  allocsz = biggest_langsz + (biggest_langsz / 20);
568
  printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (5%% safety margin)\n", biggest_langsz, allocsz);
569
  fprintf(fd, "; THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY)\r\n");
570
  fprintf(fd, "%s svarlang_memsz\r\n", public);
571
  fprintf(fd, "svarlang_memsz dw %u\r\n", allocsz);
572
  fprintf(fd, "%s svarlang_string_count\r\n", public);
573
  fprintf(fd, "svarlang_string_count dw %u\r\n\r\n", l->num_strings);
574
  fprintf(fd, "%s svarlang_mem\r\n", public);
575
  fprintf(fd, "svarlang_mem:\r\n");
576
 
577
  if (strings_bytes > 0) fprintf(fd, "db ");
578
 
579
  for (i = 0; i < strings_bytes; i++) {
2016 bernd.boec 580
    if (!fprintf(fd, "%u", l->strings[i])) {
2014 bernd.boec 581
      fclose(fd);
582
      return(0);
583
    }
584
 
585
    nextnlat++;
586
    if (l->strings[i] == '\0' || nextnlat == 16) {
587
      fprintf(fd, "\r\n");
588
      if (i + 1 < strings_bytes ) fprintf(fd, "db ");
589
      nextnlat = 0;
590
    }
591
    else {
592
      fprintf(fd, ",");
593
    }
594
  }
595
 
596
  fprintf(fd, "\r\n%s svarlang_dict\r\n", public);
597
  fprintf(fd, "svarlang_dict:\r\n");
598
  for (i = 0; i < l->num_strings; i++) {
2016 bernd.boec 599
    if (!fprintf(fd, "dw %u,%u\r\n", l->dict[i].id, l->dict[i].offset)) {
2014 bernd.boec 600
      fclose(fd);
601
      return(0);
602
    }
603
  }
604
 
605
  fclose(fd);
606
 
607
  return(1);
608
}
609
 
610
 
597 mateuszvis 611
int main(int argc, char **argv) {
2090 mateusz.vi 612
  FILE *fd;
597 mateuszvis 613
  int ecode = 0;
2014 bernd.boec 614
  int i, output_format = C_OUTPUT;
2092 mateusz.vi 615
  int mvcomp_enabled = 1;
2094 mateusz.vi 616
  int excref = 0;
1061 mateusz.vi 617
  unsigned short biggest_langsz = 0;
2015 bernd.boec 618
  struct svl_lang *lang = NULL, *reflang = NULL;
597 mateuszvis 619
 
620
  if (argc < 2) {
1247 mateusz.vi 621
    puts("tlumacz ver " SVARLANGVER " - this tool is part of the SvarLANG project.");
622
    puts("converts a set of CATS-style translations in files EN.TXT, PL.TXT, etc");
2094 mateusz.vi 623
    puts("into a single resource file (OUT.LNG). Also generates a deflang source");
624
    puts("file that contains a properly sized buffer pre-filled with the first");
625
    puts("(reference) language.");
1247 mateusz.vi 626
    puts("");
2095 mateusz.vi 627
    puts("usage: tlumacz [/c | /asm | /nasm] [/nocomp] [/excref] en fr pl ...");
2092 mateusz.vi 628
    puts("");
2094 mateusz.vi 629
    puts("/c        generates deflang.c (default)");
630
    puts("/asm      deflang ASM output");
631
    puts("/nasm     deflang NASM output");
632
    puts("/nocomp   disables compression of strings in the LNG file");
633
    puts("/excref   excludes ref lang from the LNG file (inserted to deflang only)");
597 mateuszvis 634
    return(1);
635
  }
636
 
601 mateuszvis 637
  fd = fopen("out.lng", "wb");
597 mateuszvis 638
  if (fd == NULL) {
2014 bernd.boec 639
    fprintf(stderr, "ERROR: FAILED TO CREATE OR OPEN OUT.LNG");
597 mateuszvis 640
    return(1);
641
  }
642
 
643
  /* write lang blocks */
644
  for (i = 1; i < argc; i++) {
645
    unsigned short sz;
646
    char id[3];
647
 
2014 bernd.boec 648
    if (!strcmp(argv[i], "/c")) {
649
      output_format = C_OUTPUT;
650
      continue;
2090 mateusz.vi 651
    } else if (!strcmp(argv[i], "/asm")) {
2014 bernd.boec 652
      output_format = ASM_OUTPUT;
653
      continue;
654
    } else if(!strcmp(argv[i], "/nasm")) {
655
      output_format = NASM_OUTPUT;
656
      continue;
2092 mateusz.vi 657
    } else if(!strcmp(argv[i], "/nocomp")) {
658
      mvcomp_enabled = 0;
2090 mateusz.vi 659
      continue;
2094 mateusz.vi 660
    } else if(!strcmp(argv[i], "/excref")) {
661
      excref = 1;
662
      continue;
2014 bernd.boec 663
    }
664
 
597 mateuszvis 665
    if (strlen(argv[i]) != 2) {
2014 bernd.boec 666
      fprintf(stderr, "INVALID LANG SPECIFIED: %s\r\n", argv[i]);
597 mateuszvis 667
      ecode = 1;
2015 bernd.boec 668
      goto exit_main;
597 mateuszvis 669
    }
670
    id[0] = argv[i][0];
671
    id[1] = argv[i][1];
672
    id[2] = 0;
673
 
1290 bernd.boec 674
    if ((lang = svl_lang_new(id, DICT_CAP, STRINGS_CAP)) == NULL) {
2014 bernd.boec 675
      fprintf(stderr, "OUT OF MEMORY\r\n");
2015 bernd.boec 676
      ecode = 1;
677
      goto exit_main;
1290 bernd.boec 678
    }
679
 
680
    sz = svl_lang_from_cats_file(lang, reflang);
597 mateuszvis 681
    if (sz == 0) {
2014 bernd.boec 682
      fprintf(stderr, "ERROR COMPUTING LANG '%s'\r\n", id);
597 mateuszvis 683
      ecode = 1;
2015 bernd.boec 684
      goto exit_main;
597 mateuszvis 685
    } else {
686
      printf("computed %s lang block of %u bytes\r\n", id, sz);
1061 mateusz.vi 687
      if (sz > biggest_langsz) biggest_langsz = sz;
597 mateuszvis 688
    }
1290 bernd.boec 689
    svl_compact_lang(lang);
690
 
691
    /* write header if first (reference) language */
2090 mateusz.vi 692
    if (!reflang) {
1290 bernd.boec 693
      if (!svl_write_header(lang->num_strings, fd)) {
2014 bernd.boec 694
        fprintf(stderr, "ERROR WRITING TO OUTPUT FILE\r\n");
1290 bernd.boec 695
        ecode = 1;
2015 bernd.boec 696
        goto exit_main;
1290 bernd.boec 697
      }
698
    }
1293 mateusz.vi 699
 
1290 bernd.boec 700
    /* write lang ID to file, followed string table size, and then
2094 mateusz.vi 701
       the dictionary and string table for current language
702
       skip this for reference language if /excref given */
703
    if ((reflang != NULL) || (excref == 0)) {
704
      if (!svl_write_lang(lang, fd, mvcomp_enabled)) {
705
        fprintf(stderr, "ERROR WRITING TO OUTPUT FILE\r\n");
706
        ecode = 1;
707
        goto exit_main;
708
      }
2095 mateusz.vi 709
    } else {
710
      puts("ref language NOT saved in the LNG file (/excref)");
597 mateuszvis 711
    }
1290 bernd.boec 712
 
1061 mateusz.vi 713
    /* remember reference data for other languages */
2014 bernd.boec 714
    if (!reflang) {
1290 bernd.boec 715
      reflang = lang;
1295 mateusz.vi 716
    } else {
1290 bernd.boec 717
      svl_lang_free(lang);
718
      lang = NULL;
719
    }
597 mateuszvis 720
  }
721
 
2014 bernd.boec 722
  if (!reflang) {
723
    fprintf(stderr, "ERROR: NO LANGUAGE GIVEN\r\n");
2015 bernd.boec 724
    ecode = 1;
725
    goto exit_main;
1061 mateusz.vi 726
  }
727
 
2014 bernd.boec 728
  /* compute the deflang file containing a dump of the reference block */
729
  if (output_format == C_OUTPUT) {
730
    if (!svl_write_c_source(reflang, "deflang.c", biggest_langsz)) {
731
      fprintf(stderr, "ERROR: FAILED TO OPEN OR CREATE DEFLANG.C\r\n");
732
      ecode = 1;
2083 mateusz.vi 733
    }
2014 bernd.boec 734
  } else {
735
    if (!svl_write_asm_source(reflang, "deflang.inc", biggest_langsz, output_format)) {
736
      fprintf(stderr, "ERROR: FAILED TO OPEN OR CREATE DEFLANG.INC\r\n");
737
      ecode = 1;
738
    }
1290 bernd.boec 739
  }
740
 
2015 bernd.boec 741
exit_main:
742
  if (lang && lang != reflang) {
743
    svl_lang_free(lang);
744
  }
745
  if (reflang) {
746
    svl_lang_free(reflang);
747
    reflang = NULL;
2083 mateusz.vi 748
    lang = NULL;
2015 bernd.boec 749
  }
2014 bernd.boec 750
 
2015 bernd.boec 751
  fclose(fd);
752
 
597 mateuszvis 753
  return(ecode);
754
}