Subversion Repositories SvarDOS

Rev

Rev 2106 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
597 mateuszvis 1
/*
2083 mateusz.vi 2
 * Copyright (C) 2021-2024 Mateusz Viste
597 mateuszvis 3
 *
1295 mateusz.vi 4
 * Dictionary-based lookups contributed by Bernd Boeckmann, 2023
5
 *
597 mateuszvis 6
 * usage: tlumacz en fr pl etc
7
 *
2083 mateusz.vi 8
 * computes:
9
 * OUT.LNG -> contains all language resources.
10
 * OUTC.LNG -> same as OUT.LNG but with compressed strings (slower to load).
597 mateuszvis 11
 *
2083 mateusz.vi 12
 * === COMPRESSION ===========================================================
13
 * The compression scheme is very simple. It is applied only to strings (ie.
2102 mateusz.vi 14
 * not the dictionary) and it is basically a stream of 16-bit words (tokens).
15
 *
16
 * Token format is LLLL OOOO OOOO OOOO, where:
17
 * OOOO OOOO OOOO is the back reference offset (number of bytes-1 to rewind)
18
 * LLLL is the number of bytes (-1) that have to be copied from the offset.
19
 *
20
 * However, if LLLL is zero then the token's format is different:
21
 * 0000 RRRR BBBB BBBB
22
 *
23
 * The above form occurs when uncompressible data is encountered:
24
 * BBBB BBBB is the literal value of a byte to be copied
25
 * RRRR is the number of RAW (uncompressible) WORDS that follow (possibly 0)
26
 *
2083 mateusz.vi 27
 * where each WORD value contains the following bits "LLLL OOOO OOOO OOOO":
28
 *
29
 * OOOO OOOO OOOO = a backreference offset ("look that many bytes back")
30
 * LLLL = the number of bytes to copy from the backreference
31
 *
32
 * To recognize a compressed lang block one has to look at the id of the block
33
 * (16-bit language id). If its highest bit is set (0x8000) then the lang block
34
 * is compressed.
597 mateuszvis 35
 */
36
 
37
 
38
#include <stdio.h>
39
#include <stdlib.h>
40
#include <string.h>
1290 bernd.boec 41
#include <ctype.h>
597 mateuszvis 42
 
1248 mateusz.vi 43
#include "svarlang.h"
597 mateuszvis 44
 
1290 bernd.boec 45
#define STRINGS_CAP 65000   /* capacity of a language's string storage, in bytes */
#define DICT_CAP    10000   /* capacity of a language's dictionary, in entries */

/* format of the DEFLANG source file generated for the reference language */
enum {
  C_OUTPUT,     /* deflang.c */
  NO_OUTPUT,    /* no deflang file is generated */
  ASM_OUTPUT,   /* deflang.inc, symbols exported with "public" */
  NASM_OUTPUT   /* deflang.inc, symbols exported with "global" */
};
54
 
55
 
597 mateuszvis 56
/* reads a single line from fd into dst (a buffer of dstsz bytes).
 * The line is cut at the first CR or LF and any trailing spaces are removed,
 * so a line made only of spaces comes back as an empty string.
 * Returns the length of the resulting string, or 0xffff when nothing could
 * be read (EOF or read error). */
static unsigned short readl(char *dst, size_t dstsz, FILE *fd) {
  unsigned short l, trimmedlen = 0;

  if (fgets(dst, (int)dstsz, fd) == NULL) return(0xffff); /* EOF */

  /* walk up to the first CR/LF, remembering the length of the line up to
   * (and including) the last character that is not a space */
  for (l = 0; (dst[l] != 0) && (dst[l] != '\r') && (dst[l] != '\n'); l++) {
    if (dst[l] != ' ') trimmedlen = l + 1;
  }

  dst[trimmedlen] = 0;

  return(trimmedlen);
}
72
 
73
 
1114 mateusz.vi 74
/* parses a line in format "[?]1.50:somestring".
 *
 * The optional leading '?' ("dirty" flag) is skipped. Both numbers must
 * contain at least one digit and fit in a byte (0..255), because the id is
 * built as (major << 8) | minor.
 *
 * On success fills *id and returns a pointer to the string part (the byte
 * right after the ':'). Returns NULL on a malformed line, in which case *id
 * is left untouched. */
static const char *parseline(unsigned short *id, const char *s) {
  unsigned int part[2] = {0, 0}; /* [0] = major number, [1] = minor number */
  int p;

  /* strings prefixed by '?' are flagged as "dirty": ignore this flag here */
  if (*s == '?') s++;

  for (p = 0; p < 2; p++) {
    const char terminator = (p == 0) ? '.' : ':';
    int digits = 0;

    while ((*s >= '0') && (*s <= '9')) {
      part[p] = (part[p] * 10) + (unsigned int)(*s - '0');
      if (part[p] > 255) return(NULL); /* would not fit in its half of the id */
      digits++;
      s++;
    }

    /* the number must exist and be followed by its terminator */
    if ((digits == 0) || (*s != terminator)) return(NULL);
    s++;
  }

  *id = (unsigned short)((part[0] << 8) | part[1]);

  return(s);
}
108
 
109
 
639 mateusz.vi 110
/* converts escape sequences like "\n" or "\t" into actual bytes, in place.
 *
 * Recognized sequences are \e (ESC, 0x1B), \n, \r and \t. For any other
 * escaped character the backslash is simply dropped (so "\\" yields a single
 * backslash). A lone backslash at the very end of the string is removed.
 *
 * Returns the new length of the string. The conversion is done in a single
 * pass: the string can only shrink, so the write position never overtakes
 * the read position. */
static unsigned short unesc_string(char *linebuff) {
  const char *rd = linebuff;
  char *wr = linebuff;

  while (*rd != 0) {
    char c = *rd++;

    if (c == '\\') {
      if (*rd == 0) break; /* trailing backslash: nothing to escape */
      c = *rd++;
      switch (c) {
        case 'e':
          c = 0x1B; /* ESC code, using hex because '\e' is not ANSI C */
          break;
        case 'n':
          c = '\n';
          break;
        case 'r':
          c = '\r';
          break;
        case 't':
          c = '\t';
          break;
        default: /* any other character stands for itself */
          break;
      }
    }

    *wr++ = c;
  }

  *wr = 0;

  return((unsigned short)(wr - linebuff));
}
135
 
1290 bernd.boec 136
/* one dictionary entry: tells where the string of a given id starts inside
 * the strings block of its language. Packed, as the dictionary is written
 * to the LNG file as a raw memory dump. */
#pragma pack(1)
struct dict_entry {
  unsigned short id;      /* string id: (major number << 8) | minor number */
  unsigned short offset;  /* offset of the string within the strings block */
};
#pragma pack()
639 mateusz.vi 142
 
1296 mateusz.vi 143
/* in-memory representation of one language: a dictionary kept sorted by
 * string id, plus a block of NUL-terminated strings the dictionary entries
 * point into */
struct svl_lang {
  char id[2];                  /* two-letter language code, upper case */
  unsigned short num_strings;  /* number of dictionary entries in use */

  struct dict_entry *dict;     /* dictionary entries, sorted by ascending id */
  size_t dict_cap;             /* number of entries dict has room for */

  char *strings;               /* start of the strings block */
  char *strings_end;           /* first unused byte of the strings block */
  size_t strings_cap;          /* size of the strings block, in bytes */

};
1290 bernd.boec 155
 
156
 
1296 mateusz.vi 157
static struct svl_lang *svl_lang_new(const char langid[2], size_t dict_cap, size_t strings_cap) {
158
  struct svl_lang *l;
1290 bernd.boec 159
 
1296 mateusz.vi 160
  l = malloc(sizeof(struct svl_lang));
1295 mateusz.vi 161
  if (!l) return(NULL);
1290 bernd.boec 162
 
163
  l->id[0] = (char)toupper(langid[0]);
164
  l->id[1] = (char)toupper(langid[1]);
165
 
1296 mateusz.vi 166
  l->dict = malloc(dict_cap * sizeof(struct dict_entry));
1295 mateusz.vi 167
  if (!l->dict) return(NULL);
168
 
1290 bernd.boec 169
  l->dict_cap = dict_cap;
170
 
171
  l->num_strings = 0;
172
  l->strings = l->strings_end = malloc(strings_cap);
173
  if (!l->strings) {
174
    free(l->dict);
1295 mateusz.vi 175
    return(NULL);
1290 bernd.boec 176
  }
177
  l->strings_cap = strings_cap;
1295 mateusz.vi 178
 
179
  return(l);
1290 bernd.boec 180
}
181
 
182
 
183
/* compacts the dict and string buffer */
1296 mateusz.vi 184
static void svl_compact_lang(struct svl_lang *l) {
1290 bernd.boec 185
  size_t bytes;
186
  bytes = l->strings_end - l->strings;
187
  if (bytes < l->strings_cap) {
188
    l->strings = l->strings_end = realloc(l->strings, bytes);
189
    l->strings_end += bytes;
190
    l->strings_cap = bytes;
191
  }
192
  l->dict_cap = l->num_strings;
1296 mateusz.vi 193
  l->dict = realloc(l->dict, l->dict_cap * sizeof(struct dict_entry));
1290 bernd.boec 194
}
195
 
196
 
1296 mateusz.vi 197
static void svl_lang_free(struct svl_lang *l) {
1290 bernd.boec 198
  l->num_strings = 0;
199
  if (l->dict) {
200
    free(l->dict);
201
    l->dict = NULL;
202
  }
203
  if (l->strings) {
204
    free(l->strings);
205
    l->strings = l->strings_end = NULL;
206
  }
207
  l->dict_cap = 0;
208
  l->strings_cap = 0;
209
}
210
 
211
 
1296 mateusz.vi 212
static size_t svl_strings_bytes(const struct svl_lang *l) {
1295 mateusz.vi 213
  return(l->strings_end - l->strings);
1290 bernd.boec 214
}
215
 
216
 
1296 mateusz.vi 217
static size_t svl_dict_bytes(const struct svl_lang *l) {
218
  return(l->num_strings * sizeof(struct dict_entry));
1290 bernd.boec 219
}
220
 
221
 
1296 mateusz.vi 222
static int svl_add_str(struct svl_lang *l, unsigned short id, const char *s) {
1290 bernd.boec 223
  size_t len = strlen(s) + 1;
224
  size_t cursor;
225
 
1296 mateusz.vi 226
  if ((l->strings_cap < svl_strings_bytes(l) + len) || (l->dict_cap < (l->num_strings + 1) * sizeof(struct dict_entry))) {
1295 mateusz.vi 227
    return(0);
1290 bernd.boec 228
  }
1293 mateusz.vi 229
 
1290 bernd.boec 230
  /* find dictionary insert position, search backwards in assumption
231
     that in translation files, strings are generally ordered ascending */
232
  for (cursor = l->num_strings; cursor > 0 && l->dict[cursor-1].id > id; cursor--);
233
 
1296 mateusz.vi 234
  memmove(&(l->dict[cursor+1]), &(l->dict[cursor]), sizeof(struct dict_entry) * (l->num_strings - cursor));
1290 bernd.boec 235
  l->dict[cursor].id = id;
236
  l->dict[cursor].offset = l->strings_end - l->strings;
237
 
238
  memcpy(l->strings_end, s, len);
239
  l->strings_end += len;
240
  l->num_strings++;
241
 
1295 mateusz.vi 242
  return(1);
1290 bernd.boec 243
}
244
 
245
 
1296 mateusz.vi 246
static int svl_find(const struct svl_lang *l, unsigned short id) {
1295 mateusz.vi 247
  size_t left = 0, right = l->num_strings - 1, x;
248
  unsigned short v;
1290 bernd.boec 249
 
1295 mateusz.vi 250
  if (l->num_strings == 0) return(0);
1290 bernd.boec 251
 
1295 mateusz.vi 252
  while (left <= right ) {
253
    x = left + ( (right - left ) >> 2 );
254
    v = l->dict[x].id;
1296 mateusz.vi 255
    if ( id == v ) return(1); /* found! */
256
 
257
    if (id > v) {
1295 mateusz.vi 258
      left = x + 1;
259
    } else {
260
      right = x - 1;
261
    }
262
  }
263
  return(0);
1290 bernd.boec 264
}
265
 
1295 mateusz.vi 266
 
1061 mateusz.vi 267
/* opens a CATS-style file and compiles it into a ressources lang block
268
 * returns 0 on error, or the size of the generated data block otherwise */
1296 mateusz.vi 269
static unsigned short svl_lang_from_cats_file(struct svl_lang *l, struct svl_lang *refl) {
1290 bernd.boec 270
  unsigned short linelen;
597 mateuszvis 271
  FILE *fd;
1290 bernd.boec 272
  char fname[] = "xx.txt";
623 mateuszvis 273
  static char linebuf[8192];
1114 mateusz.vi 274
  const char *ptr;
1290 bernd.boec 275
  unsigned short id, maxid=0, maxid_line, linecount;
276
  int i;
597 mateuszvis 277
 
1290 bernd.boec 278
  fname[strlen(fname) - 6] = (char)tolower( l->id[0] );
279
  fname[strlen(fname) - 5] = (char)tolower( l->id[1] );
597 mateuszvis 280
 
281
  fd = fopen(fname, "rb");
282
  if (fd == NULL) {
283
    printf("ERROR: FAILED TO OPEN '%s'\r\n", fname);
284
    return(0);
285
  }
286
 
287
  for (linecount = 1;; linecount++) {
288
    linelen = readl(linebuf, sizeof(linebuf), fd);
289
    if (linelen == 0xffff) break; /* EOF */
290
    if ((linelen == 0) || (linebuf[0] == '#')) continue;
291
 
639 mateusz.vi 292
    /* convert escaped chars to actual bytes (\n -> newline, etc) */
293
    linelen = unesc_string(linebuf);
294
 
597 mateuszvis 295
    /* read id and get ptr to actual string ("1.15:string") */
296
    ptr = parseline(&id, linebuf);
1272 mateusz.vi 297
 
298
    /* handle malformed lines */
597 mateuszvis 299
    if (ptr == NULL) {
1272 mateusz.vi 300
      printf("WARNING: %s[#%u] is malformed (linelen = %u):\r\n", fname, linecount, linelen);
623 mateuszvis 301
      puts(linebuf);
1272 mateusz.vi 302
      continue;
597 mateuszvis 303
    }
1272 mateusz.vi 304
 
305
    /* ignore empty strings (but emit a warning) */
306
    if (ptr[0] == 0) {
1271 bernd.boec 307
      printf("WARNING: %s[#%u] ignoring empty string %u.%u\r\n", fname, linecount, id >> 8, id & 0xff);
308
      continue;
309
    }
597 mateuszvis 310
 
1114 mateusz.vi 311
    /* warn about dirty lines */
312
    if (linebuf[0] == '?') {
313
      printf("WARNING: %s[#%u] string id %u.%u is flagged as 'dirty'\r\n", fname, linecount, id >> 8, id & 0xff);
314
    }
315
 
1290 bernd.boec 316
    /* add the string contained in current line, if conditions are met */
317
    if (!svl_find(l, id)) {
1295 mateusz.vi 318
      if ((refl == NULL) || (svl_find(refl, id))) {
1290 bernd.boec 319
        if (!svl_add_str(l, id, ptr)) {
2014 bernd.boec 320
          fprintf(stderr, "ERROR: %s[#%u] output size limit exceeded\r\n", fname, linecount);
1290 bernd.boec 321
          fclose(fd);
1295 mateusz.vi 322
          return(0);
1290 bernd.boec 323
        }
324
        if (id >= maxid) {
325
          maxid = id;
326
          maxid_line = linecount;
1295 mateusz.vi 327
        } else {
1293 mateusz.vi 328
          printf("WARNING:%s[#%u] file unsorted - line %u has higher id %u.%u\r\n", fname, linecount, maxid_line, maxid >> 8, maxid & 0xff);
1290 bernd.boec 329
        }
1295 mateusz.vi 330
      } else {
1290 bernd.boec 331
        printf("WARNING: %s[#%u] has an invalid id (%u.%u not present in ref lang)\r\n", fname, linecount, id >> 8, id & 0xff);
332
      }
1295 mateusz.vi 333
    } else {
1293 mateusz.vi 334
      printf("WARNING: %s[#%u] has a duplicated id (%u.%u)\r\n", fname, linecount, id >> 8, id & 0xff);
597 mateuszvis 335
    }
336
  }
337
 
338
  fclose(fd);
339
 
1290 bernd.boec 340
  /* if reflang provided, pull missing strings from it */
341
  if (refl != NULL) {
342
    for (i = 0; i < refl->num_strings; i++) {
343
      id = refl->dict[i].id;
344
      if (!svl_find(l, id)) {
597 mateuszvis 345
        printf("WARNING: %s is missing string %u.%u (pulled from ref lang)\r\n", fname, id >> 8, id & 0xff);
1291 bernd.boec 346
        if (!svl_add_str(l, id, refl->strings + refl->dict[i].offset)) {
2014 bernd.boec 347
          fprintf(stderr, "ERROR: %s[#%u] output size limit exceeded\r\n", fname, linecount);
1295 mateusz.vi 348
          return(0);
1290 bernd.boec 349
        }
597 mateuszvis 350
      }
351
    }
352
  }
353
 
1290 bernd.boec 354
  return(svl_strings_bytes(l));
355
}
597 mateuszvis 356
 
1290 bernd.boec 357
 
1295 mateusz.vi 358
/* writes the LNG file header to fd: the 4-byte signature "SvL\x1a" followed
 * by the 16-bit number of strings (as laid out in the host's memory).
 * returns 1 on success, 0 if any of the writes came up short */
static int svl_write_header(unsigned short num_strings, FILE *fd) {
  static const char magic[4] = {'S', 'v', 'L', 0x1a};

  if (fwrite(magic, 1, 4, fd) != 4) return(0);
  if (fwrite(&num_strings, 1, 2, fd) != 2) return(0);

  return(1);
}
361
 
362
 
2102 mateusz.vi 363
 
364
/* write qlen literal bytes into dst, returns amount of "compressed" bytes.
 *
 * The literals are emitted as a token "0000 RRRR BBBB BBBB" (BBBB BBBB being
 * the first literal, RRRR the count of raw words that follow), then the raw
 * words themselves. If one byte is left over because it could not fill a
 * word, it gets a token of its own. *dst is advanced past what was written.
 * qlen must not exceed 32, otherwise RRRR would not fit on 4 bits. */
static unsigned short mvcomp_litqueue_dump(unsigned short **dst, const unsigned char *q, unsigned short qlen) {
  unsigned short complen = 0;

  while (qlen != 0) {
    unsigned short rawwords;

    qlen--; /* the first byte travels inside the token itself */
    rawwords = qlen / 2;

    /* write the length and first char */
    **dst = (unsigned short)((rawwords << 8) | q[0]);
    *dst += 1;
    q++;
    complen += 2;

    /* write the pending words */
    if (rawwords != 0) {
      memcpy(*dst, q, rawwords * 2);
      *dst += rawwords;
      q += rawwords * 2;
      complen += rawwords * 2;
      qlen -= rawwords * 2;
    }

    /* one byte might still be left if it did not fit inside a word: the
     * next pass of the loop takes care of it */
  }

  return(complen);
}


/* returns the number of bytes that mvcomp_litqueue_dump() emits for a queue
 * of qlen literals: qlen + 1 for an odd count, qlen + 2 for an even one */
static unsigned short mvcomp_litqueue_size(unsigned short qlen) {
  if (qlen == 0) return(0);
  return((unsigned short)(qlen + 2 - (qlen & 1)));
}


/* mvcomp applies the MV-COMPRESSION algorithm to data and returns the compressed size
 *
 * dstbuf  output buffer, must be suitably aligned for 16-bit words and be
 *         at least len bytes big
 * src     data to compress
 * len     length of src, in bytes
 *
 * Some data grows when "compressed". To protect dstbuf (and the 16-bit
 * result) the compression is abandoned as soon as the output would exceed
 * len bytes: len is returned then and the content of dstbuf is meaningless.
 * Hence a result that is not lower than len means "not compressible", and
 * never more than len bytes are written to dstbuf. */
static unsigned short mvcomp(char *dstbuf, const char *src, unsigned short len) {
  const unsigned short srclen = len;
  unsigned short complen = 0;
  unsigned short *dst = (void *)dstbuf;
  unsigned short bytesprocessed = 0;
  unsigned char litqueue[32];
  unsigned char litqueuelen = 0;

  /* read src byte by byte, len times, each time look for a match of 16,15,14..2 chars in the back buffer */
  while (len > 0) {
    unsigned short matchlen = 16;
    unsigned short minmatch;
    unsigned short maxoffset = 4096;
    unsigned short offset = 0;

    if (len < matchlen) matchlen = len;

    /* look for a minimum match of 2 bytes, unless I have some pending literal bytes
     * awaiting, in which case I am going through a new data pattern and it is more
     * efficient to wait for a 3-bytes match before breaking the literal string */
    minmatch = (litqueuelen != 0) ? 3 : 2;

    /* a back reference cannot reach before the start of the data */
    if (maxoffset > bytesprocessed) maxoffset = bytesprocessed;

    for (; matchlen >= minmatch; matchlen--) {
      /* start at -matchlen and try to match something moving backward */
      for (offset = matchlen; offset <= maxoffset; offset++) {
        if (memcmp(src, src - offset, matchlen) == 0) goto FOUND;
      }
    }

    /* if here: no match found, write a literal byte to queue */
    litqueue[litqueuelen++] = (unsigned char)*src;
    src++;
    bytesprocessed++;
    len--;

    /* dump literal queue to dst if max length reached */
    if (litqueuelen == 31) {
      if (mvcomp_litqueue_size(litqueuelen) > srclen - complen) return(srclen);
      complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
      litqueuelen = 0;
    }
    continue;

    FOUND: /* found a match of matchlen bytes at -offset */

    /* dump awaiting literal queue to dst first */
    if (litqueuelen != 0) {
      if (mvcomp_litqueue_size(litqueuelen) > srclen - complen) return(srclen);
      complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
      litqueuelen = 0;
    }

    /* the back reference token needs 2 bytes */
    if (srclen - complen < 2) return(srclen);

    *dst = (unsigned short)(((matchlen - 1) << 12) | (offset - 1));
    dst++;
    src += matchlen;
    bytesprocessed += matchlen;
    len -= matchlen;
    complen += 2;
  }

  /* dump whatever literals are still waiting in the queue */
  if (litqueuelen != 0) {
    if (mvcomp_litqueue_size(litqueuelen) > srclen - complen) return(srclen);
    complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
    litqueuelen = 0;
  }

  return(complen);
}
472
 
473
 
474
/* write the language block (id, dict, strings) into the LNG file.
475
 * strings are compressed if compflag != 0 */
476
static int svl_write_lang(const struct svl_lang *l, FILE *fd, int compflag) {
1290 bernd.boec 477
  unsigned short strings_bytes = svl_strings_bytes(l);
2083 mateusz.vi 478
  unsigned short langid = *((unsigned short *)(&l->id));
479
  const char *stringsptr = l->strings;
597 mateuszvis 480
 
2083 mateusz.vi 481
  /* if compressed then do the magic */
482
  if (compflag) {
483
    static char compstrings[65000];
2089 mateusz.vi 484
    unsigned short comp_bytes;
485
    comp_bytes = mvcomp(compstrings, l->strings, strings_bytes);
486
    if (comp_bytes < strings_bytes) {
487
      printf("lang %c%c mvcomp-ressed (%u bytes -> %u bytes)\n", l->id[0], l->id[1], strings_bytes, comp_bytes);
488
      langid |= 0x8000; /* LNG langblock flag that means "this lang is compressed" */
489
      strings_bytes = comp_bytes;
490
      stringsptr = compstrings;
491
    } else {
492
      printf("lang %c%c left UNCOMPRESSED (uncomp=%u bytes ; mvcomp=%u bytes)\n", l->id[0], l->id[1], strings_bytes, comp_bytes);
493
    }
2083 mateusz.vi 494
  }
495
 
496
  return((fwrite(&langid, 1, 2, fd) == 2) &&
1290 bernd.boec 497
         (fwrite(&strings_bytes, 1, 2, fd) == 2) &&
498
         (fwrite(l->dict, 1, svl_dict_bytes(l), fd) == svl_dict_bytes(l)) &&
2083 mateusz.vi 499
         (fwrite(stringsptr, 1, strings_bytes, fd) == strings_bytes));
1290 bernd.boec 500
}
501
 
502
 
1296 mateusz.vi 503
static int svl_write_c_source(const struct svl_lang *l, const char *fn, unsigned short biggest_langsz) {
1290 bernd.boec 504
  FILE *fd;
505
  int i;
506
  unsigned short strings_bytes = svl_strings_bytes(l);
507
  unsigned short nextnlat = 0;
1295 mateusz.vi 508
  unsigned short allocsz;
1290 bernd.boec 509
 
510
  fd = fopen(fn, "wb");
511
  if (fd == NULL) {
1295 mateusz.vi 512
    return(0);
513
  }
1290 bernd.boec 514
 
1295 mateusz.vi 515
  allocsz = biggest_langsz + (biggest_langsz / 20);
516
  printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (5%% safety margin)\n", biggest_langsz, allocsz);
517
  fprintf(fd, "/* THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY) */\r\n");
518
  fprintf(fd, "const unsigned short svarlang_memsz = %uu;\r\n", allocsz);
519
  fprintf(fd, "const unsigned short svarlang_string_count = %uu;\r\n\r\n", l->num_strings);
520
  fprintf(fd, "char svarlang_mem[%u] = {\r\n", allocsz);
521
 
522
  for (i = 0; i < strings_bytes; i++) {
523
    if (!fprintf(fd, "0x%02x", l->strings[i])) {
524
      fclose(fd);
525
      return(0);
1290 bernd.boec 526
    }
527
 
1295 mateusz.vi 528
    if (i + 1 < strings_bytes) fprintf(fd, ",");
529
    nextnlat++;
530
    if (l->strings[i] == '\0' || nextnlat == 16) {
1290 bernd.boec 531
      fprintf(fd, "\r\n");
1295 mateusz.vi 532
      nextnlat = 0;
1290 bernd.boec 533
    }
1295 mateusz.vi 534
  }
535
  fprintf(fd, "};\r\n\r\n");
1290 bernd.boec 536
 
1295 mateusz.vi 537
  fprintf(fd, "unsigned short svarlang_dict[%u] = {\r\n", l->num_strings * 2);
538
  for (i = 0; i < l->num_strings; i++) {
539
    if (!fprintf(fd, "0x%04x,0x%04x", l->dict[i].id, l->dict[i].offset)) {
540
      fclose(fd);
541
      return(0);
542
    }
543
    if (i + 1 < l->num_strings) fprintf(fd, ",");
544
    fprintf(fd, "\r\n");
1290 bernd.boec 545
  }
1295 mateusz.vi 546
  fprintf(fd, "};\r\n");
1290 bernd.boec 547
 
1295 mateusz.vi 548
  fclose(fd);
549
 
550
  return(1);
1290 bernd.boec 551
}
552
 
553
 
2014 bernd.boec 554
static int svl_write_asm_source(const struct svl_lang *l, const char *fn, unsigned short biggest_langsz, int format) {
555
  FILE *fd;
556
  int i;
557
  unsigned short strings_bytes = svl_strings_bytes(l);
558
  unsigned short nextnlat = 0;
559
  unsigned short allocsz;
560
 
561
  const char *public = (format == ASM_OUTPUT) ? "public" : "global";
562
 
563
  fd = fopen(fn, "wb");
564
  if (fd == NULL) {
565
    return(0);
566
  }
567
 
568
  allocsz = biggest_langsz + (biggest_langsz / 20);
569
  printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (5%% safety margin)\n", biggest_langsz, allocsz);
570
  fprintf(fd, "; THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY)\r\n");
571
  fprintf(fd, "%s svarlang_memsz\r\n", public);
572
  fprintf(fd, "svarlang_memsz dw %u\r\n", allocsz);
573
  fprintf(fd, "%s svarlang_string_count\r\n", public);
574
  fprintf(fd, "svarlang_string_count dw %u\r\n\r\n", l->num_strings);
575
  fprintf(fd, "%s svarlang_mem\r\n", public);
576
  fprintf(fd, "svarlang_mem:\r\n");
577
 
578
  if (strings_bytes > 0) fprintf(fd, "db ");
579
 
580
  for (i = 0; i < strings_bytes; i++) {
2016 bernd.boec 581
    if (!fprintf(fd, "%u", l->strings[i])) {
2014 bernd.boec 582
      fclose(fd);
583
      return(0);
584
    }
585
 
586
    nextnlat++;
587
    if (l->strings[i] == '\0' || nextnlat == 16) {
588
      fprintf(fd, "\r\n");
589
      if (i + 1 < strings_bytes ) fprintf(fd, "db ");
590
      nextnlat = 0;
591
    }
592
    else {
593
      fprintf(fd, ",");
594
    }
595
  }
596
 
597
  fprintf(fd, "\r\n%s svarlang_dict\r\n", public);
598
  fprintf(fd, "svarlang_dict:\r\n");
599
  for (i = 0; i < l->num_strings; i++) {
2016 bernd.boec 600
    if (!fprintf(fd, "dw %u,%u\r\n", l->dict[i].id, l->dict[i].offset)) {
2014 bernd.boec 601
      fclose(fd);
602
      return(0);
603
    }
604
  }
605
 
606
  fclose(fd);
607
 
608
  return(1);
609
}
610
 
611
 
597 mateuszvis 612
int main(int argc, char **argv) {
2090 mateusz.vi 613
  FILE *fd;
597 mateuszvis 614
  int ecode = 0;
2014 bernd.boec 615
  int i, output_format = C_OUTPUT;
2092 mateusz.vi 616
  int mvcomp_enabled = 1;
2094 mateusz.vi 617
  int excref = 0;
1061 mateusz.vi 618
  unsigned short biggest_langsz = 0;
2015 bernd.boec 619
  struct svl_lang *lang = NULL, *reflang = NULL;
597 mateuszvis 620
 
621
  if (argc < 2) {
1247 mateusz.vi 622
    puts("tlumacz ver " SVARLANGVER " - this tool is part of the SvarLANG project.");
623
    puts("converts a set of CATS-style translations in files EN.TXT, PL.TXT, etc");
2094 mateusz.vi 624
    puts("into a single resource file (OUT.LNG). Also generates a deflang source");
625
    puts("file that contains a properly sized buffer pre-filled with the first");
626
    puts("(reference) language.");
1247 mateusz.vi 627
    puts("");
2108 mateusz.vi 628
    puts("usage: tlumacz [/c|/asm|/nasm|/nodef] [/nocomp] [/excref] en fr pl ...");
2092 mateusz.vi 629
    puts("");
2094 mateusz.vi 630
    puts("/c        generates deflang.c (default)");
631
    puts("/asm      deflang ASM output");
632
    puts("/nasm     deflang NASM output");
2108 mateusz.vi 633
    puts("/nodef    does NOT generate a deflang source file (only an LNG file)");
2094 mateusz.vi 634
    puts("/nocomp   disables compression of strings in the LNG file");
635
    puts("/excref   excludes ref lang from the LNG file (inserted to deflang only)");
597 mateuszvis 636
    return(1);
637
  }
638
 
601 mateuszvis 639
  fd = fopen("out.lng", "wb");
597 mateuszvis 640
  if (fd == NULL) {
2014 bernd.boec 641
    fprintf(stderr, "ERROR: FAILED TO CREATE OR OPEN OUT.LNG");
597 mateuszvis 642
    return(1);
643
  }
644
 
645
  /* write lang blocks */
646
  for (i = 1; i < argc; i++) {
647
    unsigned short sz;
648
    char id[3];
649
 
2014 bernd.boec 650
    if (!strcmp(argv[i], "/c")) {
651
      output_format = C_OUTPUT;
652
      continue;
2090 mateusz.vi 653
    } else if (!strcmp(argv[i], "/asm")) {
2014 bernd.boec 654
      output_format = ASM_OUTPUT;
655
      continue;
656
    } else if(!strcmp(argv[i], "/nasm")) {
657
      output_format = NASM_OUTPUT;
658
      continue;
2092 mateusz.vi 659
    } else if(!strcmp(argv[i], "/nocomp")) {
660
      mvcomp_enabled = 0;
2090 mateusz.vi 661
      continue;
2108 mateusz.vi 662
    } else if(!strcmp(argv[i], "/nodef")) {
663
      output_format = NO_OUTPUT;
664
      continue;
2094 mateusz.vi 665
    } else if(!strcmp(argv[i], "/excref")) {
666
      excref = 1;
667
      continue;
2014 bernd.boec 668
    }
669
 
597 mateuszvis 670
    if (strlen(argv[i]) != 2) {
2014 bernd.boec 671
      fprintf(stderr, "INVALID LANG SPECIFIED: %s\r\n", argv[i]);
597 mateuszvis 672
      ecode = 1;
2015 bernd.boec 673
      goto exit_main;
597 mateuszvis 674
    }
675
    id[0] = argv[i][0];
676
    id[1] = argv[i][1];
677
    id[2] = 0;
678
 
1290 bernd.boec 679
    if ((lang = svl_lang_new(id, DICT_CAP, STRINGS_CAP)) == NULL) {
2014 bernd.boec 680
      fprintf(stderr, "OUT OF MEMORY\r\n");
2015 bernd.boec 681
      ecode = 1;
682
      goto exit_main;
1290 bernd.boec 683
    }
684
 
685
    sz = svl_lang_from_cats_file(lang, reflang);
597 mateuszvis 686
    if (sz == 0) {
2014 bernd.boec 687
      fprintf(stderr, "ERROR COMPUTING LANG '%s'\r\n", id);
597 mateuszvis 688
      ecode = 1;
2015 bernd.boec 689
      goto exit_main;
597 mateuszvis 690
    } else {
691
      printf("computed %s lang block of %u bytes\r\n", id, sz);
1061 mateusz.vi 692
      if (sz > biggest_langsz) biggest_langsz = sz;
597 mateuszvis 693
    }
1290 bernd.boec 694
    svl_compact_lang(lang);
695
 
696
    /* write header if first (reference) language */
2090 mateusz.vi 697
    if (!reflang) {
1290 bernd.boec 698
      if (!svl_write_header(lang->num_strings, fd)) {
2014 bernd.boec 699
        fprintf(stderr, "ERROR WRITING TO OUTPUT FILE\r\n");
1290 bernd.boec 700
        ecode = 1;
2015 bernd.boec 701
        goto exit_main;
1290 bernd.boec 702
      }
703
    }
1293 mateusz.vi 704
 
1290 bernd.boec 705
    /* write lang ID to file, followed string table size, and then
2094 mateusz.vi 706
       the dictionary and string table for current language
707
       skip this for reference language if /excref given */
708
    if ((reflang != NULL) || (excref == 0)) {
709
      if (!svl_write_lang(lang, fd, mvcomp_enabled)) {
710
        fprintf(stderr, "ERROR WRITING TO OUTPUT FILE\r\n");
711
        ecode = 1;
712
        goto exit_main;
713
      }
2095 mateusz.vi 714
    } else {
715
      puts("ref language NOT saved in the LNG file (/excref)");
597 mateuszvis 716
    }
1290 bernd.boec 717
 
1061 mateusz.vi 718
    /* remember reference data for other languages */
2014 bernd.boec 719
    if (!reflang) {
1290 bernd.boec 720
      reflang = lang;
1295 mateusz.vi 721
    } else {
1290 bernd.boec 722
      svl_lang_free(lang);
723
      lang = NULL;
724
    }
597 mateuszvis 725
  }
726
 
2014 bernd.boec 727
  if (!reflang) {
728
    fprintf(stderr, "ERROR: NO LANGUAGE GIVEN\r\n");
2015 bernd.boec 729
    ecode = 1;
730
    goto exit_main;
1061 mateusz.vi 731
  }
732
 
2108 mateusz.vi 733
  /* compute the deflang file containing a dump of the reference lang block */
2014 bernd.boec 734
  if (output_format == C_OUTPUT) {
735
    if (!svl_write_c_source(reflang, "deflang.c", biggest_langsz)) {
736
      fprintf(stderr, "ERROR: FAILED TO OPEN OR CREATE DEFLANG.C\r\n");
737
      ecode = 1;
2083 mateusz.vi 738
    }
2108 mateusz.vi 739
  } else if ((output_format == ASM_OUTPUT) || (output_format == NASM_OUTPUT)) {
2014 bernd.boec 740
    if (!svl_write_asm_source(reflang, "deflang.inc", biggest_langsz, output_format)) {
741
      fprintf(stderr, "ERROR: FAILED TO OPEN OR CREATE DEFLANG.INC\r\n");
742
      ecode = 1;
743
    }
1290 bernd.boec 744
  }
745
 
2015 bernd.boec 746
exit_main:
2108 mateusz.vi 747
  if (lang && (lang != reflang)) {
2015 bernd.boec 748
    svl_lang_free(lang);
749
  }
750
  if (reflang) {
751
    svl_lang_free(reflang);
752
    reflang = NULL;
2083 mateusz.vi 753
    lang = NULL;
2015 bernd.boec 754
  }
2014 bernd.boec 755
 
2015 bernd.boec 756
  fclose(fd);
757
 
597 mateuszvis 758
  return(ecode);
759
}