Subversion Repositories SvarDOS

Rev

Rev 2123 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
597 mateuszvis 1
/*
2083 mateusz.vi 2
 * Copyright (C) 2021-2024 Mateusz Viste
597 mateuszvis 3
 *
1295 mateusz.vi 4
 * Dictionary-based lookups contributed by Bernd Boeckmann, 2023
5
 *
597 mateuszvis 6
 * usage: tlumacz en fr pl etc
7
 *
2083 mateusz.vi 8
 * computes:
9
 * OUT.LNG -> contains all language resources.
10
 * OUTC.LNG -> same as OUT.LNG but with compressed strings (slower to load).
597 mateuszvis 11
 *
2083 mateusz.vi 12
 * === COMPRESSION ===========================================================
13
 * The compression scheme is very simple. It is applied only to strings (ie.
2102 mateusz.vi 14
 * not the dictionary) and it is basically a stream of 16-bit words (tokens).
15
 *
16
 * Token format is LLLL OOOO OOOO OOOO, where:
17
 * OOOO OOOO OOOO is the back reference offset (number of bytes-1 to rewind)
18
 * LLLL is the number of bytes (-1) that have to be copied from the offset.
19
 *
20
 * However, if LLLL is zero then the token's format is different:
21
 * 0000 RRRR BBBB BBBB
22
 *
23
 * The above form occurs when uncompressible data is encountered:
24
 * BBBB BBBB is the literal value of a byte to be copied
25
 * RRRR is the number of RAW (uncompressible) WORDS that follow (possibly 0)
26
 *
2083 mateusz.vi 27
 * where each WORD value contains the following bits "LLLL OOOO OOOO OOOO":
28
 *
29
 * OOOO OOOO OOOO = a backreference offset ("look that many bytes back")
30
 * LLLL = the number of bytes to copy from the backreference
31
 *
32
 * To recognize a compressed lang block one has to look at the id of the block
33
 * (16-bit language id). If its highest bit is set (0x8000) then the lang block
34
 * is compressed.
597 mateuszvis 35
 */
36
 
37
 
38
#include <stdio.h>
39
#include <stdlib.h>
40
#include <string.h>
1290 bernd.boec 41
#include <ctype.h>
597 mateuszvis 42
 
1248 mateusz.vi 43
#include "svarlang.h"
597 mateuszvis 44
 
1290 bernd.boec 45
#define STRINGS_CAP 65000   /* string storage size in characters */
1293 mateusz.vi 46
#define DICT_CAP    10000   /* dictionary size in elements */
597 mateuszvis 47
 
2014 bernd.boec 48
/* format of the generated DEFLANG source file, selected on the command line */
enum {
  C_OUTPUT    = 0,  /* C source file (the default) */
  NO_OUTPUT   = 1,  /* no deflang file is generated */
  ASM_OUTPUT  = 2,  /* assembly include using the "public" directive */
  NASM_OUTPUT = 3   /* assembly include using the "global" directive */
};
54
 
55
 
597 mateuszvis 56
/* reads a single line from fd into dst (at most dstsz-1 characters).
 * the line is cut at the first CR or LF and trailing spaces are removed.
 * returns the length of the resulting string, or 0xffff on EOF.
 * a line that contains nothing but spaces yields an empty string (length 0) */
static unsigned short readl(char *dst, size_t dstsz, FILE *fd) {
  unsigned short i;
  unsigned short len = 0; /* index one past the last non-space character */

  if (fgets(dst, (int)dstsz, fd) == NULL) return(0xffff); /* EOF */

  /* walk up to the first CR, LF or terminator, remembering where the last
   * non-space character ends so the tail of spaces can be dropped */
  for (i = 0; (dst[i] != 0) && (dst[i] != '\r') && (dst[i] != '\n'); i++) {
    if (dst[i] != ' ') len = (unsigned short)(i + 1);
  }

  dst[len] = 0; /* trims the line ending and trailing spaces in one go */

  return(len);
}
72
 
73
 
1114 mateusz.vi 74
/* parses a line in format "[?]1.50:somestring".
 * on success fills id with (major << 8) | minor and returns a pointer to the
 * string part that follows the colon. returns NULL (id left untouched) if the
 * line is malformed: both numbers must have at least one digit and fit in
 * 8 bits, since the id packs them into a single 16-bit word */
static const char *parseline(unsigned short *id, const char *s) {
  unsigned int part[2] = {0, 0}; /* [0] = major (before dot), [1] = minor */
  int idx = 0;                   /* which part is being read */
  int digits = 0;                /* digits seen so far in current part */

  /* strings prefixed by '?' are flagged as "dirty": ignore this flag here */
  if (*s == '?') s++;

  for (;; s++) {
    if ((*s >= '0') && (*s <= '9')) {
      part[idx] = (part[idx] * 10) + (unsigned int)(*s - '0');
      if (part[idx] > 255) return(NULL); /* would not fit in its id byte */
      digits++;
    } else if ((*s == '.') && (idx == 0) && (digits != 0)) {
      idx = 1;     /* switch to the minor number */
      digits = 0;
    } else if ((*s == ':') && (idx == 1) && (digits != 0)) {
      break;       /* complete "major.minor:" prefix collected */
    } else {
      return(NULL); /* unexpected character (including end of string) */
    }
  }

  *id = (unsigned short)((part[0] << 8) | part[1]);

  return(s + 1); /* skip the colon */
}
108
 
109
 
639 mateusz.vi 110
/* replaces, in place, backslash escape sequences by the byte they stand for:
 * \e (ESC), \n, \r and \t are translated, any other escaped character is kept
 * as-is (so "\\" yields a single backslash). a lone backslash at the very end
 * of the string is dropped. returns the new length of the string. */
static unsigned short unesc_string(char *linebuff) {
  size_t rd = 0;          /* read cursor */
  unsigned short wr = 0;  /* write cursor, never ahead of rd */

  while (linebuff[rd] != 0) {
    char c = linebuff[rd++];

    if (c == '\\') {
      c = linebuff[rd];
      if (c == 0) break; /* dangling backslash: nothing left to unescape */
      rd++;
      if (c == 'e') {
        c = 0x1B; /* ESC code, using hex because '\e' is not ANSI C */
      } else if (c == 'n') {
        c = '\n';
      } else if (c == 'r') {
        c = '\r';
      } else if (c == 't') {
        c = '\t';
      }
    }

    linebuff[wr++] = c;
  }

  linebuff[wr] = 0;
  return(wr);
}
135
 
1290 bernd.boec 136
/* one dictionary slot, linking a string id to the location of its text.
 * the structure is packed because the dictionary array is written verbatim
 * into the LNG file */
#pragma pack(1)
struct dict_entry {
  unsigned short id;      /* string id, as computed by parseline() */
  unsigned short offset;  /* offset of the string within the strings buffer */
};
#pragma pack()
639 mateusz.vi 142
 
1296 mateusz.vi 143
/* in-memory representation of a single language block */
struct svl_lang {
  char id[2];                  /* two-letter language code (stored upper-case) */
  unsigned short num_strings;  /* number of used entries in dict */

  struct dict_entry *dict;     /* dictionary, kept sorted by ascending id */
  size_t dict_cap;             /* capacity of dict, in elements */

  char *strings;               /* start of the strings buffer */
  char *strings_end;           /* first unused byte of the strings buffer */
  size_t strings_cap;          /* capacity of the strings buffer, in bytes */
};
1290 bernd.boec 155
 
156
 
1296 mateusz.vi 157
static struct svl_lang *svl_lang_new(const char langid[2], size_t dict_cap, size_t strings_cap) {
158
  struct svl_lang *l;
1290 bernd.boec 159
 
1296 mateusz.vi 160
  l = malloc(sizeof(struct svl_lang));
1295 mateusz.vi 161
  if (!l) return(NULL);
1290 bernd.boec 162
 
163
  l->id[0] = (char)toupper(langid[0]);
164
  l->id[1] = (char)toupper(langid[1]);
165
 
1296 mateusz.vi 166
  l->dict = malloc(dict_cap * sizeof(struct dict_entry));
1295 mateusz.vi 167
  if (!l->dict) return(NULL);
168
 
1290 bernd.boec 169
  l->dict_cap = dict_cap;
170
 
171
  l->num_strings = 0;
172
  l->strings = l->strings_end = malloc(strings_cap);
173
  if (!l->strings) {
174
    free(l->dict);
1295 mateusz.vi 175
    return(NULL);
1290 bernd.boec 176
  }
177
  l->strings_cap = strings_cap;
1295 mateusz.vi 178
 
179
  return(l);
1290 bernd.boec 180
}
181
 
182
 
183
/* compacts the dict and string buffer */
1296 mateusz.vi 184
static void svl_compact_lang(struct svl_lang *l) {
1290 bernd.boec 185
  size_t bytes;
186
  bytes = l->strings_end - l->strings;
187
  if (bytes < l->strings_cap) {
188
    l->strings = l->strings_end = realloc(l->strings, bytes);
189
    l->strings_end += bytes;
190
    l->strings_cap = bytes;
191
  }
192
  l->dict_cap = l->num_strings;
1296 mateusz.vi 193
  l->dict = realloc(l->dict, l->dict_cap * sizeof(struct dict_entry));
1290 bernd.boec 194
}
195
 
196
 
1296 mateusz.vi 197
static void svl_lang_free(struct svl_lang *l) {
1290 bernd.boec 198
  l->num_strings = 0;
199
  if (l->dict) {
200
    free(l->dict);
201
    l->dict = NULL;
202
  }
203
  if (l->strings) {
204
    free(l->strings);
205
    l->strings = l->strings_end = NULL;
206
  }
207
  l->dict_cap = 0;
208
  l->strings_cap = 0;
209
}
210
 
211
 
1296 mateusz.vi 212
static size_t svl_strings_bytes(const struct svl_lang *l) {
1295 mateusz.vi 213
  return(l->strings_end - l->strings);
1290 bernd.boec 214
}
215
 
216
 
1296 mateusz.vi 217
static size_t svl_dict_bytes(const struct svl_lang *l) {
218
  return(l->num_strings * sizeof(struct dict_entry));
1290 bernd.boec 219
}
220
 
221
 
1296 mateusz.vi 222
static int svl_add_str(struct svl_lang *l, unsigned short id, const char *s) {
1290 bernd.boec 223
  size_t len = strlen(s) + 1;
224
  size_t cursor;
225
 
1296 mateusz.vi 226
  if ((l->strings_cap < svl_strings_bytes(l) + len) || (l->dict_cap < (l->num_strings + 1) * sizeof(struct dict_entry))) {
1295 mateusz.vi 227
    return(0);
1290 bernd.boec 228
  }
1293 mateusz.vi 229
 
1290 bernd.boec 230
  /* find dictionary insert position, search backwards in assumption
231
     that in translation files, strings are generally ordered ascending */
232
  for (cursor = l->num_strings; cursor > 0 && l->dict[cursor-1].id > id; cursor--);
233
 
1296 mateusz.vi 234
  memmove(&(l->dict[cursor+1]), &(l->dict[cursor]), sizeof(struct dict_entry) * (l->num_strings - cursor));
1290 bernd.boec 235
  l->dict[cursor].id = id;
236
  l->dict[cursor].offset = l->strings_end - l->strings;
237
 
238
  memcpy(l->strings_end, s, len);
239
  l->strings_end += len;
240
  l->num_strings++;
241
 
1295 mateusz.vi 242
  return(1);
1290 bernd.boec 243
}
244
 
245
 
1296 mateusz.vi 246
static int svl_find(const struct svl_lang *l, unsigned short id) {
1295 mateusz.vi 247
  size_t left = 0, right = l->num_strings - 1, x;
248
  unsigned short v;
1290 bernd.boec 249
 
1295 mateusz.vi 250
  if (l->num_strings == 0) return(0);
1290 bernd.boec 251
 
1295 mateusz.vi 252
  while (left <= right ) {
253
    x = left + ( (right - left ) >> 2 );
254
    v = l->dict[x].id;
1296 mateusz.vi 255
    if ( id == v ) return(1); /* found! */
256
 
257
    if (id > v) {
1295 mateusz.vi 258
      left = x + 1;
259
    } else {
260
      right = x - 1;
261
    }
262
  }
263
  return(0);
1290 bernd.boec 264
}
265
 
1295 mateusz.vi 266
 
1061 mateusz.vi 267
/* opens a CATS-style file and compiles it into a ressources lang block
268
 * returns 0 on error, or the size of the generated data block otherwise */
1296 mateusz.vi 269
static unsigned short svl_lang_from_cats_file(struct svl_lang *l, struct svl_lang *refl) {
1290 bernd.boec 270
  unsigned short linelen;
597 mateuszvis 271
  FILE *fd;
1290 bernd.boec 272
  char fname[] = "xx.txt";
623 mateuszvis 273
  static char linebuf[8192];
1114 mateusz.vi 274
  const char *ptr;
1290 bernd.boec 275
  unsigned short id, maxid=0, maxid_line, linecount;
276
  int i;
597 mateuszvis 277
 
1290 bernd.boec 278
  fname[strlen(fname) - 6] = (char)tolower( l->id[0] );
279
  fname[strlen(fname) - 5] = (char)tolower( l->id[1] );
597 mateuszvis 280
 
281
  fd = fopen(fname, "rb");
282
  if (fd == NULL) {
2143 mateusz.vi 283
    printf("ERROR: FAILED TO OPEN '%s'\n", fname);
597 mateuszvis 284
    return(0);
285
  }
286
 
287
  for (linecount = 1;; linecount++) {
288
    linelen = readl(linebuf, sizeof(linebuf), fd);
289
    if (linelen == 0xffff) break; /* EOF */
290
    if ((linelen == 0) || (linebuf[0] == '#')) continue;
291
 
639 mateusz.vi 292
    /* convert escaped chars to actual bytes (\n -> newline, etc) */
293
    linelen = unesc_string(linebuf);
294
 
597 mateuszvis 295
    /* read id and get ptr to actual string ("1.15:string") */
296
    ptr = parseline(&id, linebuf);
1272 mateusz.vi 297
 
298
    /* handle malformed lines */
597 mateuszvis 299
    if (ptr == NULL) {
2143 mateusz.vi 300
      printf("WARNING: %s[#%u] is malformed (linelen = %u):\n", fname, linecount, linelen);
623 mateuszvis 301
      puts(linebuf);
1272 mateusz.vi 302
      continue;
597 mateuszvis 303
    }
1272 mateusz.vi 304
 
305
    /* ignore empty strings (but emit a warning) */
306
    if (ptr[0] == 0) {
2143 mateusz.vi 307
      printf("WARNING: %s[#%u] ignoring empty string %u.%u\n", fname, linecount, id >> 8, id & 0xff);
1271 bernd.boec 308
      continue;
309
    }
597 mateuszvis 310
 
1114 mateusz.vi 311
    /* warn about dirty lines */
312
    if (linebuf[0] == '?') {
2143 mateusz.vi 313
      printf("WARNING: %s[#%u] string id %u.%u is flagged as 'dirty'\n", fname, linecount, id >> 8, id & 0xff);
1114 mateusz.vi 314
    }
315
 
1290 bernd.boec 316
    /* add the string contained in current line, if conditions are met */
317
    if (!svl_find(l, id)) {
1295 mateusz.vi 318
      if ((refl == NULL) || (svl_find(refl, id))) {
1290 bernd.boec 319
        if (!svl_add_str(l, id, ptr)) {
2014 bernd.boec 320
          fprintf(stderr, "ERROR: %s[#%u] output size limit exceeded\r\n", fname, linecount);
1290 bernd.boec 321
          fclose(fd);
1295 mateusz.vi 322
          return(0);
1290 bernd.boec 323
        }
324
        if (id >= maxid) {
325
          maxid = id;
326
          maxid_line = linecount;
1295 mateusz.vi 327
        } else {
2143 mateusz.vi 328
          printf("WARNING:%s[#%u] file unsorted - line %u has higher id %u.%u\n", fname, linecount, maxid_line, maxid >> 8, maxid & 0xff);
1290 bernd.boec 329
        }
1295 mateusz.vi 330
      } else {
2143 mateusz.vi 331
        printf("WARNING: %s[#%u] has an invalid id (%u.%u not present in ref lang)\n", fname, linecount, id >> 8, id & 0xff);
1290 bernd.boec 332
      }
1295 mateusz.vi 333
    } else {
2143 mateusz.vi 334
      printf("WARNING: %s[#%u] has a duplicated id (%u.%u)\n", fname, linecount, id >> 8, id & 0xff);
597 mateuszvis 335
    }
336
  }
337
 
338
  fclose(fd);
339
 
1290 bernd.boec 340
  /* if reflang provided, pull missing strings from it */
341
  if (refl != NULL) {
342
    for (i = 0; i < refl->num_strings; i++) {
343
      id = refl->dict[i].id;
344
      if (!svl_find(l, id)) {
2143 mateusz.vi 345
        printf("WARNING: %s is missing string %u.%u (pulled from ref lang)\n", fname, id >> 8, id & 0xff);
1291 bernd.boec 346
        if (!svl_add_str(l, id, refl->strings + refl->dict[i].offset)) {
2014 bernd.boec 347
          fprintf(stderr, "ERROR: %s[#%u] output size limit exceeded\r\n", fname, linecount);
1295 mateusz.vi 348
          return(0);
1290 bernd.boec 349
        }
597 mateuszvis 350
      }
351
    }
352
  }
353
 
1290 bernd.boec 354
  return(svl_strings_bytes(l));
355
}
597 mateuszvis 356
 
1290 bernd.boec 357
 
1295 mateusz.vi 358
/* writes the LNG file header to fd: the 4-byte signature "SvL\x1a" followed
 * by the number of strings as a 16-bit word (in host byte order).
 * returns 1 on success, 0 on write error */
static int svl_write_header(unsigned short num_strings, FILE *fd) {
  static const char magic[4] = {'S', 'v', 'L', 0x1a};

  if (fwrite(magic, 1, 4, fd) != 4) return(0);
  if (fwrite(&num_strings, 1, 2, fd) != 2) return(0);

  return(1);
}
361
 
362
 
2102 mateusz.vi 363
 
364
/* flushes qlen literal bytes from q into the token stream at *dst and moves
 * *dst past the words written. each literal token carries one byte in its
 * low 8 bits and, in bits 8+, the count of raw words that directly follow it;
 * an odd leftover byte gets a token of its own.
 * returns the number of bytes added to the compressed stream */
static unsigned short mvcomp_litqueue_dump(unsigned short **dst, const unsigned char *q, unsigned short qlen) {
  unsigned short written = 0;

  while (qlen != 0) {
    unsigned short rawwords;

    /* the token itself consumes one literal byte */
    qlen--;
    rawwords = qlen / 2;
    **dst = (unsigned short)(rawwords << 8) | q[0];
    (*dst)++;
    q++;
    written += 2;

    /* followed by as many complete raw words as are pending */
    if (rawwords != 0) {
      size_t rawbytes = (size_t)rawwords * 2;
      memcpy(*dst, q, rawbytes);
      *dst += rawwords;
      q += rawbytes;
      written += rawbytes;
      qlen -= rawbytes;
    }

    /* a single byte may remain here: next iteration emits it as a token */
  }

  return(written);
}
395
 
396
 
2117 mateusz.vi 397
/* returns the length (0..n) of the common prefix of the memory areas s1 and
 * s2, looking at no more than n bytes */
static unsigned short comparemem(const unsigned char *s1, const unsigned char *s2, unsigned short n) {
  unsigned short same = 0;

  while ((same < n) && (s1[same] == s2[same])) {
    same++;
  }

  return(same);
}
403
 
404
 
405
/* mvcomp applies the MV-COMPRESSION algorithm to the data at src and stores
 * the resulting stream of 16-bit tokens into dstbuf.
 *
 * dstbuf         output buffer (accessed as an array of 16-bit words)
 * dstbufsz       size of dstbuf, in bytes. compression stops early once less
 *                than 32 bytes are left (the test is "dstbufsz - 32", so
 *                dstbufsz is expected to be at least 32)
 * src            data to compress
 * len            in: number of bytes at src, out: number of input bytes left
 *                unprocessed (0 unless the output buffer got full)
 * maxbytesahead  out: highest amount of bytes by which the compressed stream
 *                got ahead of the uncompressed data
 *
 * returns the size of the compressed stream, in bytes */
static unsigned short mvcomp(void *dstbuf, size_t dstbufsz, const unsigned char *src, size_t *len, unsigned short *maxbytesahead) {
  unsigned short complen = 0;
  unsigned short *dst = dstbuf;
  unsigned short bytesprocessed = 0;
  unsigned char litqueue[32];
  unsigned char litqueuelen = 0;

  *maxbytesahead = 0;

  /* read src byte by byte, len times, each time look for a match of 16,15,14..2 chars in the back buffer */
  while (*len > 0) {
    unsigned short matchlen;
    unsigned short minmatch;
    unsigned short offset;
    matchlen = 16;
    if (*len < matchlen) matchlen = (unsigned short)(*len);

    /* monitor the amount of bytes that the compressed stream is "ahead" of
     * uncompressed data, this is an information used later to size a proper
     * buffer for in-place depacking */
    if (complen + litqueuelen + 2 > bytesprocessed) {
      unsigned short mvstreamlen = complen + litqueuelen + 2;
      if (*maxbytesahead < (mvstreamlen - bytesprocessed)) *maxbytesahead = mvstreamlen - bytesprocessed;
    }

    /* abort if no space in output buffer, but do NOT break a literal queue */
    if ((complen >= dstbufsz - 32) && (litqueuelen == 0)) return(complen);

    /* look for a minimum match of 2 bytes, unless I have some pending literal bytes
     * awaiting, in which case I am going through a new data pattern and it is more
     * efficient to wait for a longer match before breaking the literal string */
    if (litqueuelen & 1) {
      minmatch = 3; /* breaking an uneven queue is less expensive */
    } else if (litqueuelen > 0) {
      goto NOMATCH; /* breaking an even-sized literal queue is never a good idea */
    } else {
      minmatch = 2;
    }

    if (matchlen >= minmatch) {
      /* start at -1 and try to match something moving backward. note that
       * matching a string longer than the offset is perfectly valid, this
       * allows for encoding self-duplicating strings (see MVCOMP.TXT) */
      unsigned short maxoffset = 4096;
      unsigned short longestmatch = 0;
      unsigned short longestmatchoffset = 0;
      if (maxoffset > bytesprocessed) maxoffset = bytesprocessed;

      for (offset = 1; offset <= maxoffset; offset++) {
        const unsigned char *candidate = src - offset;
        unsigned short matchingbytes;
        /* quick skip if first two bytes do not match (never interested in
         * 1-byte matches). compared byte-wise: src has no alignment guarantee
         * so it must not be read through a 16-bit pointer. at least 2 bytes
         * are available here since matchlen >= minmatch >= 2 */
        if ((src[0] != candidate[0]) || (src[1] != candidate[1])) continue;
        /* compute the exact number of bytes that match */
        matchingbytes = comparemem(src, candidate, matchlen);
        if (matchingbytes == matchlen) {
          goto FOUND; /* cannot do better than a full-length match */
        }
        if (matchingbytes > longestmatch) {
          longestmatch = matchingbytes;
          longestmatchoffset = offset ;
        }
      }
      /* is the longest match interesting? */
      if (longestmatch >= minmatch) {
        matchlen = longestmatch;
        offset = longestmatchoffset;
        goto FOUND;
      }
    }

    NOMATCH:

    /* if here: no match found, write a literal byte to queue */
    litqueue[litqueuelen++] = *src;
    src++;
    bytesprocessed++;
    *len -= 1;

    /* dump literal queue to dst if max length reached */
    if (litqueuelen == 31) {
      complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
      litqueuelen = 0;
    }
    continue;

    FOUND: /* found a match of matchlen bytes at -offset */

    /* dump awaiting literal queue to dst first */
    if (litqueuelen != 0) {
      complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
      litqueuelen = 0;
    }

    /* back reference token: LLLL OOOO OOOO OOOO (length-1, offset-1) */
    *dst = (unsigned short)((matchlen - 1) << 12) | (offset - 1);
    dst++;
    src += matchlen;
    bytesprocessed += matchlen;
    *len -= matchlen;
    complen += 2;
  }

  /* dump whatever literals are still waiting in the queue */
  if (litqueuelen != 0) {
    complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
    litqueuelen = 0;
  }

  return(complen);
}
517
 
518
 
519
/* write the language block (id, dict, strings) into the LNG file.
520
 * strings are compressed if compflag != 0 */
2123 mateusz.vi 521
static int svl_write_lang(const struct svl_lang *l, FILE *fd, int compflag, unsigned short *buffrequired) {
1290 bernd.boec 522
  unsigned short strings_bytes = svl_strings_bytes(l);
2083 mateusz.vi 523
  unsigned short langid = *((unsigned short *)(&l->id));
524
  const char *stringsptr = l->strings;
597 mateuszvis 525
 
2083 mateusz.vi 526
  /* if compressed then do the magic */
527
  if (compflag) {
528
    static char compstrings[65000];
2089 mateusz.vi 529
    unsigned short comp_bytes;
2117 mateusz.vi 530
    size_t stringslen = strings_bytes;
2123 mateusz.vi 531
    unsigned short mvcompbytesahead;
532
    comp_bytes = mvcomp(compstrings, sizeof(compstrings), l->strings, &stringslen, &mvcompbytesahead);
533
    if (mvcompbytesahead + stringslen > *buffrequired) {
534
      *buffrequired = mvcompbytesahead + stringslen;
535
    }
2089 mateusz.vi 536
    if (comp_bytes < strings_bytes) {
2123 mateusz.vi 537
      printf("lang %c%c mvcomp-ressed (%u bytes -> %u bytes) mvcomp stream at most %u bytes ahead of raw data (%u bytes needed for in-place decomp)\n", l->id[0], l->id[1], strings_bytes, comp_bytes, mvcompbytesahead, strings_bytes + mvcompbytesahead);
2089 mateusz.vi 538
      langid |= 0x8000; /* LNG langblock flag that means "this lang is compressed" */
539
      strings_bytes = comp_bytes;
540
      stringsptr = compstrings;
541
    } else {
542
      printf("lang %c%c left UNCOMPRESSED (uncomp=%u bytes ; mvcomp=%u bytes)\n", l->id[0], l->id[1], strings_bytes, comp_bytes);
543
    }
2083 mateusz.vi 544
  }
545
 
546
  return((fwrite(&langid, 1, 2, fd) == 2) &&
1290 bernd.boec 547
         (fwrite(&strings_bytes, 1, 2, fd) == 2) &&
548
         (fwrite(l->dict, 1, svl_dict_bytes(l), fd) == svl_dict_bytes(l)) &&
2083 mateusz.vi 549
         (fwrite(stringsptr, 1, strings_bytes, fd) == strings_bytes));
1290 bernd.boec 550
}
551
 
552
 
1296 mateusz.vi 553
static int svl_write_c_source(const struct svl_lang *l, const char *fn, unsigned short biggest_langsz) {
1290 bernd.boec 554
  FILE *fd;
555
  int i;
556
  unsigned short strings_bytes = svl_strings_bytes(l);
557
  unsigned short nextnlat = 0;
1295 mateusz.vi 558
  unsigned short allocsz;
1290 bernd.boec 559
 
560
  fd = fopen(fn, "wb");
561
  if (fd == NULL) {
1295 mateusz.vi 562
    return(0);
563
  }
1290 bernd.boec 564
 
2123 mateusz.vi 565
  allocsz = biggest_langsz + (biggest_langsz / 20);
566
  printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (5%% safety margin)\n", biggest_langsz, allocsz);
1295 mateusz.vi 567
  fprintf(fd, "/* THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY) */\r\n");
568
  fprintf(fd, "const unsigned short svarlang_memsz = %uu;\r\n", allocsz);
569
  fprintf(fd, "const unsigned short svarlang_string_count = %uu;\r\n\r\n", l->num_strings);
570
  fprintf(fd, "char svarlang_mem[%u] = {\r\n", allocsz);
571
 
572
  for (i = 0; i < strings_bytes; i++) {
573
    if (!fprintf(fd, "0x%02x", l->strings[i])) {
574
      fclose(fd);
575
      return(0);
1290 bernd.boec 576
    }
577
 
1295 mateusz.vi 578
    if (i + 1 < strings_bytes) fprintf(fd, ",");
579
    nextnlat++;
580
    if (l->strings[i] == '\0' || nextnlat == 16) {
1290 bernd.boec 581
      fprintf(fd, "\r\n");
1295 mateusz.vi 582
      nextnlat = 0;
1290 bernd.boec 583
    }
1295 mateusz.vi 584
  }
585
  fprintf(fd, "};\r\n\r\n");
1290 bernd.boec 586
 
1295 mateusz.vi 587
  fprintf(fd, "unsigned short svarlang_dict[%u] = {\r\n", l->num_strings * 2);
588
  for (i = 0; i < l->num_strings; i++) {
589
    if (!fprintf(fd, "0x%04x,0x%04x", l->dict[i].id, l->dict[i].offset)) {
590
      fclose(fd);
591
      return(0);
592
    }
593
    if (i + 1 < l->num_strings) fprintf(fd, ",");
594
    fprintf(fd, "\r\n");
1290 bernd.boec 595
  }
1295 mateusz.vi 596
  fprintf(fd, "};\r\n");
1290 bernd.boec 597
 
1295 mateusz.vi 598
  fclose(fd);
599
 
600
  return(1);
1290 bernd.boec 601
}
602
 
603
 
2014 bernd.boec 604
static int svl_write_asm_source(const struct svl_lang *l, const char *fn, unsigned short biggest_langsz, int format) {
605
  FILE *fd;
606
  int i;
607
  unsigned short strings_bytes = svl_strings_bytes(l);
608
  unsigned short nextnlat = 0;
609
  unsigned short allocsz;
610
 
611
  const char *public = (format == ASM_OUTPUT) ? "public" : "global";
612
 
613
  fd = fopen(fn, "wb");
614
  if (fd == NULL) {
615
    return(0);
616
  }
617
 
618
  allocsz = biggest_langsz + (biggest_langsz / 20);
619
  printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (5%% safety margin)\n", biggest_langsz, allocsz);
620
  fprintf(fd, "; THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY)\r\n");
621
  fprintf(fd, "%s svarlang_memsz\r\n", public);
622
  fprintf(fd, "svarlang_memsz dw %u\r\n", allocsz);
623
  fprintf(fd, "%s svarlang_string_count\r\n", public);
624
  fprintf(fd, "svarlang_string_count dw %u\r\n\r\n", l->num_strings);
625
  fprintf(fd, "%s svarlang_mem\r\n", public);
626
  fprintf(fd, "svarlang_mem:\r\n");
627
 
628
  if (strings_bytes > 0) fprintf(fd, "db ");
629
 
630
  for (i = 0; i < strings_bytes; i++) {
2016 bernd.boec 631
    if (!fprintf(fd, "%u", l->strings[i])) {
2014 bernd.boec 632
      fclose(fd);
633
      return(0);
634
    }
635
 
636
    nextnlat++;
637
    if (l->strings[i] == '\0' || nextnlat == 16) {
638
      fprintf(fd, "\r\n");
639
      if (i + 1 < strings_bytes ) fprintf(fd, "db ");
640
      nextnlat = 0;
641
    }
642
    else {
643
      fprintf(fd, ",");
644
    }
645
  }
646
 
647
  fprintf(fd, "\r\n%s svarlang_dict\r\n", public);
648
  fprintf(fd, "svarlang_dict:\r\n");
649
  for (i = 0; i < l->num_strings; i++) {
2016 bernd.boec 650
    if (!fprintf(fd, "dw %u,%u\r\n", l->dict[i].id, l->dict[i].offset)) {
2014 bernd.boec 651
      fclose(fd);
652
      return(0);
653
    }
654
  }
655
 
656
  fclose(fd);
657
 
658
  return(1);
659
}
660
 
661
 
597 mateuszvis 662
int main(int argc, char **argv) {
2090 mateusz.vi 663
  FILE *fd;
597 mateuszvis 664
  int ecode = 0;
2014 bernd.boec 665
  int i, output_format = C_OUTPUT;
2092 mateusz.vi 666
  int mvcomp_enabled = 1;
2094 mateusz.vi 667
  int excref = 0;
1061 mateusz.vi 668
  unsigned short biggest_langsz = 0;
2015 bernd.boec 669
  struct svl_lang *lang = NULL, *reflang = NULL;
597 mateuszvis 670
 
671
  if (argc < 2) {
1247 mateusz.vi 672
    puts("tlumacz ver " SVARLANGVER " - this tool is part of the SvarLANG project.");
673
    puts("converts a set of CATS-style translations in files EN.TXT, PL.TXT, etc");
2094 mateusz.vi 674
    puts("into a single resource file (OUT.LNG). Also generates a deflang source");
675
    puts("file that contains a properly sized buffer pre-filled with the first");
676
    puts("(reference) language.");
1247 mateusz.vi 677
    puts("");
2108 mateusz.vi 678
    puts("usage: tlumacz [/c|/asm|/nasm|/nodef] [/nocomp] [/excref] en fr pl ...");
2092 mateusz.vi 679
    puts("");
2094 mateusz.vi 680
    puts("/c        generates deflang.c (default)");
681
    puts("/asm      deflang ASM output");
682
    puts("/nasm     deflang NASM output");
2108 mateusz.vi 683
    puts("/nodef    does NOT generate a deflang source file (only an LNG file)");
2113 mateusz.vi 684
    puts("/nocomp   disables the MVCOMP compression of strings in the LNG file");
2094 mateusz.vi 685
    puts("/excref   excludes ref lang from the LNG file (inserted to deflang only)");
597 mateuszvis 686
    return(1);
687
  }
688
 
601 mateuszvis 689
  fd = fopen("out.lng", "wb");
597 mateuszvis 690
  if (fd == NULL) {
2014 bernd.boec 691
    fprintf(stderr, "ERROR: FAILED TO CREATE OR OPEN OUT.LNG");
597 mateuszvis 692
    return(1);
693
  }
694
 
695
  /* write lang blocks */
696
  for (i = 1; i < argc; i++) {
697
    unsigned short sz;
698
    char id[3];
699
 
2014 bernd.boec 700
    if (!strcmp(argv[i], "/c")) {
701
      output_format = C_OUTPUT;
702
      continue;
2090 mateusz.vi 703
    } else if (!strcmp(argv[i], "/asm")) {
2014 bernd.boec 704
      output_format = ASM_OUTPUT;
705
      continue;
706
    } else if(!strcmp(argv[i], "/nasm")) {
707
      output_format = NASM_OUTPUT;
708
      continue;
2092 mateusz.vi 709
    } else if(!strcmp(argv[i], "/nocomp")) {
710
      mvcomp_enabled = 0;
2090 mateusz.vi 711
      continue;
2108 mateusz.vi 712
    } else if(!strcmp(argv[i], "/nodef")) {
713
      output_format = NO_OUTPUT;
714
      continue;
2094 mateusz.vi 715
    } else if(!strcmp(argv[i], "/excref")) {
716
      excref = 1;
717
      continue;
2014 bernd.boec 718
    }
719
 
597 mateuszvis 720
    if (strlen(argv[i]) != 2) {
2014 bernd.boec 721
      fprintf(stderr, "INVALID LANG SPECIFIED: %s\r\n", argv[i]);
597 mateuszvis 722
      ecode = 1;
2015 bernd.boec 723
      goto exit_main;
597 mateuszvis 724
    }
725
    id[0] = argv[i][0];
726
    id[1] = argv[i][1];
727
    id[2] = 0;
728
 
1290 bernd.boec 729
    if ((lang = svl_lang_new(id, DICT_CAP, STRINGS_CAP)) == NULL) {
2014 bernd.boec 730
      fprintf(stderr, "OUT OF MEMORY\r\n");
2015 bernd.boec 731
      ecode = 1;
732
      goto exit_main;
1290 bernd.boec 733
    }
734
 
735
    sz = svl_lang_from_cats_file(lang, reflang);
597 mateuszvis 736
    if (sz == 0) {
2014 bernd.boec 737
      fprintf(stderr, "ERROR COMPUTING LANG '%s'\r\n", id);
597 mateuszvis 738
      ecode = 1;
2015 bernd.boec 739
      goto exit_main;
597 mateuszvis 740
    } else {
741
      printf("computed %s lang block of %u bytes\r\n", id, sz);
1061 mateusz.vi 742
      if (sz > biggest_langsz) biggest_langsz = sz;
597 mateuszvis 743
    }
1290 bernd.boec 744
    svl_compact_lang(lang);
745
 
746
    /* write header if first (reference) language */
2090 mateusz.vi 747
    if (!reflang) {
1290 bernd.boec 748
      if (!svl_write_header(lang->num_strings, fd)) {
2014 bernd.boec 749
        fprintf(stderr, "ERROR WRITING TO OUTPUT FILE\r\n");
1290 bernd.boec 750
        ecode = 1;
2015 bernd.boec 751
        goto exit_main;
1290 bernd.boec 752
      }
753
    }
1293 mateusz.vi 754
 
1290 bernd.boec 755
    /* write lang ID to file, followed string table size, and then
2094 mateusz.vi 756
       the dictionary and string table for current language
757
       skip this for reference language if /excref given */
758
    if ((reflang != NULL) || (excref == 0)) {
2123 mateusz.vi 759
      /* also updates the biggest_langsz variable to accomodate enough space
760
       * for in-place decompression of mvcomp-compressed lang blocks */
761
      if (!svl_write_lang(lang, fd, mvcomp_enabled, &biggest_langsz)) {
2094 mateusz.vi 762
        fprintf(stderr, "ERROR WRITING TO OUTPUT FILE\r\n");
763
        ecode = 1;
764
        goto exit_main;
765
      }
2095 mateusz.vi 766
    } else {
767
      puts("ref language NOT saved in the LNG file (/excref)");
597 mateuszvis 768
    }
1290 bernd.boec 769
 
1061 mateusz.vi 770
    /* remember reference data for other languages */
2014 bernd.boec 771
    if (!reflang) {
1290 bernd.boec 772
      reflang = lang;
1295 mateusz.vi 773
    } else {
1290 bernd.boec 774
      svl_lang_free(lang);
775
      lang = NULL;
776
    }
597 mateuszvis 777
  }
778
 
2014 bernd.boec 779
  if (!reflang) {
780
    fprintf(stderr, "ERROR: NO LANGUAGE GIVEN\r\n");
2015 bernd.boec 781
    ecode = 1;
782
    goto exit_main;
1061 mateusz.vi 783
  }
784
 
2108 mateusz.vi 785
  /* compute the deflang file containing a dump of the reference lang block */
2014 bernd.boec 786
  if (output_format == C_OUTPUT) {
787
    if (!svl_write_c_source(reflang, "deflang.c", biggest_langsz)) {
788
      fprintf(stderr, "ERROR: FAILED TO OPEN OR CREATE DEFLANG.C\r\n");
789
      ecode = 1;
2083 mateusz.vi 790
    }
2108 mateusz.vi 791
  } else if ((output_format == ASM_OUTPUT) || (output_format == NASM_OUTPUT)) {
2014 bernd.boec 792
    if (!svl_write_asm_source(reflang, "deflang.inc", biggest_langsz, output_format)) {
793
      fprintf(stderr, "ERROR: FAILED TO OPEN OR CREATE DEFLANG.INC\r\n");
794
      ecode = 1;
795
    }
1290 bernd.boec 796
  }
797
 
2015 bernd.boec 798
exit_main:
2108 mateusz.vi 799
  if (lang && (lang != reflang)) {
2015 bernd.boec 800
    svl_lang_free(lang);
801
  }
802
  if (reflang) {
803
    svl_lang_free(reflang);
804
    reflang = NULL;
2083 mateusz.vi 805
    lang = NULL;
2015 bernd.boec 806
  }
2014 bernd.boec 807
 
2015 bernd.boec 808
  fclose(fd);
809
 
597 mateuszvis 810
  return(ecode);
811
}