Subversion Repositories SvarDOS

Rev

Rev 1271 | Rev 1290 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
597 mateuszvis 1
/*
1247 mateusz.vi 2
 * Copyright (C) 2021-2023 Mateusz Viste
597 mateuszvis 3
 *
4
 * usage: tlumacz en fr pl etc
5
 *
601 mateuszvis 6
 * computes an out.lng file that contains all language resources.
597 mateuszvis 7
 *
8
 * DAT format:
9
 *
10
 * 4-bytes signature:
11
 * "SvL\x1b"
12
 *
13
 * Then "LANG BLOCKS" follow. Each LANG BLOCK is prefixed with 4 bytes:
14
 * II LL    - II is the LANG identifier ("EN", "PL", etc) and LL is the size
15
 *            of the block (65535 bytes max).
16
 *
17
 * Inside a LANG BLOCK is a set of strings:
18
 *
623 mateuszvis 19
 * II LL S  where II is the string's 16-bit identifier, LL is its length
20
 *          (1-65535) and S is the actual string. All strings are ASCIIZ (ie.
21
 *          they end with a NULL terminator).
597 mateuszvis 22
 *
23
 * The list of strings ends with a single 0-long string.
24
 */
25
 
26
 
27
#include <stdio.h>
28
#include <stdlib.h>
29
#include <string.h>
30
 
1248 mateusz.vi 31
#include "svarlang.h"
597 mateuszvis 32
 
33
 
34
/* a set of string ids: one bit per possible id value (ids are handled as
 * unsigned short values by the bitmap_*() functions below) */
struct bitmap {
  unsigned char bits[65536 / 8]; /* 8 ids per byte */
};
37
 
38
static void bitmap_set(struct bitmap *b, unsigned short id) {
623 mateuszvis 39
  b->bits[id >> 3] |= 1 << (id & 7);
597 mateuszvis 40
}
41
 
42
static int bitmap_get(const struct bitmap *b, unsigned short id) {
623 mateuszvis 43
  return(b->bits[id >> 3] & (1 << (id & 7)));
597 mateuszvis 44
}
45
 
46
static void bitmap_init(struct bitmap *b) {
623 mateuszvis 47
  bzero(b, sizeof(struct bitmap));
597 mateuszvis 48
}
49
 
50
 
51
 
52
/* reads a single line from fd into dst (a buffer of dstsz bytes) and returns
 * the length of the resulting string, or 0xffff if nothing could be read
 * (EOF or read error).
 * The line is cut at the first CR or LF and trailing spaces are removed, so
 * a line made only of spaces yields an empty string (length 0). */
static unsigned short readl(char *dst, size_t dstsz, FILE *fd) {
  unsigned short l, trimmedlen = 0;

  if (fgets(dst, dstsz, fd) == NULL) return(0xffff); /* EOF */

  /* walk up to the first CR/LF (or end of string) while remembering the
   * length of the string up to and including its last non-space character */
  for (l = 0; (dst[l] != 0) && (dst[l] != '\r') && (dst[l] != '\n'); l++) {
    if (dst[l] != ' ') trimmedlen = l + 1;
  }

  /* terminate right after the last non-space character: this drops both the
   * line terminator and any trailing spaces */
  dst[trimmedlen] = 0;

  return(trimmedlen);
}
68
 
69
 
1114 mateusz.vi 70
/* parses a line in format "[?]1.50:somestring".
 *
 * The optional leading '?' (used to flag a string as "dirty") is skipped.
 * Both numbers must be made of at least one decimal digit and must fit in a
 * byte (0..255), since the resulting 16-bit id is (major << 8) | minor.
 *
 * On success fills *id and returns a pointer to the actual string part (the
 * first byte after the ':'). Returns NULL on error, in which case *id is
 * left untouched. */
static const char *parseline(unsigned short *id, const char *s) {
  unsigned int part[2] = {0, 0}; /* [0] = major number, [1] = minor number */
  int p;

  /* strings prefixed by '?' are flagged as "dirty": ignore this flag here */
  if (*s == '?') s++;

  for (p = 0; p < 2; p++) {
    int ndigits = 0;

    while ((*s >= '0') && (*s <= '9')) {
      part[p] = (part[p] * 10) + (unsigned int)(*s - '0');
      /* bail out as soon as the value no longer fits in its 8-bit slot: this
       * also guarantees that the accumulator can never overflow */
      if (part[p] > 255) return(NULL);
      ndigits++;
      s++;
    }

    /* each number needs at least one digit... */
    if (ndigits == 0) return(NULL);

    /* ...and must be followed by its delimiter: '.' after the major number,
     * ':' after the minor number */
    if (*s != ((p == 0) ? '.' : ':')) return(NULL);
    s++;
  }

  *id = (unsigned short)((part[0] << 8) | part[1]);

  return(s);
}
104
 
105
 
639 mateusz.vi 106
/* converts, in place, escape sequences into actual bytes and returns the new
 * length of the string:
 *   \e -> ESC (0x1B)    \n -> LF    \r -> CR    \t -> TAB
 * A backslash followed by any other character yields that character (so
 * "\\" becomes a single backslash), and a lone backslash at the very end of
 * the string is dropped.
 *
 * The string is processed in a single pass with separate read and write
 * positions, so no overlapping strcpy() is ever needed. */
static unsigned short unesc_string(char *linebuff) {
  unsigned short rd = 0, wr = 0;

  while (linebuff[rd] != 0) {
    char c = linebuff[rd++];

    if (c == '\\') {
      c = linebuff[rd];
      if (c == 0) break; /* trailing backslash: nothing left to unescape */
      rd++;
      switch (c) {
        case 'e':
          c = 0x1B; /* ESC code, using hex because '\e' is not ANSI C */
          break;
        case 'n':
          c = '\n';
          break;
        case 'r':
          c = '\r';
          break;
        case 't':
          c = '\t';
          break;
        default: /* unknown sequence: keep the escaped character as-is */
          break;
      }
    }

    /* wr never gets ahead of rd, so this cannot clobber unread input */
    linebuff[wr++] = c;
  }

  linebuff[wr] = 0;
  return(wr);
}
131
 
132
 
1061 mateusz.vi 133
/* opens a CATS-style file (XX.TXT, XX being langid) and compiles it into a
 * resources lang block.
 *
 *  buff     - output buffer that receives the lang block.
 *             NOTE(review): the capacity of buff is not known to this
 *             function, hence nothing here prevents writing past its end if
 *             the translations are bigger than what the caller allocated.
 *  langid   - the two characters used to build the file name
 *  b        - bitmap that is cleared and then filled with the ids of all
 *             strings found in the file
 *  refb     - bitmap of the reference language, used to warn about ids that
 *             the reference language does not have (NULL = no such check)
 *  refblock - lang block of the reference language, strings missing from
 *             the file are copied from it (NULL = nothing is pulled)
 *
 * returns 0 on error, or the size of the generated data block otherwise */
static unsigned short gen_langstrings(unsigned char *buff, const char *langid, struct bitmap *b, const struct bitmap *refb, const unsigned char *refblock) {
  unsigned short len = 0, linelen;
  FILE *fd;
  char fname[] = "XX.TXT";
  static char linebuf[8192];
  const char *ptr;
  unsigned short id, linecount;

  bitmap_init(b);

  /* replace the "XX" placeholder with the lang id */
  memcpy(fname, langid, 2);

  fd = fopen(fname, "rb");
  if (fd == NULL) {
    printf("ERROR: FAILED TO OPEN '%s'\r\n", fname);
    return(0);
  }

  for (linecount = 1;; linecount++) {

    linelen = readl(linebuf, sizeof(linebuf), fd);
    if (linelen == 0xffff) break; /* EOF */
    if ((linelen == 0) || (linebuf[0] == '#')) continue; /* empty line or comment */

    /* convert escaped chars to actual bytes (\n -> newline, etc) */
    linelen = unesc_string(linebuf);

    /* read id and get ptr to actual string ("1.15:string") */
    ptr = parseline(&id, linebuf);

    /* handle malformed lines */
    if (ptr == NULL) {
      printf("WARNING: %s[#%u] is malformed (linelen = %u):\r\n", fname, linecount, linelen);
      puts(linebuf);
      continue;
    }

    /* ignore empty strings (but emit a warning) */
    if (ptr[0] == 0) {
      printf("WARNING: %s[#%u] ignoring empty string %u.%u\r\n", fname, linecount, id >> 8, id & 0xff);
      continue;
    }

    /* warn about dirty lines */
    if (linebuf[0] == '?') {
      printf("WARNING: %s[#%u] string id %u.%u is flagged as 'dirty'\r\n", fname, linecount, id >> 8, id & 0xff);
    }

    /* write string into block (II LL S) */
    memcpy(buff + len, &id, 2);
    len += 2;
    {
      unsigned short slen = strlen(ptr) + 1; /* stored length includes the terminator */
      memcpy(buff + len, &slen, 2);
      len += 2;
      memcpy(buff + len, ptr, slen);
      len += slen;
    }

    /* if reference bitmap provided: check that the id is valid */
    if ((refb != NULL) && (bitmap_get(refb, id) == 0)) {
      printf("WARNING: %s[#%u] has an invalid id (%u.%u not present in ref lang)\r\n", fname, linecount, id >> 8, id & 0xff);
    }

    /* make sure this id is not already present */
    if (bitmap_get(b, id) == 0) {
      /* set bit in bitmap to remember I have this string */
      bitmap_set(b, id);
    } else {
      printf("WARNING: %s[#%u] has a duplicated id (%u.%u)\r\n", fname, linecount, id >> 8, id & 0xff);
    }
  }

  fclose(fd);

  /* if refblock provided, pull missing strings from it */
  if (refblock != NULL) {
    for (;;) {
      unsigned short slen;
      /* entries are packed back to back, so an entry header may well start
       * at an odd address: fetch II and LL with memcpy() instead of
       * dereferencing a (possibly misaligned) unsigned short pointer */
      memcpy(&id, refblock, 2);
      memcpy(&slen, refblock + 2, 2);
      if ((id == 0) && (slen == 0)) break; /* block terminator */
      if (bitmap_get(b, id) == 0) {
        printf("WARNING: %s is missing string %u.%u (pulled from ref lang)\r\n", fname, id >> 8, id & 0xff);
        /* copy missing string from refblock (header included) */
        memcpy(buff + len, refblock, slen + 4);
        len += slen + 4;
      }
      refblock += slen + 4;
    }
  }

  /* write the block terminator (0-long string) */
  buff[len++] = 0; /* id */
  buff[len++] = 0; /* id */
  buff[len++] = 0; /* len */
  buff[len++] = 0; /* len */
  buff[len++] = 0; /* empty string */

  return(len);
}
236
 
237
 
599 mateuszvis 238
#define MEMBLOCKSZ 65000
597 mateuszvis 239
 
240
int main(int argc, char **argv) {
241
  FILE *fd;
242
  int ecode = 0;
243
  char *buff, *refblock;
1061 mateusz.vi 244
  unsigned short refblocksz = 0;
597 mateuszvis 245
  static struct bitmap bufbitmap;
246
  static struct bitmap refbitmap;
247
  unsigned short i;
1061 mateusz.vi 248
  unsigned short biggest_langsz = 0;
597 mateuszvis 249
 
250
  if (argc < 2) {
1247 mateusz.vi 251
    puts("tlumacz ver " SVARLANGVER " - this tool is part of the SvarLANG project.");
252
    puts("converts a set of CATS-style translations in files EN.TXT, PL.TXT, etc");
253
    puts("into a single resource file (OUT.LNG).");
254
    puts("");
255
    puts("usage: tlumacz en fr pl ...");
597 mateuszvis 256
    return(1);
257
  }
258
 
259
  buff = malloc(MEMBLOCKSZ);
260
  refblock = malloc(MEMBLOCKSZ);
261
  if ((buff == NULL) || (refblock == NULL)) {
262
    puts("out of memory");
263
    return(1);
264
  }
265
 
601 mateuszvis 266
  fd = fopen("out.lng", "wb");
597 mateuszvis 267
  if (fd == NULL) {
1250 mateusz.vi 268
    puts("ERR: failed to open or create OUT.LNG");
597 mateuszvis 269
    return(1);
270
  }
271
 
272
  /* write sig */
273
  fwrite("SvL\x1b", 1, 4, fd);
274
 
275
  /* write lang blocks */
276
  for (i = 1; i < argc; i++) {
277
    unsigned short sz;
278
    char id[3];
279
 
280
    if (strlen(argv[i]) != 2) {
281
      printf("INVALID LANG SPECIFIED: %s\r\n", argv[i]);
282
      ecode = 1;
283
      break;
284
    }
285
 
286
    id[0] = argv[i][0];
287
    id[1] = argv[i][1];
288
    id[2] = 0;
289
    if (id[0] >= 'a') id[0] -= 'a' - 'A';
290
    if (id[1] >= 'a') id[1] -= 'a' - 'A';
291
 
292
    sz = gen_langstrings(buff, id, &bufbitmap, (i != 1)?&refbitmap:NULL, (i != 1)?refblock:NULL);
293
    if (sz == 0) {
294
      printf("ERROR COMPUTING LANG '%s'\r\n", id);
295
      ecode = 1;
296
      break;
297
    } else {
298
      printf("computed %s lang block of %u bytes\r\n", id, sz);
1061 mateusz.vi 299
      if (sz > biggest_langsz) biggest_langsz = sz;
597 mateuszvis 300
    }
301
    /* write lang ID to file, followed by block size and then the actual block */
302
    if ((fwrite(id, 1, 2, fd) != 2) ||
303
        (fwrite(&sz, 1, 2, fd) != 2) ||
304
        (fwrite(buff, 1, sz, fd) != sz)) {
305
      printf("ERROR WRITING TO OUTPUT FILE\r\n");
306
      ecode = 1;
307
      break;
308
    }
1061 mateusz.vi 309
    /* remember reference data for other languages */
597 mateuszvis 310
    if (i == 1) {
1061 mateusz.vi 311
      refblocksz = sz;
597 mateuszvis 312
      memcpy(refblock, buff, MEMBLOCKSZ);
313
      memcpy(&refbitmap, &bufbitmap, sizeof(struct bitmap));
314
    }
315
  }
316
 
317
  fclose(fd);
318
 
1061 mateusz.vi 319
  /* compute the deflang.c file containing a dump of the reference block */
320
  fd = fopen("DEFLANG.C", "wb");
321
  if (fd == NULL) {
322
    puts("ERROR: FAILED TO OPEN OR CREATE DEFLANG.C");
323
    ecode = 1;
324
  } else {
325
    unsigned short allocsz = biggest_langsz + (biggest_langsz / 20);
1251 mateusz.vi 326
    unsigned short nextstringin = 0, nextnlat = 40;
327
    printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (5%% safety margin)\n", biggest_langsz, allocsz);
1061 mateusz.vi 328
    fprintf(fd, "/* THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY) */\r\n");
329
    fprintf(fd, "const unsigned short svarlang_memsz = %uu;\r\n", allocsz);
1251 mateusz.vi 330
    fprintf(fd, "char svarlang_mem[%u] = {", allocsz);
1061 mateusz.vi 331
    for (i = 0; i < refblocksz; i++) {
1251 mateusz.vi 332
      if (nextstringin == 0) {
333
        fprintf(fd, "\r\n");
334
        nextnlat = i + 40;
335
        nextstringin = 4 + (refblock[i + 3] << 8) + refblock[i + 2];
336
        if (nextstringin == 4) nextstringin = 20000; /* last string in block */
337
      }
338
      if (i == nextnlat) {
339
        nextnlat += 40;
340
        fprintf(fd, "\r\n");
341
      }
342
      nextnlat--;
343
      nextstringin--;
1064 mateusz.vi 344
      fprintf(fd, "%u", refblock[i]);
1061 mateusz.vi 345
      if (i + 1 < refblocksz) fprintf(fd, ",");
346
    }
347
    fprintf(fd, "};\r\n");
348
    fclose(fd);
349
  }
350
 
597 mateuszvis 351
  return(ecode);
352
}