Subversion Repositories SvarDOS

Rev

Rev 969 | Rev 1064 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 969 Rev 1061
1
/*
1
/*
2
 * Copyright (C) 2021-2022 Mateusz Viste
2
 * Copyright (C) 2021-2022 Mateusz Viste
3
 *
3
 *
4
 * usage: tlumacz en fr pl etc
4
 * usage: tlumacz en fr pl etc
5
 *
5
 *
6
 * computes an out.lng file that contains all language resources.
6
 * computes an out.lng file that contains all language resources.
7
 *
7
 *
8
 * DAT format:
8
 * DAT format:
9
 *
9
 *
10
 * 4-bytes signature:
10
 * 4-bytes signature:
11
 * "SvL\x1b"
11
 * "SvL\x1b"
12
 *
12
 *
13
 * Then "LANG BLOCKS" follow. Each LANG BLOCK is prefixed with 4 bytes:
13
 * Then "LANG BLOCKS" follow. Each LANG BLOCK is prefixed with 4 bytes:
14
 * II LL    - II is the LANG identifier ("EN", "PL", etc) and LL is the size
14
 * II LL    - II is the LANG identifier ("EN", "PL", etc) and LL is the size
15
 *            of the block (65535 bytes max).
15
 *            of the block (65535 bytes max).
16
 *
16
 *
17
 * Inside a LANG BLOCK is a set of strings:
17
 * Inside a LANG BLOCK is a set of strings:
18
 *
18
 *
19
 * II LL S  where II is the string's 16-bit identifier, LL is its length
19
 * II LL S  where II is the string's 16-bit identifier, LL is its length
20
 *          (1-65535) and S is the actual string. All strings are ASCIIZ (ie.
20
 *          (1-65535) and S is the actual string. All strings are ASCIIZ (ie.
21
 *          they end with a NULL terminator).
21
 *          they end with a NULL terminator).
22
 *
22
 *
23
 * The list of strings ends with a single 0-long string.
23
 * The list of strings ends with a single 0-long string.
24
 */
24
 */
25
 
25
 
26
 
26
 
27
#include <stdio.h>
27
#include <stdio.h>
28
#include <stdlib.h>
28
#include <stdlib.h>
29
#include <string.h>
29
#include <string.h>
30
 
30
 
31
 
31
 
32
 
32
 
33
/* a set of 16-bit string identifiers, stored as one bit per possible id
 * (65536 ids / 8 bits per byte = 8192 bytes) */
struct bitmap {
  unsigned char bits[8192];
};

/* marks id as present in bitmap b */
static void bitmap_set(struct bitmap *b, unsigned short id) {
  b->bits[id >> 3] |= (unsigned char)(1u << (id & 7));
}

/* returns non-zero (1) if id is present in bitmap b, 0 otherwise */
static int bitmap_get(const struct bitmap *b, unsigned short id) {
  return((b->bits[id >> 3] >> (id & 7)) & 1);
}

/* empties bitmap b (no id is present afterwards) */
static void bitmap_init(struct bitmap *b) {
  /* memset() is standard C and declared by <string.h>; bzero() is a legacy
   * BSD call that lives in <strings.h>, which this file does not include */
  memset(b, 0, sizeof(*b));
}
48
 
48
 
49
 
49
 
50
 
50
 
51
/* reads a single line from fd into dst (a buffer of dstsz bytes).
 * The line is cut at the first CR or LF and trailing spaces are removed, so
 * a line made only of spaces comes back as an empty string.
 * Returns the length of the resulting string, or 0xffff on EOF / read error.
 * Lines that do not fit in dst are returned in several pieces by successive
 * calls (this is how fgets() behaves). */
static unsigned short readl(char *dst, size_t dstsz, FILE *fd) {
  size_t pos;
  size_t keep = 0; /* length to keep = one past the last non-space char */

  if (fgets(dst, (int)dstsz, fd) == NULL) return(0xffff); /* EOF */

  /* walk up to the first CR/LF/NUL, remembering where the text really ends */
  for (pos = 0; (dst[pos] != 0) && (dst[pos] != '\r') && (dst[pos] != '\n'); pos++) {
    if (dst[pos] != ' ') keep = pos + 1;
  }

  dst[keep] = 0; /* drops the line ending and any trailing spaces */

  return((unsigned short)keep);
}
67
 
67
 
68
 
68
 
69
/* parses a line in the format "1.50:somestring", ie. SECTION.ITEM:TEXT
 *
 * SECTION and ITEM must each be a non-empty run of decimal digits with a
 * value in the range 0..255, and TEXT must not be empty.
 *
 * On success *id is set to (SECTION << 8) | ITEM and a pointer to TEXT
 * (inside s) is returned. On error NULL is returned and *id is untouched. */
static char *parseline(unsigned short *id, char *s) {
  unsigned int part[2] = {0, 0}; /* [0] = section, [1] = item */
  char *p = s;
  int field;

  for (field = 0; field < 2; field++) {
    int ndigits = 0;

    while ((*p >= '0') && (*p <= '9')) {
      part[field] = (part[field] * 10) + (unsigned int)(*p - '0');
      /* each half of the id is stored in 8 bits: reject anything bigger
       * instead of letting it spill silently into the other half */
      if (part[field] > 255) return(NULL);
      ndigits++;
      p++;
    }

    /* a number is mandatory on both sides of the dot ("1.:x" is invalid) */
    if (ndigits == 0) return(NULL);

    /* the section is terminated by '.', the item by ':' */
    if (*p != ((field == 0) ? '.' : ':')) return(NULL);
    p++;
  }

  /* an id without any text is of no use */
  if (*p == 0) return(NULL);

  *id = (unsigned short)((part[0] << 8) | part[1]);

  return(p);
}
101
 
101
 
102
 
102
 
103
/* converts, in place, backslash escape sequences into actual bytes:
 *   \n -> LF, \r -> CR, \t -> TAB
 * Any other escaped character is kept as-is without its backslash (so "\\"
 * yields a single backslash). A lone backslash at the very end of the string
 * is dropped.
 * Returns the new length of the string. */
static unsigned short unesc_string(char *linebuff) {
  size_t rd = 0; /* next byte to read */
  size_t wr = 0; /* next byte to write (never ahead of rd) */

  /* a single pass with separate read/write cursors avoids shifting the tail
   * of the string on every escape (and avoids calling strcpy() on
   * overlapping buffers, which is undefined behaviour) */
  while (linebuff[rd] != 0) {
    char c = linebuff[rd++];

    if (c == '\\') {
      c = linebuff[rd];
      if (c == 0) break; /* trailing backslash with nothing to escape */
      rd++;
      switch (c) {
        case 'n':
          c = '\n';
          break;
        case 'r':
          c = '\r';
          break;
        case 't':
          c = '\t';
          break;
        default: /* unknown escape: keep the character itself */
          break;
      }
    }

    linebuff[wr++] = c;
  }

  linebuff[wr] = 0;

  return((unsigned short)wr);
}
125
 
125
 
126
 
126
 
127
/* maximum size of a generated lang block.
 * NOTE(review): must not exceed the size of the buffer that the caller hands
 * over as buff (main() allocates MEMBLOCKSZ bytes) - keep both in sync. */
#define LANGBLOCK_MAXSZ 65000u

/* size of the block terminator written after the last string */
#define LANGBLOCK_TERMSZ 5u

/* opens the CATS-style file "XX.TXT" (XX being the 2-letter langid) and
 * compiles it into a resources lang block stored in buff.
 *
 * buff     - output buffer (at least LANGBLOCK_MAXSZ bytes)
 * langid   - 2-letter language id, used to build the input file name
 * b        - bitmap that is reset, then filled with the ids found in the file
 * refb     - bitmap of the reference language: ids that are not part of it
 *            trigger a warning (NULL = no check)
 * refblock - lang block of the reference language: strings that the file
 *            does not provide are copied from it (NULL = no fallback)
 *
 * returns 0 on error (file cannot be opened, malformed line, block too big),
 * or the size of the generated data block otherwise */
static unsigned short gen_langstrings(unsigned char *buff, const char *langid, struct bitmap *b, const struct bitmap *refb, const unsigned char *refblock) {
  size_t len = 0; /* wider than 16 bits so an overflow can be detected */
  unsigned short linelen, linecount;
  unsigned short id, slen;
  int failed = 0;
  FILE *fd;
  char fname[] = "XX.TXT";
  static char linebuf[8192];
  char *ptr;

  bitmap_init(b);

  /* replace the "XX" placeholder with the language id */
  memcpy(fname, langid, 2);

  fd = fopen(fname, "rb");
  if (fd == NULL) {
    printf("ERROR: FAILED TO OPEN '%s'\r\n", fname);
    return(0);
  }

  for (linecount = 1;; linecount++) {

    linelen = readl(linebuf, sizeof(linebuf), fd);
    if (linelen == 0xffff) break; /* EOF */
    if ((linelen == 0) || (linebuf[0] == '#')) continue; /* empty or comment */

    /* convert escaped chars to actual bytes (\n -> newline, etc) */
    linelen = unesc_string(linebuf);

    /* read id and get ptr to actual string ("1.15:string") */
    ptr = parseline(&id, linebuf);
    if (ptr == NULL) {
      printf("ERROR: line #%u of %s is malformed (linelen = %u):\r\n", linecount, fname, linelen);
      puts(linebuf);
      failed = 1;
      break;
    }

    /* make sure the string (and the final terminator) still fits */
    slen = (unsigned short)(strlen(ptr) + 1);
    if (len + 4 + slen + LANGBLOCK_TERMSZ > LANGBLOCK_MAXSZ) {
      printf("ERROR: %s[#%u] does not fit in the lang block (max %u bytes)\r\n", fname, linecount, LANGBLOCK_MAXSZ);
      failed = 1;
      break;
    }

    /* write string into block (II LL S) */
    memcpy(buff + len, &id, 2);
    len += 2;
    memcpy(buff + len, &slen, 2);
    len += 2;
    memcpy(buff + len, ptr, slen);
    len += slen;

    /* if reference bitmap provided: check that the id is valid */
    if ((refb != NULL) && (bitmap_get(refb, id) == 0)) {
      printf("WARNING: %s[#%u] has an invalid id (%u.%u not present in ref lang)\r\n", fname, linecount, id >> 8, id & 0xff);
    }

    /* make sure this id is not already present */
    if (bitmap_get(b, id) == 0) {
      /* set bit in bitmap to remember I have this string */
      bitmap_set(b, id);
    } else {
      printf("WARNING: %s[#%u] has a duplicated id (%u.%u)\r\n", fname, linecount, id >> 8, id & 0xff);
    }
  }

  fclose(fd);

  /* bail out now: falling through would append the terminator and return a
   * non-zero size, hiding the error from the caller */
  if (failed != 0) return(0);

  /* if refblock provided, pull missing strings from it */
  if (refblock != NULL) {
    for (;;) {
      /* refblock is a byte stream without any alignment guarantee, hence
       * memcpy() rather than reading through an (unsigned short *) cast */
      memcpy(&id, refblock, 2);
      memcpy(&slen, refblock + 2, 2);
      if ((id == 0) && (slen == 0)) break; /* terminator of the ref block */
      if (bitmap_get(b, id) == 0) {
        printf("WARNING: %s is missing string %u.%u (pulled from ref lang)\r\n", fname, id >> 8, id & 0xff);
        if (len + 4 + slen + LANGBLOCK_TERMSZ > LANGBLOCK_MAXSZ) {
          printf("ERROR: %s lang block is too big (max %u bytes)\r\n", fname, LANGBLOCK_MAXSZ);
          return(0);
        }
        /* copy missing string from refblock (header included) */
        memcpy(buff + len, refblock, (size_t)slen + 4);
        len += (size_t)slen + 4;
      }
      refblock += (size_t)slen + 4;
    }
  }

  /* write the block terminator (0-long string) */
  buff[len++] = 0; /* id */
  buff[len++] = 0; /* id */
  buff[len++] = 0; /* len */
  buff[len++] = 0; /* len */
  buff[len++] = 0; /* empty string */

  return((unsigned short)len);
}
217
 
218
 
218
 
219
 
219
#define MEMBLOCKSZ 65000
220
#define MEMBLOCKSZ 65000
220
 
221
 
221
int main(int argc, char **argv) {
222
int main(int argc, char **argv) {
222
  FILE *fd;
223
  FILE *fd;
223
  int ecode = 0;
224
  int ecode = 0;
224
  char *buff, *refblock;
225
  char *buff, *refblock;
-
 
226
  unsigned short refblocksz = 0;
225
  static struct bitmap bufbitmap;
227
  static struct bitmap bufbitmap;
226
  static struct bitmap refbitmap;
228
  static struct bitmap refbitmap;
227
  unsigned short i;
229
  unsigned short i;
-
 
230
  unsigned short biggest_langsz = 0;
228
 
231
 
229
  if (argc < 2) {
232
  if (argc < 2) {
230
    puts("usage: tlumacz en fr pl etc");
233
    puts("usage: tlumacz en fr pl etc");
231
    return(1);
234
    return(1);
232
  }
235
  }
233
 
236
 
234
  buff = malloc(MEMBLOCKSZ);
237
  buff = malloc(MEMBLOCKSZ);
235
  refblock = malloc(MEMBLOCKSZ);
238
  refblock = malloc(MEMBLOCKSZ);
236
  if ((buff == NULL) || (refblock == NULL)) {
239
  if ((buff == NULL) || (refblock == NULL)) {
237
    puts("out of memory");
240
    puts("out of memory");
238
    return(1);
241
    return(1);
239
  }
242
  }
240
 
243
 
241
  fd = fopen("out.lng", "wb");
244
  fd = fopen("out.lng", "wb");
242
  if (fd == NULL) {
245
  if (fd == NULL) {
243
    puts("ERR: failed to open or create SVARCOM.LNG");
246
    puts("ERR: failed to open or create SVARCOM.LNG");
244
    return(1);
247
    return(1);
245
  }
248
  }
246
 
249
 
247
  /* write sig */
250
  /* write sig */
248
  fwrite("SvL\x1b", 1, 4, fd);
251
  fwrite("SvL\x1b", 1, 4, fd);
249
 
252
 
250
  /* write lang blocks */
253
  /* write lang blocks */
251
  for (i = 1; i < argc; i++) {
254
  for (i = 1; i < argc; i++) {
252
    unsigned short sz;
255
    unsigned short sz;
253
    char id[3];
256
    char id[3];
254
 
257
 
255
    if (strlen(argv[i]) != 2) {
258
    if (strlen(argv[i]) != 2) {
256
      printf("INVALID LANG SPECIFIED: %s\r\n", argv[i]);
259
      printf("INVALID LANG SPECIFIED: %s\r\n", argv[i]);
257
      ecode = 1;
260
      ecode = 1;
258
      break;
261
      break;
259
    }
262
    }
260
 
263
 
261
    id[0] = argv[i][0];
264
    id[0] = argv[i][0];
262
    id[1] = argv[i][1];
265
    id[1] = argv[i][1];
263
    id[2] = 0;
266
    id[2] = 0;
264
    if (id[0] >= 'a') id[0] -= 'a' - 'A';
267
    if (id[0] >= 'a') id[0] -= 'a' - 'A';
265
    if (id[1] >= 'a') id[1] -= 'a' - 'A';
268
    if (id[1] >= 'a') id[1] -= 'a' - 'A';
266
 
269
 
267
    sz = gen_langstrings(buff, id, &bufbitmap, (i != 1)?&refbitmap:NULL, (i != 1)?refblock:NULL);
270
    sz = gen_langstrings(buff, id, &bufbitmap, (i != 1)?&refbitmap:NULL, (i != 1)?refblock:NULL);
268
    if (sz == 0) {
271
    if (sz == 0) {
269
      printf("ERROR COMPUTING LANG '%s'\r\n", id);
272
      printf("ERROR COMPUTING LANG '%s'\r\n", id);
270
      ecode = 1;
273
      ecode = 1;
271
      break;
274
      break;
272
    } else {
275
    } else {
273
      printf("computed %s lang block of %u bytes\r\n", id, sz);
276
      printf("computed %s lang block of %u bytes\r\n", id, sz);
-
 
277
      if (sz > biggest_langsz) biggest_langsz = sz;
274
    }
278
    }
275
    /* write lang ID to file, followed by block size and then the actual block */
279
    /* write lang ID to file, followed by block size and then the actual block */
276
    if ((fwrite(id, 1, 2, fd) != 2) ||
280
    if ((fwrite(id, 1, 2, fd) != 2) ||
277
        (fwrite(&sz, 1, 2, fd) != 2) ||
281
        (fwrite(&sz, 1, 2, fd) != 2) ||
278
        (fwrite(buff, 1, sz, fd) != sz)) {
282
        (fwrite(buff, 1, sz, fd) != sz)) {
279
      printf("ERROR WRITING TO OUTPUT FILE\r\n");
283
      printf("ERROR WRITING TO OUTPUT FILE\r\n");
280
      ecode = 1;
284
      ecode = 1;
281
      break;
285
      break;
282
    }
286
    }
283
    /* compute the default block for reference language */
287
    /* remember reference data for other languages */
284
    if (i == 1) {
288
    if (i == 1) {
285
      unsigned short x;
-
 
286
      FILE *fd2;
-
 
287
      fd2 = fopen("DEFLANG.C", "wb");
-
 
288
      if (fd2 == NULL) {
-
 
289
        puts("ERROR: FAILED TO OPEN OR CREATE DEFLANG.C");
-
 
290
        break;
-
 
291
      }
-
 
292
      fprintf(fd2, "/* THIS FILE HAS BEEN AUTOGENERATE BY TLUMACZ (PART OF THE SVARLANG LIBRARY) */\r\n");
-
 
293
      fprintf(fd2, "const unsigned short svarlang_memsz = %uu;\r\n", sz * 2);
-
 
294
      fprintf(fd2, "char svarlang_mem[%u] = {\r\n", sz * 2);
-
 
295
      for (x = 0; x < sz; x++) {
-
 
296
        fprintf(fd2, "%u", buff[x]);
-
 
297
        if (x + 1 < sz) fprintf(fd2, ",");
-
 
298
        if ((x & 15) == 15) fprintf(fd2, "\r\n");
-
 
299
      }
-
 
300
      fprintf(fd2, "};\r\n");
-
 
301
      fclose(fd2);
289
      refblocksz = sz;
302
      /* remember reference data for other languages */
-
 
303
      memcpy(refblock, buff, MEMBLOCKSZ);
290
      memcpy(refblock, buff, MEMBLOCKSZ);
304
      memcpy(&refbitmap, &bufbitmap, sizeof(struct bitmap));
291
      memcpy(&refbitmap, &bufbitmap, sizeof(struct bitmap));
305
    }
292
    }
306
  }
293
  }
307
 
294
 
308
  fclose(fd);
295
  fclose(fd);
309
 
296
 
-
 
297
  /* compute the deflang.c file containing a dump of the reference block */
-
 
298
  fd = fopen("DEFLANG.C", "wb");
-
 
299
  if (fd == NULL) {
-
 
300
    puts("ERROR: FAILED TO OPEN OR CREATE DEFLANG.C");
-
 
301
    ecode = 1;
-
 
302
  } else {
-
 
303
    unsigned short allocsz = biggest_langsz + (biggest_langsz / 20);
-
 
304
    printf("biggest lang block is %u bytes -> allocating a %u bytes buffer\n", biggest_langsz, allocsz);
-
 
305
    fprintf(fd, "/* THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY) */\r\n");
-
 
306
    fprintf(fd, "const unsigned short svarlang_memsz = %uu;\r\n", allocsz);
-
 
307
    fprintf(fd, "char svarlang_mem[%u] = {\r\n", allocsz);
-
 
308
    for (i = 0; i < refblocksz; i++) {
-
 
309
      fprintf(fd, "%u", buff[i]);
-
 
310
      if (i + 1 < refblocksz) fprintf(fd, ",");
-
 
311
      if ((i & 15) == 15) fprintf(fd, "\r\n");
-
 
312
    }
-
 
313
    fprintf(fd, "};\r\n");
-
 
314
    fclose(fd);
-
 
315
  }
-
 
316
 
310
  return(ecode);
317
  return(ecode);
311
}
318
}
312
 
319