597 |
mateuszvis |
1 |
/*
|
|
|
2 |
* Copyright (C) 2021-2022 Mateusz Viste
|
|
|
3 |
*
|
|
|
4 |
* usage: tlumacz en fr pl etc
|
|
|
5 |
*
|
|
|
6 |
* computes a svarcom.lng file that contains all language resources found
|
|
|
7 |
* inside dirname.
|
|
|
8 |
*
|
|
|
9 |
* DAT format:
|
|
|
10 |
*
|
|
|
11 |
* 4-bytes signature:
|
|
|
12 |
* "SvL\x1b"
|
|
|
13 |
*
|
|
|
14 |
* Then "LANG BLOCKS" follow. Each LANG BLOCK is prefixed with 4 bytes:
|
|
|
15 |
* II LL - II is the LANG identifier ("EN", "PL", etc) and LL is the size
|
|
|
16 |
* of the block (65535 bytes max).
|
|
|
17 |
*
|
|
|
18 |
* Inside a LANG BLOCK is a set of strings:
|
|
|
19 |
*
|
|
|
20 |
* II L S where II is the string's 16-bit identifier, L is its length (1-255)
|
|
|
21 |
* and S is the actual string. All strings are ASCIIZ-formatted (ie.
|
|
|
22 |
* end with a NUL terminator).
|
|
|
23 |
*
|
|
|
24 |
* The list of strings ends with a single 0-long string.
|
|
|
25 |
*/
|
|
|
26 |
|
|
|
27 |
|
|
|
28 |
#include <stdio.h>
|
|
|
29 |
#include <stdlib.h>
|
|
|
30 |
#include <string.h>
|
|
|
31 |
|
|
|
32 |
|
|
|
33 |
|
|
|
34 |
/* set of 16-bit ids stored as a bit field: 65536 possible ids, one bit each */
struct bitmap {
  unsigned char bits[65536 / 8];
};
|
|
|
37 |
|
|
|
38 |
static void bitmap_set(struct bitmap *b, unsigned short id) {
|
|
|
39 |
b->bits[id / 8] |= 1 << (id & 7);
|
|
|
40 |
}
|
|
|
41 |
|
|
|
42 |
static int bitmap_get(const struct bitmap *b, unsigned short id) {
|
|
|
43 |
return(b->bits[id / 8] & (1 << (id & 7)));
|
|
|
44 |
}
|
|
|
45 |
|
|
|
46 |
static void bitmap_init(struct bitmap *b) {
|
|
|
47 |
memset(b, 0, sizeof(struct bitmap));
|
|
|
48 |
}
|
|
|
49 |
|
|
|
50 |
|
|
|
51 |
|
|
|
52 |
/* reads a single line from fd into dst (dstsz is the size of dst in bytes).
 * the line is cut at the first CR or LF and trailing spaces are removed, so
 * a line made only of spaces comes back as an empty string.
 * returns the length of the resulting string, or 0xffff when nothing could
 * be read (EOF or read error) */
static unsigned short readl(char *dst, size_t dstsz, FILE *fd) {
  unsigned short i, len = 0;

  if (fgets(dst, (int)dstsz, fd) == NULL) return(0xffff); /* EOF */

  /* walk up to the end of line; len always sits one past the last non-space
   * character met so far, hence it stays 0 when there is none at all */
  for (i = 0; (dst[i] != 0) && (dst[i] != '\r') && (dst[i] != '\n'); i++) {
    if (dst[i] != ' ') len = i + 1;
  }

  dst[len] = 0; /* drops the CR/LF as well as any trailing spaces */

  return(len);
}
|
|
|
68 |
|
|
|
69 |
|
|
|
70 |
/* parses a line in format "1.50:somestring", ie. "MAJOR.MINOR:STRING" where
 * MAJOR and MINOR are decimal numbers in the range 0..255 (at least one digit
 * each) and STRING is not empty.
 * on success fills id with (MAJOR << 8) | MINOR and returns a pointer to the
 * STRING part (inside s). returns NULL on error, id is left untouched then */
static char *parseline(unsigned short *id, char *s) {
  unsigned int val[2] = {0, 0}; /* val[0] = major, val[1] = minor */
  static const char delim[2] = {'.', ':'}; /* char expected after each number */
  int i = 0, n, ndigits;

  for (n = 0; n < 2; n++) {
    /* collect digits, bailing out as soon as the value no longer fits in the
     * 8 bits that are reserved for it inside the 16-bit id */
    for (ndigits = 0; (s[i] >= '0') && (s[i] <= '9'); i++, ndigits++) {
      val[n] = (val[n] * 10) + (unsigned int)(s[i] - '0');
      if (val[n] > 255) return(NULL);
    }
    /* a number needs at least one digit and must be followed by its delimiter */
    if ((ndigits == 0) || (s[i] != delim[n])) return(NULL);
    i++;
  }

  /* an empty string is not acceptable */
  if (s[i] == 0) return(NULL);

  *id = (unsigned short)((val[0] << 8) | val[1]);

  return(s + i);
}
|
|
|
102 |
|
|
|
103 |
|
|
|
104 |
/* opens a CATS-style file ("XX.TXT", XX being the two characters of langid)
 * and compiles it into a resources lang block stored in buff.
 *
 * buff     - destination of the block (a serie of "II L S" strings followed
 *            by a 0-long terminating string)
 * langid   - two-letter language id, used to build the input file name
 * b        - bitmap that is reset and then filled with the ids found in the
 *            file
 * refb     - optional (may be NULL) bitmap of the reference language: ids of
 *            the file that are absent from it trigger a warning
 * refblock - optional (may be NULL) lang block of the reference language:
 *            strings that the file does not provide are copied from it
 *
 * returns the length of the block, or 0 on error (file cannot be opened,
 * malformed line, string too long or block too big) */
static unsigned short gen_langstrings(unsigned char *buff, const char *langid, struct bitmap *b, const struct bitmap *refb, const unsigned char *refblock) {
  /* BUFFCAPACITY is the size buff is assumed to have: it mirrors the
   * MEMBLOCKSZ bytes that main() allocates - keep both in sync */
  enum { BUFFCAPACITY = 65500, HDRSZ = 3, TERMSZ = 3, MAXSTRSZ = 255 };
  unsigned short len = 0, linelen;
  FILE *fd;
  char fname[] = "XX.TXT";
  char linebuf[512];
  char *ptr;
  unsigned short id, linecount;
  size_t strsz;
  int failed = 0;

  bitmap_init(b);

  /* replace the "XX" placeholder by the lang id */
  memcpy(fname, langid, 2);

  fd = fopen(fname, "rb");
  if (fd == NULL) {
    printf("ERROR: FAILED TO OPEN '%s'\r\n", fname);
    return(0);
  }

  for (linecount = 1;; linecount++) {

    linelen = readl(linebuf, sizeof(linebuf), fd);
    if (linelen == 0xffff) break; /* EOF */
    if ((linelen == 0) || (linebuf[0] == '#')) continue; /* empty or comment */

    /* read id and get ptr to actual string ("1.15:string") */
    ptr = parseline(&id, linebuf);
    if (ptr == NULL) {
      printf("ERROR: line #%u of %s is malformed\r\n", (unsigned)linecount, fname);
      failed = 1;
      break;
    }

    /* the L field is a single byte that counts the NUL terminator as well */
    strsz = strlen(ptr) + 1;
    if (strsz > MAXSTRSZ) {
      printf("ERROR: line #%u of %s is too long (%u bytes while max is %u)\r\n", (unsigned)linecount, fname, (unsigned)strsz, (unsigned)MAXSTRSZ);
      failed = 1;
      break;
    }

    /* the string and the block terminator must both still fit in buff */
    if ((unsigned long)len + HDRSZ + strsz + TERMSZ > BUFFCAPACITY) {
      printf("ERROR: %s is too big to fit in a lang block (line #%u)\r\n", fname, (unsigned)linecount);
      failed = 1;
      break;
    }

    /* write string into block (II L S) */
    memcpy(buff + len, &id, 2);
    len += 2;
    buff[len++] = (unsigned char)strsz;
    memcpy(buff + len, ptr, strsz);
    len += strsz;

    /* if reference bitmap provided: check that the id is valid */
    if ((refb != NULL) && (bitmap_get(refb, id) == 0)) {
      printf("WARNING: %s[#%u] has an invalid id (%u.%u not present in ref lang)\r\n", fname, (unsigned)linecount, (unsigned)(id >> 8), (unsigned)(id & 0xff));
    }

    /* make sure this id is not already present */
    if (bitmap_get(b, id) == 0) {
      /* set bit in bitmap to remember I have this string */
      bitmap_set(b, id);
    } else {
      printf("WARNING: %s[#%u] has a duplicated id (%u.%u)\r\n", fname, (unsigned)linecount, (unsigned)(id >> 8), (unsigned)(id & 0xff));
    }
  }

  fclose(fd);

  /* a broken input file must be reported to the caller, not patched up with
   * strings of the reference language */
  if (failed != 0) return(0);

  /* if refblock provided, pull missing strings from it */
  if (refblock != NULL) {
    for (;;) {
      unsigned short entrysz;

      /* memcpy because nothing guarantees that refblock is aligned */
      memcpy(&id, refblock, 2);
      if ((id == 0) && (refblock[2] == 0)) break; /* terminator reached */

      entrysz = refblock[2] + HDRSZ;
      if (bitmap_get(b, id) == 0) {
        if ((unsigned long)len + entrysz + TERMSZ > BUFFCAPACITY) {
          printf("ERROR: %s is too big to fit in a lang block (pulling %u.%u from ref lang)\r\n", fname, (unsigned)(id >> 8), (unsigned)(id & 0xff));
          return(0);
        }
        printf("WARNING: %s is missing string %u.%u (pulled from ref lang)\r\n", fname, (unsigned)(id >> 8), (unsigned)(id & 0xff));
        /* copy missing string from refblock */
        memcpy(buff + len, refblock, entrysz);
        len += entrysz;
      }
      refblock += entrysz;
    }
  }

  /* write the block terminator (0-long string) */
  buff[len++] = 0; /* id */
  buff[len++] = 0; /* id */
  buff[len++] = 0; /* len */

  return(len);
}
|
|
|
182 |
|
|
|
183 |
|
|
|
184 |
#define MEMBLOCKSZ 65500
|
|
|
185 |
|
|
|
186 |
int main(int argc, char **argv) {
|
|
|
187 |
FILE *fd;
|
|
|
188 |
int ecode = 0;
|
|
|
189 |
char *buff, *refblock;
|
|
|
190 |
static struct bitmap bufbitmap;
|
|
|
191 |
static struct bitmap refbitmap;
|
|
|
192 |
unsigned short i;
|
|
|
193 |
|
|
|
194 |
if (argc < 2) {
|
|
|
195 |
puts("usage: tlumacz en fr pl etc");
|
|
|
196 |
return(1);
|
|
|
197 |
}
|
|
|
198 |
|
|
|
199 |
buff = malloc(MEMBLOCKSZ);
|
|
|
200 |
refblock = malloc(MEMBLOCKSZ);
|
|
|
201 |
if ((buff == NULL) || (refblock == NULL)) {
|
|
|
202 |
puts("out of memory");
|
|
|
203 |
return(1);
|
|
|
204 |
}
|
|
|
205 |
|
|
|
206 |
fd = fopen("svarcom.lng", "wb");
|
|
|
207 |
if (fd == NULL) {
|
|
|
208 |
puts("ERR: failed to open or create SVARCOM.LNG");
|
|
|
209 |
return(1);
|
|
|
210 |
}
|
|
|
211 |
|
|
|
212 |
/* write sig */
|
|
|
213 |
fwrite("SvL\x1b", 1, 4, fd);
|
|
|
214 |
|
|
|
215 |
/* write lang blocks */
|
|
|
216 |
for (i = 1; i < argc; i++) {
|
|
|
217 |
unsigned short sz;
|
|
|
218 |
char id[3];
|
|
|
219 |
|
|
|
220 |
if (strlen(argv[i]) != 2) {
|
|
|
221 |
printf("INVALID LANG SPECIFIED: %s\r\n", argv[i]);
|
|
|
222 |
ecode = 1;
|
|
|
223 |
break;
|
|
|
224 |
}
|
|
|
225 |
|
|
|
226 |
id[0] = argv[i][0];
|
|
|
227 |
id[1] = argv[i][1];
|
|
|
228 |
id[2] = 0;
|
|
|
229 |
if (id[0] >= 'a') id[0] -= 'a' - 'A';
|
|
|
230 |
if (id[1] >= 'a') id[1] -= 'a' - 'A';
|
|
|
231 |
|
|
|
232 |
sz = gen_langstrings(buff, id, &bufbitmap, (i != 1)?&refbitmap:NULL, (i != 1)?refblock:NULL);
|
|
|
233 |
if (sz == 0) {
|
|
|
234 |
printf("ERROR COMPUTING LANG '%s'\r\n", id);
|
|
|
235 |
ecode = 1;
|
|
|
236 |
break;
|
|
|
237 |
} else {
|
|
|
238 |
printf("computed %s lang block of %u bytes\r\n", id, sz);
|
|
|
239 |
}
|
|
|
240 |
/* write lang ID to file, followed by block size and then the actual block */
|
|
|
241 |
if ((fwrite(id, 1, 2, fd) != 2) ||
|
|
|
242 |
(fwrite(&sz, 1, 2, fd) != 2) ||
|
|
|
243 |
(fwrite(buff, 1, sz, fd) != sz)) {
|
|
|
244 |
printf("ERROR WRITING TO OUTPUT FILE\r\n");
|
|
|
245 |
ecode = 1;
|
|
|
246 |
break;
|
|
|
247 |
}
|
|
|
248 |
/* compute the default block for reference language */
|
|
|
249 |
if (i == 1) {
|
|
|
250 |
unsigned short x;
|
|
|
251 |
FILE *fd2;
|
|
|
252 |
fd2 = fopen("DEFLANG.C", "wb");
|
|
|
253 |
if (fd2 == NULL) {
|
|
|
254 |
puts("ERROR: FAILED TO OPEN OR CREATE DEFLANG.C");
|
|
|
255 |
break;
|
|
|
256 |
}
|
|
|
257 |
fprintf(fd2, "/* THIS FILE HAS BEEN AUTOGENERATE BY TLUMACZ (PART OF THE SVARLANG LIBRARY) */\r\n");
|
|
|
258 |
fprintf(fd2, "static char svarlang_mem[%u] = {\r\n", sz * 2);
|
|
|
259 |
for (x = 0; x < sz; x++) {
|
|
|
260 |
fprintf(fd2, "%u", buff[x]);
|
|
|
261 |
if (x + 1 < sz) fprintf(fd2, ",");
|
|
|
262 |
if ((x & 15) == 15) fprintf(fd2, "\r\n");
|
|
|
263 |
}
|
|
|
264 |
fprintf(fd2, "};\r\n");
|
|
|
265 |
fclose(fd2);
|
|
|
266 |
/* remember reference data for other languages */
|
|
|
267 |
memcpy(refblock, buff, MEMBLOCKSZ);
|
|
|
268 |
memcpy(&refbitmap, &bufbitmap, sizeof(struct bitmap));
|
|
|
269 |
}
|
|
|
270 |
}
|
|
|
271 |
|
|
|
272 |
fclose(fd);
|
|
|
273 |
|
|
|
274 |
return(ecode);
|
|
|
275 |
}
|