WebSVN – SvarDOS – Blame – /svarlang.lib/trunk/tlumacz.c

Rev	Author	Line No.	Line
597	mateuszvis	1	`/*`
2083	mateusz.vi	2	`* Copyright (C) 2021-2024 Mateusz Viste`
597	mateuszvis	3	`*`
1295	mateusz.vi	4	`* Dictionary-based lookups contributed by Bernd Boeckmann, 2023`
		5	`*`
597	mateuszvis	6	`* usage: tlumacz en fr pl etc`
		7	`*`
2083	mateusz.vi	8	`* computes:`
		9	`* OUT.LNG -> contains all language resources.`
		10	`* OUTC.LNG -> same as OUT.LNG but with compressed strings (slower to load).`
597	mateuszvis	11	`*`
2083	mateusz.vi	12	`* === COMPRESSION ===========================================================`
		13	`* The compression scheme is very simple. It is applied only to strings (ie.`
2102	mateusz.vi	14	`* not the dictionary) and it is basically a stream of 16-bit words (tokens).`
		15	`*`
		16	`* Token format is LLLL OOOO OOOO OOOO, where:`
		17	`* OOOO OOOO OOOO is the back reference offset (number of bytes-1 to rewind)`
		18	`* LLLL is the number of bytes (-1) that have to be copied from the offset.`
		19	`*`
		20	`* However, if LLLL is zero then the token's format is different:`
		21	`* 0000 RRRR BBBB BBBB`
		22	`*`
		23	`* The above form occurs when uncompressible data is encountered:`
		24	`* BBBB BBBB is the literal value of a byte to be copied`
		25	`* RRRR is the number of RAW (uncompressible) WORDS that follow (possibly 0)`
		26	`*`
2083	mateusz.vi	27	`* where each WORD value contains the following bits "LLLL OOOO OOOO OOOO":`
		28	`*`
		29	`* OOOO OOOO OOOO = a backreference offset ("look that many bytes back")`
		30	`* LLLL = the number of bytes to copy from the backreference`
		31	`*`
		32	`* To recognize a compressed lang block one has to look at the id of the block`
		33	`* (16-bit language id). If its highest bit is set (0x8000) then the lang block`
		34	`* is compressed.`
597	mateuszvis	35	`*/`
		36
		37
		38	`#include <stdio.h>`
		39	`#include <stdlib.h>`
		40	`#include <string.h>`
1290	bernd.boec	41	`#include <ctype.h>`
597	mateuszvis	42
1248	mateusz.vi	43	`#include "svarlang.h"`
597	mateuszvis	44
1290	bernd.boec	45	`#define STRINGS_CAP 65000 /* string storage size in characters */`
1293	mateusz.vi	46	`#define DICT_CAP 10000 /* dictionary size in elements */`
597	mateuszvis	47
2014	bernd.boec	48	`enum { /* DEFLANG output format */`
		49	`C_OUTPUT, /* deflang.c is generated (default, also selected by /c) */`
2108	mateusz.vi	50	`NO_OUTPUT, /* no deflang source file is generated (/nodef) */`
2014	bernd.boec	51	`ASM_OUTPUT, /* deflang.inc is generated, symbols exported with "public" (/asm) */`
		52	`NASM_OUTPUT /* deflang.inc is generated, symbols exported with "global" (/nasm) */`
		53	`};`
		54
		55
/* reads a single line from fd into dst (at most dstsz-1 bytes are stored)
 * and returns the length of the resulting string, or 0xffff on EOF.
 * the ending CR/LF is trimmed, as well as any trailing spaces. a line that
 * contains nothing but spaces yields an empty string (length 0). */
static unsigned short readl(char *dst, size_t dstsz, FILE *fd) {
  unsigned short pos;
  unsigned short trimmedlen = 0; /* length of the line up to (and including) its last non-space char */

  if (fgets(dst, (int)dstsz, fd) == NULL) return(0xffff); /* EOF */

  /* scan up to the first CR, LF or NUL, remembering where the content ends.
   * storing "index + 1" (instead of the index) keeps "last non-space is at
   * position 0" distinguishable from "no non-space char at all" */
  for (pos = 0; (dst[pos] != 0) && (dst[pos] != '\r') && (dst[pos] != '\n'); pos++) {
    if (dst[pos] != ' ') trimmedlen = (unsigned short)(pos + 1);
  }

  dst[trimmedlen] = 0; /* cuts the line terminator and the trailing spaces */

  return(trimmedlen);
}
		72
		73
/* parses a line in format "[?]1.50:somestring".
 * on success, fills id with (major << 8) | minor and returns a pointer to the
 * actual string part (the first char after the colon).
 * returns NULL if the line is malformed: missing digits before or after the
 * dot, missing dot or colon, or a major/minor number that does not fit in
 * 8 bits. */
static const char *parseline(unsigned short *id, const char *s) {
  int i;
  int dotpos = 0, colpos = 0, gotdigits = 0;
  unsigned long major, minor;

  /* strings prefixed by '?' are flagged as "dirty": ignore this flag here */
  if (*s == '?') s++;

  /* expected layout: digits, a dot, digits, a colon */
  for (i = 0;; i++) {
    if (s[i] == '.') {
      if ((dotpos != 0) || (gotdigits == 0)) break; /* second dot, or no major number */
      dotpos = i;
      gotdigits = 0; /* start counting the digits of the minor number */
    } else if (s[i] == ':') {
      if (gotdigits != 0) colpos = i;
      break;
    } else if ((s[i] < '0') || (s[i] > '9')) {
      break;
    } else {
      gotdigits++; /* only actual digits are counted, never the dot */
    }
  }
  /* did I collect everything? */
  if ((dotpos == 0) || (colpos == 0)) return(NULL);

  /* both parts are known to be made of digits only at this point. strtoul()
   * saturates on overflow, so oversized numbers are caught by the range test */
  major = strtoul(s, NULL, 10);
  minor = strtoul(s + dotpos + 1, NULL, 10);
  if ((major > 0xff) || (minor > 0xff)) return(NULL);

  *id = (unsigned short)((major << 8) | minor);

  return(s + colpos + 1);
}
		108
		109
/* converts, in place, escape sequences like "\n" or "\t" into actual bytes
 * and returns the new length of the string.
 * recognized sequences are \e (ESC), \n, \r and \t. for any other sequence
 * the backslash is dropped and the char that follows it is kept as-is (so
 * "\\" yields a single backslash). a lone backslash at the very end of the
 * string is dropped. */
static unsigned short unesc_string(char *linebuff) {
  const char *rd = linebuff; /* next byte to read */
  char *wr = linebuff;       /* next byte to write, never ahead of rd */

  while (*rd != 0) {
    char c = *rd++;
    if (c == '\\') {
      c = *rd;
      if (c == 0) break; /* trailing backslash with nothing to escape */
      rd++;
      switch (c) {
        case 'e':
          c = 0x1B; /* ESC code, using hex because '\e' is not ANSI C */
          break;
        case 'n':
          c = '\n';
          break;
        case 'r':
          c = '\r';
          break;
        case 't':
          c = '\t';
          break;
        default: /* unknown sequence: keep the escaped char itself */
          break;
      }
    }
    *wr++ = c;
  }
  *wr = 0;

  return((unsigned short)(wr - linebuff));
}
		135
1290	bernd.boec	136	`#pragma pack(1)`
1296	mateusz.vi	137	`struct dict_entry { /* one dictionary record; the array of these is written as-is into the LNG file */`
1295	mateusz.vi	138	`unsigned short id; /* string id as built by parseline(): major number in the high byte, minor in the low byte */`
		139	`unsigned short offset; /* position of the string, in bytes, from the start of the strings buffer */`
1296	mateusz.vi	140	`};`
1290	bernd.boec	141	`#pragma pack()`
639	mateusz.vi	142
1296	mateusz.vi	143	`struct svl_lang { /* in-memory representation of one language */`
1290	bernd.boec	144	`char id[2]; /* two-letter language code, stored upper-cased by svl_lang_new() */`
		145	`unsigned short num_strings; /* number of dict entries (and strings) currently stored */`
		146
1296	mateusz.vi	147	`struct dict_entry *dict; /* dictionary entries, kept sorted by ascending id by svl_add_str() */`
1290	bernd.boec	148	`size_t dict_cap; /* number of dict_entry elements allocated for dict */`
		149
		150	`char *strings; /* start of the buffer holding the NUL-terminated strings */`
		151	`char *strings_end; /* first unused byte of the strings buffer */`
		152	`size_t strings_cap; /* size of the strings buffer, in bytes */`
		153
1296	mateusz.vi	154	`};`
1290	bernd.boec	155
		156
1296	mateusz.vi	157	`static struct svl_lang *svl_lang_new(const char langid[2], size_t dict_cap, size_t strings_cap) {`
		158	`struct svl_lang *l;`
1290	bernd.boec	159
1296	mateusz.vi	160	`l = malloc(sizeof(struct svl_lang));`
1295	mateusz.vi	161	`if (!l) return(NULL);`
1290	bernd.boec	162
		163	`l->id[0] = (char)toupper(langid[0]);`
		164	`l->id[1] = (char)toupper(langid[1]);`
		165
1296	mateusz.vi	166	`l->dict = malloc(dict_cap * sizeof(struct dict_entry));`
1295	mateusz.vi	167	`if (!l->dict) return(NULL);`
		168
1290	bernd.boec	169	`l->dict_cap = dict_cap;`
		170
		171	`l->num_strings = 0;`
		172	`l->strings = l->strings_end = malloc(strings_cap);`
		173	`if (!l->strings) {`
		174	`free(l->dict);`
1295	mateusz.vi	175	`return(NULL);`
1290	bernd.boec	176	`}`
		177	`l->strings_cap = strings_cap;`
1295	mateusz.vi	178
		179	`return(l);`
1290	bernd.boec	180	`}`
		181
		182
		183	`/* compacts the dict and string buffer */`
1296	mateusz.vi	184	`static void svl_compact_lang(struct svl_lang *l) {`
1290	bernd.boec	185	`size_t bytes;`
		186	`bytes = l->strings_end - l->strings;`
		187	`if (bytes < l->strings_cap) {`
		188	`l->strings = l->strings_end = realloc(l->strings, bytes);`
		189	`l->strings_end += bytes;`
		190	`l->strings_cap = bytes;`
		191	`}`
		192	`l->dict_cap = l->num_strings;`
1296	mateusz.vi	193	`l->dict = realloc(l->dict, l->dict_cap * sizeof(struct dict_entry));`
1290	bernd.boec	194	`}`
		195
		196
1296	mateusz.vi	197	`static void svl_lang_free(struct svl_lang *l) {`
1290	bernd.boec	198	`l->num_strings = 0;`
		199	`if (l->dict) {`
		200	`free(l->dict);`
		201	`l->dict = NULL;`
		202	`}`
		203	`if (l->strings) {`
		204	`free(l->strings);`
		205	`l->strings = l->strings_end = NULL;`
		206	`}`
		207	`l->dict_cap = 0;`
		208	`l->strings_cap = 0;`
		209	`}`
		210
		211
1296	mateusz.vi	212	`static size_t svl_strings_bytes(const struct svl_lang *l) {`
1295	mateusz.vi	213	`return(l->strings_end - l->strings);`
1290	bernd.boec	214	`}`
		215
		216
1296	mateusz.vi	217	`static size_t svl_dict_bytes(const struct svl_lang *l) {`
		218	`return(l->num_strings * sizeof(struct dict_entry));`
1290	bernd.boec	219	`}`
		220
		221
1296	mateusz.vi	222	`static int svl_add_str(struct svl_lang l, unsigned short id, const char s) {`
1290	bernd.boec	223	`size_t len = strlen(s) + 1;`
		224	`size_t cursor;`
		225
1296	mateusz.vi	226	`if ((l->strings_cap < svl_strings_bytes(l) + len) \|\| (l->dict_cap < (l->num_strings + 1) * sizeof(struct dict_entry))) {`
1295	mateusz.vi	227	`return(0);`
1290	bernd.boec	228	`}`
1293	mateusz.vi	229
1290	bernd.boec	230	`/* find dictionary insert position, search backwards in assumption`
		231	`that in translation files, strings are generally ordered ascending */`
		232	`for (cursor = l->num_strings; cursor > 0 && l->dict[cursor-1].id > id; cursor--);`
		233
1296	mateusz.vi	234	`memmove(&(l->dict[cursor+1]), &(l->dict[cursor]), sizeof(struct dict_entry) * (l->num_strings - cursor));`
1290	bernd.boec	235	`l->dict[cursor].id = id;`
		236	`l->dict[cursor].offset = l->strings_end - l->strings;`
		237
		238	`memcpy(l->strings_end, s, len);`
		239	`l->strings_end += len;`
		240	`l->num_strings++;`
		241
1295	mateusz.vi	242	`return(1);`
1290	bernd.boec	243	`}`
		244
		245
1296	mateusz.vi	246	`static int svl_find(const struct svl_lang *l, unsigned short id) {`
1295	mateusz.vi	247	`size_t left = 0, right = l->num_strings - 1, x;`
		248	`unsigned short v;`
1290	bernd.boec	249
1295	mateusz.vi	250	`if (l->num_strings == 0) return(0);`
1290	bernd.boec	251
1295	mateusz.vi	252	`while (left <= right ) {`
		253	`x = left + ( (right - left ) >> 2 );`
		254	`v = l->dict[x].id;`
1296	mateusz.vi	255	`if ( id == v ) return(1); /* found! */`
		256
		257	`if (id > v) {`
1295	mateusz.vi	258	`left = x + 1;`
		259	`} else {`
		260	`right = x - 1;`
		261	`}`
		262	`}`
		263	`return(0);`
1290	bernd.boec	264	`}`
		265
1295	mateusz.vi	266
1061	mateusz.vi	267	`/* opens a CATS-style file and compiles it into a ressources lang block`
		268	`* returns 0 on error, or the size of the generated data block otherwise */`
1296	mateusz.vi	269	`static unsigned short svl_lang_from_cats_file(struct svl_lang l, struct svl_lang refl) {`
1290	bernd.boec	270	`unsigned short linelen;`
597	mateuszvis	271	`FILE *fd;`
1290	bernd.boec	272	`char fname[] = "xx.txt";`
623	mateuszvis	273	`static char linebuf[8192];`
1114	mateusz.vi	274	`const char *ptr;`
1290	bernd.boec	275	`unsigned short id, maxid=0, maxid_line, linecount;`
		276	`int i;`
597	mateuszvis	277
1290	bernd.boec	278	`fname[strlen(fname) - 6] = (char)tolower( l->id[0] );`
		279	`fname[strlen(fname) - 5] = (char)tolower( l->id[1] );`
597	mateuszvis	280
		281	`fd = fopen(fname, "rb");`
		282	`if (fd == NULL) {`
2143	mateusz.vi	283	`printf("ERROR: FAILED TO OPEN '%s'\n", fname);`
597	mateuszvis	284	`return(0);`
		285	`}`
		286
		287	`for (linecount = 1;; linecount++) {`
		288	`linelen = readl(linebuf, sizeof(linebuf), fd);`
		289	`if (linelen == 0xffff) break; /* EOF */`
		290	`if ((linelen == 0) \|\| (linebuf[0] == '#')) continue;`
		291
639	mateusz.vi	292	`/* convert escaped chars to actual bytes (\n -> newline, etc) */`
		293	`linelen = unesc_string(linebuf);`
		294
597	mateuszvis	295	`/* read id and get ptr to actual string ("1.15:string") */`
		296	`ptr = parseline(&id, linebuf);`
1272	mateusz.vi	297
		298	`/* handle malformed lines */`
597	mateuszvis	299	`if (ptr == NULL) {`
2143	mateusz.vi	300	`printf("WARNING: %s[#%u] is malformed (linelen = %u):\n", fname, linecount, linelen);`
623	mateuszvis	301	`puts(linebuf);`
1272	mateusz.vi	302	`continue;`
597	mateuszvis	303	`}`
1272	mateusz.vi	304
		305	`/* ignore empty strings (but emit a warning) */`
		306	`if (ptr[0] == 0) {`
2143	mateusz.vi	307	`printf("WARNING: %s[#%u] ignoring empty string %u.%u\n", fname, linecount, id >> 8, id & 0xff);`
1271	bernd.boec	308	`continue;`
		309	`}`
597	mateuszvis	310
1114	mateusz.vi	311	`/* warn about dirty lines */`
		312	`if (linebuf[0] == '?') {`
2143	mateusz.vi	313	`printf("WARNING: %s[#%u] string id %u.%u is flagged as 'dirty'\n", fname, linecount, id >> 8, id & 0xff);`
1114	mateusz.vi	314	`}`
		315
1290	bernd.boec	316	`/* add the string contained in current line, if conditions are met */`
		317	`if (!svl_find(l, id)) {`
1295	mateusz.vi	318	`if ((refl == NULL) \|\| (svl_find(refl, id))) {`
1290	bernd.boec	319	`if (!svl_add_str(l, id, ptr)) {`
2014	bernd.boec	320	`fprintf(stderr, "ERROR: %s[#%u] output size limit exceeded\r\n", fname, linecount);`
1290	bernd.boec	321	`fclose(fd);`
1295	mateusz.vi	322	`return(0);`
1290	bernd.boec	323	`}`
		324	`if (id >= maxid) {`
		325	`maxid = id;`
		326	`maxid_line = linecount;`
1295	mateusz.vi	327	`} else {`
2143	mateusz.vi	328	`printf("WARNING:%s[#%u] file unsorted - line %u has higher id %u.%u\n", fname, linecount, maxid_line, maxid >> 8, maxid & 0xff);`
1290	bernd.boec	329	`}`
1295	mateusz.vi	330	`} else {`
2143	mateusz.vi	331	`printf("WARNING: %s[#%u] has an invalid id (%u.%u not present in ref lang)\n", fname, linecount, id >> 8, id & 0xff);`
1290	bernd.boec	332	`}`
1295	mateusz.vi	333	`} else {`
2143	mateusz.vi	334	`printf("WARNING: %s[#%u] has a duplicated id (%u.%u)\n", fname, linecount, id >> 8, id & 0xff);`
597	mateuszvis	335	`}`
		336	`}`
		337
		338	`fclose(fd);`
		339
1290	bernd.boec	340	`/* if reflang provided, pull missing strings from it */`
		341	`if (refl != NULL) {`
		342	`for (i = 0; i < refl->num_strings; i++) {`
		343	`id = refl->dict[i].id;`
		344	`if (!svl_find(l, id)) {`
2143	mateusz.vi	345	`printf("WARNING: %s is missing string %u.%u (pulled from ref lang)\n", fname, id >> 8, id & 0xff);`
1291	bernd.boec	346	`if (!svl_add_str(l, id, refl->strings + refl->dict[i].offset)) {`
2014	bernd.boec	347	`fprintf(stderr, "ERROR: %s[#%u] output size limit exceeded\r\n", fname, linecount);`
1295	mateusz.vi	348	`return(0);`
1290	bernd.boec	349	`}`
597	mateuszvis	350	`}`
		351	`}`
		352	`}`
		353
1290	bernd.boec	354	`return(svl_strings_bytes(l));`
		355	`}`
597	mateuszvis	356
1290	bernd.boec	357
/* writes the LNG file header to fd: the 4-byte "SvL\x1a" signature followed
 * by the amount of strings as a 16-bit value (in the byte order of the
 * machine running this tool). returns 1 on success, 0 on write error. */
static int svl_write_header(unsigned short num_strings, FILE *fd) {
  if (fwrite("SvL\x1a", 1, 4, fd) != 4) return(0);
  if (fwrite(&num_strings, 1, 2, fd) != 2) return(0);
  return(1);
}
		361
		362
2102	mateusz.vi	363
		364	`/* write qlen literal bytes into dst, returns amount of "compressed" bytes */`
		365	`static unsigned short mvcomp_litqueue_dump(unsigned short *dst, const unsigned char q, unsigned short qlen) {`
		366	`unsigned short complen = 0;`
		367
		368	`AGAIN:`
		369
		370	`/* are we done? (also take care of guys calling me in for jokes) */`
		371	`if (qlen == 0) return(complen);`
		372
		373	`qlen--; /* now it's between 0 and 30 */`
		374	`/* write the length and first char */`
2117	mateusz.vi	375	`**dst = (unsigned short)((qlen / 2) << 8) \| q[0];`
2102	mateusz.vi	376	`*dst += 1;`
		377	`q++;`
		378	`complen += 2;`
		379
		380	`/* anything left? */`
		381	`if (qlen == 0) return(complen);`
		382
		383	`/* write the pending words */`
		384	`if (qlen > 1) {`
		385	`memcpy(dst, q, (qlen/2)2);`
		386	`*dst += qlen / 2;`
		387	`q += (qlen / 2) * 2;`
		388	`complen += (qlen / 2) * 2;`
		389	`qlen -= (qlen / 2) * 2;`
		390	`}`
		391
		392	`/* one byte might still be left if it did not fit inside a word */`
		393	`goto AGAIN;`
		394	`}`
		395
		396
/* compares up to n bytes of locations s1 and s2 and returns the amount of
 * leading bytes that are identical (0..n). the two areas may overlap. */
static unsigned short comparemem(const unsigned char *s1, const unsigned char *s2, unsigned short n) {
  unsigned short same = 0;
  while ((same < n) && (s1[same] == s2[same])) same++;
  return(same);
}
		403
		404
		405	`/* mvcomp applies the MV-COMPRESSION algorithm to data and returns the compressed size`
		406	`* updates len with the number of input bytes left unprocessed */`
2123	mateusz.vi	407	`static unsigned short mvcomp(void dstbuf, size_t dstbufsz, const unsigned char src, size_t len, unsigned short maxbytesahead) {`
2083	mateusz.vi	408	`unsigned short complen = 0;`
2117	mateusz.vi	409	`unsigned short *dst = dstbuf;`
2083	mateusz.vi	410	`unsigned short bytesprocessed = 0;`
2102	mateusz.vi	411	`unsigned char litqueue[32];`
		412	`unsigned char litqueuelen = 0;`
2083	mateusz.vi	413
2123	mateusz.vi	414	`*maxbytesahead = 0;`
		415
2083	mateusz.vi	416	`/* read src byte by byte, len times, each time look for a match of 15,14,13..2 chars in the back buffer */`
2117	mateusz.vi	417	`while (*len > 0) {`
2083	mateusz.vi	418	`unsigned short matchlen;`
2106	mateusz.vi	419	`unsigned short minmatch;`
2083	mateusz.vi	420	`unsigned short offset;`
2084	mateusz.vi	421	`matchlen = 16;`
2117	mateusz.vi	422	`if (len < matchlen) matchlen = (unsigned short)(len);`
2083	mateusz.vi	423
2123	mateusz.vi	424	`/* monitor the amount of bytes that the compressed stream is "ahead" of`
		425	`* uncompressed data, this is an information used later to size a proper`
		426	`* buffer for in-place depacking */`
		427	`if (complen + litqueuelen + 2 > bytesprocessed) {`
		428	`unsigned short mvstreamlen = complen + litqueuelen + 2;`
		429	`if (maxbytesahead < (mvstreamlen - bytesprocessed)) maxbytesahead = mvstreamlen - bytesprocessed;`
		430	`}`
		431
2117	mateusz.vi	432	`/* abort if no space in output buffer, but do NOT break a literal queue */`
		433	`if ((complen >= dstbufsz - 32) && (litqueuelen == 0)) return(complen);`
		434
2106	mateusz.vi	435	`/* look for a minimum match of 2 bytes, unless I have some pending literal bytes`
		436	`* awaiting, in which case I am going through a new data pattern and it is more`
2117	mateusz.vi	437	`* efficient to wait for a longer match before breaking the literal string */`
		438	`if (litqueuelen & 1) {`
		439	`minmatch = 3; /* breaking an uneven queue is less expensive */`
		440	`} else if (litqueuelen > 0) {`
		441	`goto NOMATCH; /* breaking an even-sized literal queue is never a good idea */`
2106	mateusz.vi	442	`} else {`
		443	`minmatch = 2;`
		444	`}`
		445
2117	mateusz.vi	446	`if (matchlen >= minmatch) {`
		447	`/* start at -1 and try to match something moving backward. note that`
		448	`* matching a string longer than the offset is perfectly valid, this`
		449	`* allows for encoding self-duplicating strings (see MVCOMP.TXT) */`
2086	mateusz.vi	450	`unsigned short maxoffset = 4096;`
2117	mateusz.vi	451	`unsigned short longestmatch = 0;`
		452	`unsigned short longestmatchoffset = 0;`
2086	mateusz.vi	453	`if (maxoffset > bytesprocessed) maxoffset = bytesprocessed;`
2083	mateusz.vi	454
2112	mateusz.vi	455	`for (offset = 1; offset <= maxoffset; offset++) {`
2117	mateusz.vi	456	`unsigned short matchingbytes;`
		457	`/* quick skip if first two bytes to not match (never interested in 1-byte matches) */`
		458	`if (((const unsigned short )src) != (const unsigned short )(src - offset)) continue;`
		459	`/* compute the exact number of bytes that match */`
		460	`matchingbytes = comparemem(src, src - offset, matchlen);`
		461	`if (matchingbytes == matchlen) {`
2086	mateusz.vi	462	`//printf("Found match of %u bytes at offset -%u: '%c%c%c...'\n", matchlen, offset, src[0], src[1], src[2]);`
2083	mateusz.vi	463	`goto FOUND;`
		464	`}`
2117	mateusz.vi	465	`if (matchingbytes > longestmatch) {`
		466	`longestmatch = matchingbytes;`
		467	`longestmatchoffset = offset ;`
		468	`}`
2083	mateusz.vi	469	`}`
2117	mateusz.vi	470	`/* is the longest match interesting? */`
		471	`if (longestmatch >= minmatch) {`
		472	`matchlen = longestmatch;`
		473	`offset = longestmatchoffset;`
		474	`goto FOUND;`
		475	`}`
2083	mateusz.vi	476	`}`
		477
2117	mateusz.vi	478	`NOMATCH:`
		479
2102	mateusz.vi	480	`/* if here: no match found, write a literal byte to queue */`
		481	`litqueue[litqueuelen++] = *src;`
2083	mateusz.vi	482	`src++;`
		483	`bytesprocessed++;`
2117	mateusz.vi	484	`*len -= 1;`
2102	mateusz.vi	485
		486	`/* dump literal queue to dst if max length reached */`
		487	`if (litqueuelen == 31) {`
		488	`complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);`
		489	`litqueuelen = 0;`
		490	`}`
2083	mateusz.vi	491	`continue;`
		492
		493	`FOUND: /* found a match of matchlen bytes at -offset */`
2102	mateusz.vi	494
		495	`/* dump awaiting literal queue to dst first */`
		496	`if (litqueuelen != 0) {`
		497	`complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);`
		498	`litqueuelen = 0;`
		499	`}`
		500
2117	mateusz.vi	501	`*dst = (unsigned short)((matchlen - 1) << 12) \| (offset - 1);`
2083	mateusz.vi	502	`dst++;`
		503	`src += matchlen;`
		504	`bytesprocessed += matchlen;`
2117	mateusz.vi	505	`*len -= matchlen;`
2083	mateusz.vi	506	`complen += 2;`
		507	`}`
		508
2102	mateusz.vi	509	`/* dump awaiting literal queue to dst first */`
		510	`if (litqueuelen != 0) {`
		511	`complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);`
		512	`litqueuelen = 0;`
		513	`}`
		514
2083	mateusz.vi	515	`return(complen);`
		516	`}`
		517
		518
		519	`/* write the language block (id, dict, strings) into the LNG file.`
		520	`* strings are compressed if compflag != 0 */`
2123	mateusz.vi	521	`static int svl_write_lang(const struct svl_lang l, FILE fd, int compflag, unsigned short *buffrequired) {`
1290	bernd.boec	522	`unsigned short strings_bytes = svl_strings_bytes(l);`
2083	mateusz.vi	523	`unsigned short langid = ((unsigned short )(&l->id));`
		524	`const char *stringsptr = l->strings;`
597	mateuszvis	525
2083	mateusz.vi	526	`/* if compressed then do the magic */`
		527	`if (compflag) {`
		528	`static char compstrings[65000];`
2089	mateusz.vi	529	`unsigned short comp_bytes;`
2117	mateusz.vi	530	`size_t stringslen = strings_bytes;`
2123	mateusz.vi	531	`unsigned short mvcompbytesahead;`
		532	`comp_bytes = mvcomp(compstrings, sizeof(compstrings), l->strings, &stringslen, &mvcompbytesahead);`
		533	`if (mvcompbytesahead + stringslen > *buffrequired) {`
		534	`*buffrequired = mvcompbytesahead + stringslen;`
		535	`}`
2089	mateusz.vi	536	`if (comp_bytes < strings_bytes) {`
2123	mateusz.vi	537	`printf("lang %c%c mvcomp-ressed (%u bytes -> %u bytes) mvcomp stream at most %u bytes ahead of raw data (%u bytes needed for in-place decomp)\n", l->id[0], l->id[1], strings_bytes, comp_bytes, mvcompbytesahead, strings_bytes + mvcompbytesahead);`
2089	mateusz.vi	538	`langid \|= 0x8000; /* LNG langblock flag that means "this lang is compressed" */`
		539	`strings_bytes = comp_bytes;`
		540	`stringsptr = compstrings;`
		541	`} else {`
		542	`printf("lang %c%c left UNCOMPRESSED (uncomp=%u bytes ; mvcomp=%u bytes)\n", l->id[0], l->id[1], strings_bytes, comp_bytes);`
		543	`}`
2083	mateusz.vi	544	`}`
		545
		546	`return((fwrite(&langid, 1, 2, fd) == 2) &&`
1290	bernd.boec	547	`(fwrite(&strings_bytes, 1, 2, fd) == 2) &&`
		548	`(fwrite(l->dict, 1, svl_dict_bytes(l), fd) == svl_dict_bytes(l)) &&`
2083	mateusz.vi	549	`(fwrite(stringsptr, 1, strings_bytes, fd) == strings_bytes));`
1290	bernd.boec	550	`}`
		551
		552
1296	mateusz.vi	553	`static int svl_write_c_source(const struct svl_lang l, const char fn, unsigned short biggest_langsz) {`
1290	bernd.boec	554	`FILE *fd;`
		555	`int i;`
		556	`unsigned short strings_bytes = svl_strings_bytes(l);`
		557	`unsigned short nextnlat = 0;`
1295	mateusz.vi	558	`unsigned short allocsz;`
1290	bernd.boec	559
		560	`fd = fopen(fn, "wb");`
		561	`if (fd == NULL) {`
1295	mateusz.vi	562	`return(0);`
		563	`}`
1290	bernd.boec	564
2123	mateusz.vi	565	`allocsz = biggest_langsz + (biggest_langsz / 20);`
		566	`printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (5%% safety margin)\n", biggest_langsz, allocsz);`
1295	mateusz.vi	567	`fprintf(fd, "/* THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY) */\r\n");`
		568	`fprintf(fd, "const unsigned short svarlang_memsz = %uu;\r\n", allocsz);`
		569	`fprintf(fd, "const unsigned short svarlang_string_count = %uu;\r\n\r\n", l->num_strings);`
		570	`fprintf(fd, "char svarlang_mem[%u] = {\r\n", allocsz);`
		571
		572	`for (i = 0; i < strings_bytes; i++) {`
		573	`if (!fprintf(fd, "0x%02x", l->strings[i])) {`
		574	`fclose(fd);`
		575	`return(0);`
1290	bernd.boec	576	`}`
		577
1295	mateusz.vi	578	`if (i + 1 < strings_bytes) fprintf(fd, ",");`
		579	`nextnlat++;`
		580	`if (l->strings[i] == '\0' \|\| nextnlat == 16) {`
1290	bernd.boec	581	`fprintf(fd, "\r\n");`
1295	mateusz.vi	582	`nextnlat = 0;`
1290	bernd.boec	583	`}`
1295	mateusz.vi	584	`}`
		585	`fprintf(fd, "};\r\n\r\n");`
1290	bernd.boec	586
1295	mateusz.vi	587	`fprintf(fd, "unsigned short svarlang_dict[%u] = {\r\n", l->num_strings * 2);`
		588	`for (i = 0; i < l->num_strings; i++) {`
		589	`if (!fprintf(fd, "0x%04x,0x%04x", l->dict[i].id, l->dict[i].offset)) {`
		590	`fclose(fd);`
		591	`return(0);`
		592	`}`
		593	`if (i + 1 < l->num_strings) fprintf(fd, ",");`
		594	`fprintf(fd, "\r\n");`
1290	bernd.boec	595	`}`
1295	mateusz.vi	596	`fprintf(fd, "};\r\n");`
1290	bernd.boec	597
1295	mateusz.vi	598	`fclose(fd);`
		599
		600	`return(1);`
1290	bernd.boec	601	`}`
		602
		603
2014	bernd.boec	604	`static int svl_write_asm_source(const struct svl_lang l, const char fn, unsigned short biggest_langsz, int format) {`
		605	`FILE *fd;`
		606	`int i;`
		607	`unsigned short strings_bytes = svl_strings_bytes(l);`
		608	`unsigned short nextnlat = 0;`
		609	`unsigned short allocsz;`
		610
		611	`const char *public = (format == ASM_OUTPUT) ? "public" : "global";`
		612
		613	`fd = fopen(fn, "wb");`
		614	`if (fd == NULL) {`
		615	`return(0);`
		616	`}`
		617
		618	`allocsz = biggest_langsz + (biggest_langsz / 20);`
		619	`printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (5%% safety margin)\n", biggest_langsz, allocsz);`
		620	`fprintf(fd, "; THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY)\r\n");`
		621	`fprintf(fd, "%s svarlang_memsz\r\n", public);`
		622	`fprintf(fd, "svarlang_memsz dw %u\r\n", allocsz);`
		623	`fprintf(fd, "%s svarlang_string_count\r\n", public);`
		624	`fprintf(fd, "svarlang_string_count dw %u\r\n\r\n", l->num_strings);`
		625	`fprintf(fd, "%s svarlang_mem\r\n", public);`
		626	`fprintf(fd, "svarlang_mem:\r\n");`
		627
		628	`if (strings_bytes > 0) fprintf(fd, "db ");`
		629
		630	`for (i = 0; i < strings_bytes; i++) {`
2016	bernd.boec	631	`if (!fprintf(fd, "%u", l->strings[i])) {`
2014	bernd.boec	632	`fclose(fd);`
		633	`return(0);`
		634	`}`
		635
		636	`nextnlat++;`
		637	`if (l->strings[i] == '\0' \|\| nextnlat == 16) {`
		638	`fprintf(fd, "\r\n");`
		639	`if (i + 1 < strings_bytes ) fprintf(fd, "db ");`
		640	`nextnlat = 0;`
		641	`}`
		642	`else {`
		643	`fprintf(fd, ",");`
		644	`}`
		645	`}`
		646
		647	`fprintf(fd, "\r\n%s svarlang_dict\r\n", public);`
		648	`fprintf(fd, "svarlang_dict:\r\n");`
		649	`for (i = 0; i < l->num_strings; i++) {`
2016	bernd.boec	650	`if (!fprintf(fd, "dw %u,%u\r\n", l->dict[i].id, l->dict[i].offset)) {`
2014	bernd.boec	651	`fclose(fd);`
		652	`return(0);`
		653	`}`
		654	`}`
		655
		656	`fclose(fd);`
		657
		658	`return(1);`
		659	`}`
		660
		661
597	mateuszvis	662	`int main(int argc, char **argv) {`
2090	mateusz.vi	663	`FILE *fd;`
597	mateuszvis	664	`int ecode = 0;`
2014	bernd.boec	665	`int i, output_format = C_OUTPUT;`
2092	mateusz.vi	666	`int mvcomp_enabled = 1;`
2094	mateusz.vi	667	`int excref = 0;`
1061	mateusz.vi	668	`unsigned short biggest_langsz = 0;`
2015	bernd.boec	669	`struct svl_lang lang = NULL, reflang = NULL;`
597	mateuszvis	670
		671	`if (argc < 2) {`
1247	mateusz.vi	672	`puts("tlumacz ver " SVARLANGVER " - this tool is part of the SvarLANG project.");`
		673	`puts("converts a set of CATS-style translations in files EN.TXT, PL.TXT, etc");`
2094	mateusz.vi	674	`puts("into a single resource file (OUT.LNG). Also generates a deflang source");`
		675	`puts("file that contains a properly sized buffer pre-filled with the first");`
		676	`puts("(reference) language.");`
1247	mateusz.vi	677	`puts("");`
2108	mateusz.vi	678	`puts("usage: tlumacz [/c\|/asm\|/nasm\|/nodef] [/nocomp] [/excref] en fr pl ...");`
2092	mateusz.vi	679	`puts("");`
2094	mateusz.vi	680	`puts("/c generates deflang.c (default)");`
		681	`puts("/asm deflang ASM output");`
		682	`puts("/nasm deflang NASM output");`
2108	mateusz.vi	683	`puts("/nodef does NOT generate a deflang source file (only an LNG file)");`
2113	mateusz.vi	684	`puts("/nocomp disables the MVCOMP compression of strings in the LNG file");`
2094	mateusz.vi	685	`puts("/excref excludes ref lang from the LNG file (inserted to deflang only)");`
597	mateuszvis	686	`return(1);`
		687	`}`
		688
601	mateuszvis	689	`fd = fopen("out.lng", "wb");`
597	mateuszvis	690	`if (fd == NULL) {`
2014	bernd.boec	691	`fprintf(stderr, "ERROR: FAILED TO CREATE OR OPEN OUT.LNG");`
597	mateuszvis	692	`return(1);`
		693	`}`
		694
		695	`/* write lang blocks */`
		696	`for (i = 1; i < argc; i++) {`
		697	`unsigned short sz;`
		698	`char id[3];`
		699
2014	bernd.boec	700	`if (!strcmp(argv[i], "/c")) {`
		701	`output_format = C_OUTPUT;`
		702	`continue;`
2090	mateusz.vi	703	`} else if (!strcmp(argv[i], "/asm")) {`
2014	bernd.boec	704	`output_format = ASM_OUTPUT;`
		705	`continue;`
		706	`} else if(!strcmp(argv[i], "/nasm")) {`
		707	`output_format = NASM_OUTPUT;`
		708	`continue;`
2092	mateusz.vi	709	`} else if(!strcmp(argv[i], "/nocomp")) {`
		710	`mvcomp_enabled = 0;`
2090	mateusz.vi	711	`continue;`
2108	mateusz.vi	712	`} else if(!strcmp(argv[i], "/nodef")) {`
		713	`output_format = NO_OUTPUT;`
		714	`continue;`
2094	mateusz.vi	715	`} else if(!strcmp(argv[i], "/excref")) {`
		716	`excref = 1;`
		717	`continue;`
2014	bernd.boec	718	`}`
		719
597	mateuszvis	720	`if (strlen(argv[i]) != 2) {`
2014	bernd.boec	721	`fprintf(stderr, "INVALID LANG SPECIFIED: %s\r\n", argv[i]);`
597	mateuszvis	722	`ecode = 1;`
2015	bernd.boec	723	`goto exit_main;`
597	mateuszvis	724	`}`
		725	`id[0] = argv[i][0];`
		726	`id[1] = argv[i][1];`
		727	`id[2] = 0;`
		728
1290	bernd.boec	729	`if ((lang = svl_lang_new(id, DICT_CAP, STRINGS_CAP)) == NULL) {`
2014	bernd.boec	730	`fprintf(stderr, "OUT OF MEMORY\r\n");`
2015	bernd.boec	731	`ecode = 1;`
		732	`goto exit_main;`
1290	bernd.boec	733	`}`
		734
		735	`sz = svl_lang_from_cats_file(lang, reflang);`
597	mateuszvis	736	`if (sz == 0) {`
2014	bernd.boec	737	`fprintf(stderr, "ERROR COMPUTING LANG '%s'\r\n", id);`
597	mateuszvis	738	`ecode = 1;`
2015	bernd.boec	739	`goto exit_main;`
597	mateuszvis	740	`} else {`
		741	`printf("computed %s lang block of %u bytes\r\n", id, sz);`
1061	mateusz.vi	742	`if (sz > biggest_langsz) biggest_langsz = sz;`
597	mateuszvis	743	`}`
1290	bernd.boec	744	`svl_compact_lang(lang);`
		745
		746	`/* write header if first (reference) language */`
2090	mateusz.vi	747	`if (!reflang) {`
1290	bernd.boec	748	`if (!svl_write_header(lang->num_strings, fd)) {`
2014	bernd.boec	749	`fprintf(stderr, "ERROR WRITING TO OUTPUT FILE\r\n");`
1290	bernd.boec	750	`ecode = 1;`
2015	bernd.boec	751	`goto exit_main;`
1290	bernd.boec	752	`}`
		753	`}`
1293	mateusz.vi	754
1290	bernd.boec	755	`/* write lang ID to file, followed string table size, and then`
2094	mateusz.vi	756	`the dictionary and string table for current language`
		757	`skip this for reference language if /excref given */`
		758	`if ((reflang != NULL) \|\| (excref == 0)) {`
2123	mateusz.vi	759	`/* also updates the biggest_langsz variable to accomodate enough space`
		760	`* for in-place decompression of mvcomp-compressed lang blocks */`
		761	`if (!svl_write_lang(lang, fd, mvcomp_enabled, &biggest_langsz)) {`
2094	mateusz.vi	762	`fprintf(stderr, "ERROR WRITING TO OUTPUT FILE\r\n");`
		763	`ecode = 1;`
		764	`goto exit_main;`
		765	`}`
2095	mateusz.vi	766	`} else {`
		767	`puts("ref language NOT saved in the LNG file (/excref)");`
597	mateuszvis	768	`}`
1290	bernd.boec	769
1061	mateusz.vi	770	`/* remember reference data for other languages */`
2014	bernd.boec	771	`if (!reflang) {`
1290	bernd.boec	772	`reflang = lang;`
1295	mateusz.vi	773	`} else {`
1290	bernd.boec	774	`svl_lang_free(lang);`
		775	`lang = NULL;`
		776	`}`
597	mateuszvis	777	`}`
		778
2014	bernd.boec	779	`if (!reflang) {`
		780	`fprintf(stderr, "ERROR: NO LANGUAGE GIVEN\r\n");`
2015	bernd.boec	781	`ecode = 1;`
		782	`goto exit_main;`
1061	mateusz.vi	783	`}`
		784
2108	mateusz.vi	785	`/* compute the deflang file containing a dump of the reference lang block */`
2014	bernd.boec	786	`if (output_format == C_OUTPUT) {`
		787	`if (!svl_write_c_source(reflang, "deflang.c", biggest_langsz)) {`
		788	`fprintf(stderr, "ERROR: FAILED TO OPEN OR CREATE DEFLANG.C\r\n");`
		789	`ecode = 1;`
2083	mateusz.vi	790	`}`
2108	mateusz.vi	791	`} else if ((output_format == ASM_OUTPUT) \|\| (output_format == NASM_OUTPUT)) {`
2014	bernd.boec	792	`if (!svl_write_asm_source(reflang, "deflang.inc", biggest_langsz, output_format)) {`
		793	`fprintf(stderr, "ERROR: FAILED TO OPEN OR CREATE DEFLANG.INC\r\n");`
		794	`ecode = 1;`
		795	`}`
1290	bernd.boec	796	`}`
		797
2015	bernd.boec	798	`exit_main:`
2108	mateusz.vi	799	`if (lang && (lang != reflang)) {`
2015	bernd.boec	800	`svl_lang_free(lang);`
		801	`}`
		802	`if (reflang) {`
		803	`svl_lang_free(reflang);`
		804	`reflang = NULL;`
2083	mateusz.vi	805	`lang = NULL;`
2015	bernd.boec	806	`}`
2014	bernd.boec	807
2015	bernd.boec	808	`fclose(fd);`
		809
597	mateuszvis	810	`return(ecode);`
		811	`}`

Subversion Repositories SvarDOS

(root)/svarlang.lib/trunk/tlumacz.c – Rev 2143