Subversion Repositories SvarDOS

Rev

Rev 2113 | Rev 2123 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 2113 Rev 2117
Line 370... Line 370...
370
  /* are we done? (also take care of guys calling me in for jokes) */
370
  /* are we done? (also take care of guys calling me in for jokes) */
371
  if (qlen == 0) return(complen);
371
  if (qlen == 0) return(complen);
372
 
372
 
373
  qlen--; /* now it's between 0 and 30 */
373
  qlen--; /* now it's between 0 and 30 */
374
  /* write the length and first char */
374
  /* write the length and first char */
375
  **dst = ((qlen / 2) << 8) | q[0];
375
  **dst = (unsigned short)((qlen / 2) << 8) | q[0];
376
  *dst += 1;
376
  *dst += 1;
377
  q++;
377
  q++;
378
  complen += 2;
378
  complen += 2;
379
 
379
 
380
  /* anything left? */
380
  /* anything left? */
Line 392... Line 392...
392
  /* one byte might still be left if it did not fit inside a word */
392
  /* one byte might still be left if it did not fit inside a word */
393
  goto AGAIN;
393
  goto AGAIN;
394
}
394
}
395
 
395
 
396
 
396
 
-
 
397
/* compare up to n bytes at locations s1 and s2, returns the number of identical leading bytes, i.e. the index of the first mismatch (0..n) */
-
 
398
static unsigned short comparemem(const unsigned char *s1, const unsigned char *s2, unsigned short n) {
-
 
399
  unsigned short i;
-
 
400
  for (i = 0; (i < n) && (s1[i] == s2[i]); i++);
-
 
401
  return(i);
-
 
402
}
-
 
403
 
-
 
404
 
397
/* mvcomp applies the MV-COMPRESSION algorithm to data and returns the compressed size */
405
/* mvcomp applies the MV-COMPRESSION algorithm to data and returns the compressed size
-
 
406
 * updates len with the number of input bytes left unprocessed */
398
static unsigned short mvcomp(char *dstbuf, const char *src, unsigned short len) {
407
static unsigned short mvcomp(void *dstbuf, size_t dstbufsz, const unsigned char *src, size_t *len) {
399
  unsigned short complen = 0;
408
  unsigned short complen = 0;
400
  unsigned short *dst = (void *)dstbuf;
409
  unsigned short *dst = dstbuf;
401
  unsigned short bytesprocessed = 0;
410
  unsigned short bytesprocessed = 0;
402
  unsigned char litqueue[32];
411
  unsigned char litqueue[32];
403
  unsigned char litqueuelen = 0;
412
  unsigned char litqueuelen = 0;
404
 
413
 
405
  /* read src byte by byte, len times, each time look for a match of 15,14,13..2 chars in the back buffer */
414
  /* process src while input remains (*len > 0), each time looking for the longest match (2..16 bytes) in the back buffer */
406
  while (len > 0) {
415
  while (*len > 0) {
407
    unsigned short matchlen;
416
    unsigned short matchlen;
408
    unsigned short minmatch;
417
    unsigned short minmatch;
409
    unsigned short offset;
418
    unsigned short offset;
410
    matchlen = 16;
419
    matchlen = 16;
411
    if (len < matchlen) matchlen = len;
420
    if (*len < matchlen) matchlen = (unsigned short)(*len);
-
 
421
 
-
 
422
    /* abort if no space in output buffer, but do NOT break a literal queue */
-
 
423
    if ((complen >= dstbufsz - 32) && (litqueuelen == 0)) return(complen);
412
 
424
 
413
    /* look for a minimum match of 2 bytes, unless I have some pending literal bytes
425
    /* look for a minimum match of 2 bytes, unless I have some pending literal bytes
414
     * awaiting, in which case I am going through a new data pattern and it is more
426
     * awaiting, in which case I am going through a new data pattern and it is more
415
     * efficient to wait for a 3-bytes match before breaking the literal string */
427
     * efficient to wait for a longer match before breaking the literal string */
416
    if (litqueuelen != 0) {
428
    if (litqueuelen & 1) {
-
 
429
      minmatch = 3; /* breaking an uneven queue is less expensive */
417
      minmatch = 3;
430
    } else if (litqueuelen > 0) {
-
 
431
      goto NOMATCH; /* breaking an even-sized literal queue is never a good idea */
418
    } else {
432
    } else {
419
      minmatch = 2;
433
      minmatch = 2;
420
    }
434
    }
421
 
435
 
422
    for (; matchlen >= minmatch; matchlen--) {
436
    if (matchlen >= minmatch) {
423
      /* start at -1 and try to match something moving backward */
437
      /* start at -1 and try to match something moving backward. note that
-
 
438
       * matching a string longer than the offset is perfectly valid, this
-
 
439
       * allows for encoding self-duplicating strings (see MVCOMP.TXT) */
424
      unsigned short maxoffset = 4096;
440
      unsigned short maxoffset = 4096;
-
 
441
      unsigned short longestmatch = 0;
-
 
442
      unsigned short longestmatchoffset = 0;
425
      if (maxoffset > bytesprocessed) maxoffset = bytesprocessed;
443
      if (maxoffset > bytesprocessed) maxoffset = bytesprocessed;
426
 
444
 
427
      for (offset = 1; offset <= maxoffset; offset++) {
445
      for (offset = 1; offset <= maxoffset; offset++) {
-
 
446
        unsigned short matchingbytes;
-
 
447
        /* quick skip if first two bytes do not match (never interested in 1-byte matches) */
-
 
448
        if (*((const unsigned short *)src) != *(const unsigned short *)(src - offset)) continue;
-
 
449
        /* compute the exact number of bytes that match */
428
        if (memcmp(src, src - offset, matchlen) == 0) {
450
        matchingbytes = comparemem(src, src - offset, matchlen);
-
 
451
        if (matchingbytes == matchlen) {
429
          //printf("Found match of %u bytes at offset -%u: '%c%c%c...'\n", matchlen, offset, src[0], src[1], src[2]);
452
          //printf("Found match of %u bytes at offset -%u: '%c%c%c...'\n", matchlen, offset, src[0], src[1], src[2]);
430
          goto FOUND;
453
          goto FOUND;
431
        }
454
        }
-
 
455
        if (matchingbytes > longestmatch) {
-
 
456
          longestmatch = matchingbytes;
-
 
457
          longestmatchoffset = offset ;
-
 
458
        }
-
 
459
      }
-
 
460
      /* is the longest match interesting? */
-
 
461
      if (longestmatch >= minmatch) {
-
 
462
        matchlen = longestmatch;
-
 
463
        offset = longestmatchoffset;
-
 
464
        goto FOUND;
432
      }
465
      }
433
    }
466
    }
434
 
467
 
-
 
468
    NOMATCH:
-
 
469
 
435
    /* if here: no match found, write a literal byte to queue */
470
    /* if here: no match found, write a literal byte to queue */
436
    litqueue[litqueuelen++] = *src;
471
    litqueue[litqueuelen++] = *src;
437
    src++;
472
    src++;
438
    bytesprocessed++;
473
    bytesprocessed++;
439
    len--;
474
    *len -= 1;
440
 
475
 
441
    /* dump literal queue to dst if max length reached */
476
    /* dump literal queue to dst if max length reached */
442
    if (litqueuelen == 31) {
477
    if (litqueuelen == 31) {
443
      complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
478
      complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
444
      litqueuelen = 0;
479
      litqueuelen = 0;
Line 451... Line 486...
451
    if (litqueuelen != 0) {
486
    if (litqueuelen != 0) {
452
      complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
487
      complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen);
453
      litqueuelen = 0;
488
      litqueuelen = 0;
454
    }
489
    }
455
 
490
 
456
    *dst = ((matchlen - 1) << 12) | (offset - 1);
491
    *dst = (unsigned short)((matchlen - 1) << 12) | (offset - 1);
457
    dst++;
492
    dst++;
458
    src += matchlen;
493
    src += matchlen;
459
    bytesprocessed += matchlen;
494
    bytesprocessed += matchlen;
460
    len -= matchlen;
495
    *len -= matchlen;
461
    complen += 2;
496
    complen += 2;
462
  }
497
  }
463
 
498
 
464
  /* dump awaiting literal queue to dst first */
499
  /* dump awaiting literal queue to dst first */
465
  if (litqueuelen != 0) {
500
  if (litqueuelen != 0) {
Line 480... Line 515...
480
 
515
 
481
  /* if compressed then do the magic */
516
  /* if compressed then do the magic */
482
  if (compflag) {
517
  if (compflag) {
483
    static char compstrings[65000];
518
    static char compstrings[65000];
484
    unsigned short comp_bytes;
519
    unsigned short comp_bytes;
-
 
520
    size_t stringslen = strings_bytes;
485
    comp_bytes = mvcomp(compstrings, l->strings, strings_bytes);
521
    comp_bytes = mvcomp(compstrings, sizeof(compstrings), l->strings, &stringslen);
486
    if (comp_bytes < strings_bytes) {
522
    if (comp_bytes < strings_bytes) {
487
      printf("lang %c%c mvcomp-ressed (%u bytes -> %u bytes)\n", l->id[0], l->id[1], strings_bytes, comp_bytes);
523
      printf("lang %c%c mvcomp-ressed (%u bytes -> %u bytes)\n", l->id[0], l->id[1], strings_bytes, comp_bytes);
488
      langid |= 0x8000; /* LNG langblock flag that means "this lang is compressed" */
524
      langid |= 0x8000; /* LNG langblock flag that means "this lang is compressed" */
489
      strings_bytes = comp_bytes;
525
      strings_bytes = comp_bytes;
490
      stringsptr = compstrings;
526
      stringsptr = compstrings;
Line 510... Line 546...
510
  fd = fopen(fn, "wb");
546
  fd = fopen(fn, "wb");
511
  if (fd == NULL) {
547
  if (fd == NULL) {
512
    return(0);
548
    return(0);
513
  }
549
  }
514
 
550
 
-
 
551
  /* the maximum mvcomp overhead is 3.23% (i.e. an increase of 1 byte for every
-
 
552
   * 31 bytes of source, because 31 source bytes are encoded as a 32-byte
-
 
553
   * sequence). The necessary buffer for in-place decompression is therefore
-
 
554
   * FILESIZE + (FILESIZE / 31) + 1 */
515
  allocsz = biggest_langsz + (biggest_langsz / 20);
555
  allocsz = biggest_langsz + (biggest_langsz / 31) + 1;
516
  printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (5%% safety margin)\n", biggest_langsz, allocsz);
556
  printf("biggest lang block is %u bytes -> allocating a %u bytes buffer (3.23%% margin for inplace mvcomp decompression)\n", biggest_langsz, allocsz);
517
  fprintf(fd, "/* THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY) */\r\n");
557
  fprintf(fd, "/* THIS FILE HAS BEEN GENERATED BY TLUMACZ (PART OF THE SVARLANG LIBRARY) */\r\n");
518
  fprintf(fd, "const unsigned short svarlang_memsz = %uu;\r\n", allocsz);
558
  fprintf(fd, "const unsigned short svarlang_memsz = %uu;\r\n", allocsz);
519
  fprintf(fd, "const unsigned short svarlang_string_count = %uu;\r\n\r\n", l->num_strings);
559
  fprintf(fd, "const unsigned short svarlang_string_count = %uu;\r\n\r\n", l->num_strings);
520
  fprintf(fd, "char svarlang_mem[%u] = {\r\n", allocsz);
560
  fprintf(fd, "char svarlang_mem[%u] = {\r\n", allocsz);
521
 
561