BPE Compression Code converted to Python error

Programming related discussions related to game research
eatrawmeat391
Posts: 9
Joined: Sun Jul 17, 2016 5:23 am

BPE Compression Code converted to Python error

Post by eatrawmeat391 »

I tried to convert aluigi's yuke_bpe.c code to python for decompression BPE files in memory.The code works fine for most of the files,however there is one file that it fails to decompress properly.This is not the bug from his source since quickbms unpacks them properly.
Can anyone tell me what is wrong with this converted code?

Code: Select all

from binascii import hexlify
from io import BytesIO
import numpy

def read_int(content,offset,size):
    string   = content[offset:offset+size]
    string   = string[::-1]
    hex_data = hexlify(string)   
    return int(hex_data, 16)
   
def xgetc(input_string):
    try:
        # ord return an int of a ASCII character
        return ord(input_string.read(1))
    except:
        # If reaches EOF then ord will take a string size of 0 which raises Exception
        return -1   
   
def memset(bytes, char ,time):
    # chr converts an int to a ASCII character
    # 'ac' * 4 = 'acacacac'
    replace_string = chr(char) * time
    # bytes is a file like object
    offset = bytes.tell()
    bytes.seek(0)
    bytes.write(replace_string)
    bytes.seek(offset)
    return
   
def yuke_bpe(input_buffer,unbpe_size,fillout_size):
    in_buf = BytesIO(input_buffer)
    in_buf.seek(0)
    out_buf = BytesIO()
    stack = numpy.empty((512+4096), dtype=numpy.uint8) # stack can also be a list if you don't like numpy
    count = 0
    while True:
        i = 0
        while True:
            c = xgetc(in_buf)
            if c < 0:
                break
            if c > 127:
                c -= 127
                while c > 0 and i < 256:
                    stack[i * 2] = i
                    c -= 1
                    i += 1
            c += 1
            while c > 0 and i < 256:
                n = xgetc(in_buf)
                if n < 0:
                    break
                stack[i * 2] = n
                if i != n:
                    n = xgetc(in_buf)
                    if n < 0:
                        break
                    stack[(i * 2) + 1] = n
                c -= 1
                i += 1
            if not i < 256:
                break
               
        n = xgetc(in_buf)
        if n < 0:
            break
        size = n
        n = xgetc(in_buf)
        if n < 0:
            break
        size |= (n << 8)
       
        while (size | count) != 0:
            if count != 0:
                count -= 1
                n = stack[count + 512]
            else:
                n = xgetc(in_buf)
                if n < 0:
                    break
                size -= 1
            c = stack[n * 2]
            if n == c:
                if len(out_buf.getvalue()) >= unbpe_size:
                    return out_buf.getvalue()
                out_buf.write(chr(n))
            else:
                if (count + 512 + 2) > (512 + 4096): # (512 + 4096 is the sizeof(stack))
                    return out_buf.getvalue()
                stack[count + 512] = stack[(n * 2) + 1];
                stack[count + 512 + 1] = c
                count += 2
    if fillout_size != 0: # this is what is wanted by the format
        memset(out_buf,0,(unbpe_size-len(out_buf.getvalue())))
    return out_buf.getvalue() # getvalue returns all of the bytes in a BytesIO without changing the cursor
   
def extract_bpe(input_buffer):
    unbpe_size = read_int(input_buffer, 0x0C, 4)
    unbpe_data = yuke_bpe(input_buffer[0x10:len(input_buffer)], unbpe_size, 1)
    return unbpe_data


Original Code

Code: Select all

/*
  by Luigi Auriemma

reversed from asmodean's unrrbpe.exe
*/

#include <string.h>

static int xgetc(unsigned char **in, unsigned char *inl) {
    int     ret;
    if(*in >= inl) return(-1);
    ret = **in;
    (*in)++;
    return(ret);
}

int yuke_bpe(unsigned char *in, int insz, unsigned char *out, int outsz, int fill_outsz) {
    unsigned char   stack[512 + 4096];
    int             c,
                    count,
                    i,
                    size,
                    n;

    unsigned char   *inl,
                    *o,
                    *outl;

    inl  = in + insz;
    o    = out;
    outl = out + outsz;

    count = 0;
    for(;;) {
        i = 0;
        do {
            if((c = xgetc(&in, inl)) < 0) break;
            if(c > 127) {
                c -= 127;
                while((c > 0) && (i < 256)) {
                    stack[i * 2] = i;
                    c--;
                    i++;
                }
            }
            c++;
            while((c > 0) && (i < 256)) {
                if((n = xgetc(&in, inl)) < 0) break;
                stack[i * 2] = n;
                if(i != n) {
                    if((n = xgetc(&in, inl)) < 0) break;
                    stack[(i * 2) + 1] = n;
                }
                c--;
                i++;
            }
        } while(i < 256);

        if((n = xgetc(&in, inl)) < 0) break;
        size = n;
        if((n = xgetc(&in, inl)) < 0) break;
        size |= (n << 8);

        while(size || count) {
            if(count) {
                count--;
                n = stack[count + 512];
            } else {
                if((n = xgetc(&in, inl)) < 0) break;
                size--;
            }
            c = stack[n * 2];
            if(n == c) {
                if(o >= outl) return(-1);
                *o++ = n;
            } else {
                if((count + 512 + 2) > sizeof(stack)) return(-1);
                stack[count + 512] = stack[(n * 2) + 1];
                stack[count + 512 + 1] = c;
                count += 2;
            }
        }
    }
    if(fill_outsz) {    // this is what is wanted by the format
        memset(o, 0, outl - o);
        o = outl;
    }
    return(o - out);
}
eatrawmeat391
Posts: 9
Joined: Sun Jul 17, 2016 5:23 am

Re: BPE Compression Code converted to Python error

Post by eatrawmeat391 »

I have found out where the problem is.I forgot that o is an increasing pointer.The code worked when I corrected the memset function.

Code: Select all

def memset(bytes, char ,time):
    # chr converts an int to a ASCII character
    # 'ac' * 4 = 'acacacac'
    replace_string = chr(char) * time
    bytes.write(replace_string)
    return

Thanks for the people who have looked at my files