// sflx-decode.c : Houses sflx, which takes as input a buffer containing a sflx
// sound file and writes out raw PCM audio at 11025Hz into an outgoing buffer.
//
// Written by Eric C. Peterson, licensed under UoI/NCSA.
// Revision history:
// 	+ 2009 Feb 09 : Initial release

#include <string.h>     // for memcpy
#include <stdio.h>      // for fprintf and stderr
#include <stdint.h>     // for fixed-width integer types and SIZE_MAX

#include <sys/stat.h>   // the following were just for testing
#include <stdlib.h>
#include <assert.h>

// largest per-block sample count this decoder is written for.  sflx() sizes
// its per-block frame buffer as MAX_BLOCKSIZE * 4 bytes.
#define MAX_BLOCKSIZE 256 // well, _i_ haven't seen one larger than this :P

// utility function.  linearly interpolates the string of (size)-many bytes in
// incoming to get a string of (size*2)-many bytes, in place.
//
//   incoming : holds `size` unsigned 8-bit samples on entry and must have room
//              for size*2 bytes; on return it holds the stretched signal.
//   size     : number of input samples.  values <= 0 leave the buffer alone.
//
// output sample 2*i is input sample i, and output sample 2*i+1 is the midpoint
// of input samples i and i+1.  the last input sample has no successor, so it
// is repeated instead of reading the byte that sits past the end of the input.
void interpolate2(unsigned char* incoming, int size) {
    int i;
    int next;

    if (size <= 0)
        return;

    // walk from the back: outputs for sample i land at indices >= 2*i, which
    // never overlap an input sample we still have to read (indices < i), so
    // no scratch buffer (and no size limit) is needed.
    next = incoming[size - 1];
    for (i = size - 1; i >= 0; i--) {
        int cur = incoming[i];

        incoming[2*i + 1] = (unsigned char)((cur + next) / 2);
        incoming[2*i]     = (unsigned char)cur;
        next = cur;
    }
}

// linearly interpolates the string of (size)-many bytes in incoming to get a
// string of (size*4)-many bytes, in place.
//
//   incoming : holds `size` unsigned 8-bit samples on entry and must have room
//              for size*4 bytes; on return it holds the stretched signal.
//   size     : number of input samples.  values <= 0 leave the buffer alone.
//
// each input sample i becomes four outputs: the sample itself followed by the
// points 1/4, 1/2 and 3/4 of the way towards input sample i+1.  the last input
// sample has no successor, so it is repeated instead of reading the byte that
// sits past the end of the input.
void interpolate4(unsigned char* incoming, int size) {
    int i;
    int next;

    if (size <= 0)
        return;

    // walk from the back: outputs for sample i land at indices >= 4*i, which
    // never overlap an input sample we still have to read (indices < i), so
    // no scratch buffer (and no size limit) is needed.
    next = incoming[size - 1];
    for (i = size - 1; i >= 0; i--) {
        int cur = incoming[i];

        incoming[4*i + 3] = (unsigned char)((cur + next * 3) / 4);
        incoming[4*i + 2] = (unsigned char)((cur + next) / 2);
        incoming[4*i + 1] = (unsigned char)((cur * 3 + next) / 4);
        incoming[4*i]     = (unsigned char)cur;
        next = cur;
    }
}

// factored out code that does the table and dpcm decoding, look for invocation
// in the block decoding loop in sflx.
//
//   inptr         : first byte after the block's one-byte header.  the data
//                   starts with a table of (1 << bits_per_word) delta bytes,
//                   followed by bytes that each pack (8 / bits_per_word)
//                   table indices, lowest bits first.
//   frame         : receives `framesize` decoded samples, stored as the
//                   signed level plus SILENCE.
//   framesize     : number of samples to produce.  nothing is read past the
//                   delta table when this is <= 0.
//   accumulator   : running signed DPCM level carried from block to block;
//                   read on entry and updated on exit.  it is declared
//                   unsigned but holds a signed value.
//   bits_per_word : width of one table index.  must be in 1..4 so the table
//                   fits in MAX_DELTAS entries.
//
// returns the number of bytes consumed from inptr (delta table included), or
// 0 if bits_per_word is out of range.
#define MAX_DELTAS      16      // 4 bits max means 16 deltas max
#define SILENCE         0x80    // pcm is shifted by 128, this is secretly zero
int process_dpcm(char *inptr, char *frame, int framesize,
               unsigned int *accumulator, int bits_per_word) {
    int deltas[MAX_DELTAS];
    int delta_count, words_per_byte, word_mask;
    int level;
    int frame_index = 0;
    int ret = 0;
    int delta_index;

    // reject widths that would divide by zero or overrun the delta table
    if (bits_per_word < 1 || bits_per_word > 8)
        return 0;
    delta_count = 1 << bits_per_word;
    if (delta_count > MAX_DELTAS)
        return 0;
    words_per_byte = 8 / bits_per_word;
    word_mask = delta_count - 1;

    // grab our deltas.  multiply rather than shift: the table byte is a plain
    // char and may be negative, and left-shifting a negative value is
    // undefined behaviour.
    for (delta_index = 0; delta_index < delta_count; delta_index++) {
        deltas[delta_index] = inptr[ret++] * 2 - SILENCE;
    }

    // do the arithmetic on a signed copy of the accumulator
    level = (int)*accumulator;

    while (frame_index < framesize) {
        // read the packed byte as unsigned so the shifts below are well
        // defined even when its top bit is set
        int in_word = (unsigned char)inptr[ret++];
        int word_index;

        // stop at framesize even in the middle of a byte so we never write
        // past the end of the caller's frame
        for (word_index = 0;
             word_index < words_per_byte && frame_index < framesize;
             word_index++) {
            // build the new level from the appropriate delta and
            // whatever's stored in the DPCM accumulator
            level += deltas[(in_word >> (word_index * bits_per_word)) &
                            word_mask];

            // clip it, clip it good
            if (level > 127)
                level = 127;
            else if (level < -127)
                level = -127;

            // store the processed sample to the frame, shifted back into
            // unsigned pcm range
            frame[frame_index++] = (char)(level + SILENCE);
        }
    }

    *accumulator = (unsigned int)level;

    return ret;
}

// a number of flag masks used in the sflx format.  they all apply to the
// one-byte header that starts every block, as interpreted by sflx():
//   HALF_FRAME / QUARTER_FRAME : the block stores blocksize/2 or blocksize/4
//                                samples and is interpolated back up.
//                                HALF_FRAME is tested first, so it wins if
//                                both bits are set.
//   CONTROL_MASK               : selects the low nibble, which holds one of
//                                the CONTROL_* decoding methods below.
#define HALF_FRAME      0x40
#define QUARTER_FRAME   0x80
#define CONTROL_MASK    0x0f
#define CONTROL_ZEROES  0x00 // fill the frame with SILENCE, reset the accumulator
#define CONTROL_NOP     0x01 // no payload is read and the frame is not written
#define CONTROL_RAW     0x05 // framesize payload bytes are copied verbatim
#define CONTROL_DPCM1   0x02 // use one bit to choose a delta
#define CONTROL_DPCM2   0x03 // use two bits to choose a delta
#define CONTROL_DPCM4   0x04 // use four bits to choose a delta

// sits at the start of a sflx file.  the __attribute__ compiler tag is to
// signal gcc not to pad this out to 16 bytes.  don't know if there's an msvc
// equivalent, since that's what MechVM seems to be using.
//
// the fields use fixed-width types so the struct is exactly the 14 bytes
// found on disk (4 + 4 + 4 + 2) on every platform; `long` is 8 bytes on
// 64-bit unix targets and would shift every field after `title`.
//
//   title       : four leading bytes of the file.
//                 NOTE(review): presumably a format tag; nothing here checks it.
//   filesize    : NOTE(review): presumably the file length; unused by the decoder.
//   block_count : number of encoded blocks following the header.
//   blocksize   : number of pcm bytes every block decodes to.
//
// the integer fields are read in host byte order; no byte swapping is done.
typedef struct {
    char title[4];
    int32_t filesize;
    int32_t block_count;
    int16_t blocksize;
} __attribute__((packed)) SFLX_HEADER;

// decodes an SFLX file sitting in incoming, writes pcm to outgoing
//
//   incoming : start of the file image: an SFLX_HEADER followed by
//              block_count encoded blocks.
//   outgoing : destination buffer; must have room for
//              block_count * blocksize bytes, since every block contributes
//              exactly blocksize bytes of output.
//
// returns outgoing on success.  returns NULL, after printing a message to
// stderr and without writing any output, when the header's blocksize is
// outside 1..MAX_BLOCKSIZE and so cannot be decoded into the frame buffer.
// a block_count <= 0 decodes nothing and returns outgoing.
//
// NOTE(review): the length of incoming is not passed in, so a truncated or
// corrupt file can still make the decoder read past the end of the buffer.
char* sflx(char *incoming, char *outgoing) {
    // keep track of where we're writing to and reading from
    char *outptr = outgoing, *inptr = incoming;
    // used for the D in DPCM :)  holds a signed level; declared unsigned to
    // match process_dpcm's parameter type.
    unsigned int accumulator = 0;
    // one decoded block.  lives outside the loop so that a block which writes
    // nothing (CONTROL_NOP, unknown control code) re-emits defined data.
    unsigned char frame[MAX_BLOCKSIZE * 4];
    SFLX_HEADER file_header;
    long blocks_left, blocksize;

    // decode the header to see how many blocks we're expecting.  copy it out
    // instead of casting the buffer so we don't depend on its alignment.
    memcpy(&file_header, incoming, sizeof file_header);
    blocksize = file_header.blocksize;
    blocks_left = file_header.block_count;
    inptr += sizeof file_header;

    if (blocksize <= 0 || blocksize > MAX_BLOCKSIZE) {
        fprintf(stderr, "Error decoding SFLX file: unsupported block size %ld.\n",
                blocksize);
        return NULL;
    }

    memset(frame, SILENCE, sizeof frame);

    // now we loop through the blocks
    for (; blocks_left > 0; blocks_left--) {
        // the basic structure of the block is to have a header at the front,
        // which contains info about ...
        //  1) what sample rate we're storing, and
        //  2) what decoding method to use.
        // this is the information that we strip out first.
        int block_header = (unsigned char)*inptr++;
        int control = block_header & CONTROL_MASK;
        int framesize;

        // use the header to decode how large we want our frame to be
        if (block_header & HALF_FRAME)
            framesize = (int)(blocksize / 2);
        else if (block_header & QUARTER_FRAME)
            framesize = (int)(blocksize / 4);
        else
            framesize = (int)blocksize;

        switch (control) { // various decoding methods follow:

            case CONTROL_ZEROES:
                // this control code means we fill the frame with silence
                memset(frame, SILENCE, framesize);
                accumulator = 0;
            break;

            case CONTROL_NOP:
                // don't do anything.  the frame keeps whatever the previous
                // block left in it (silence for the very first block).
            break;

            // from here, all the dpcm blocks are similarly structured.
            // following their one-byte header, which contains information
            // about the granularity of the sound to follow (to be explained in
            // a moment), they contain a table of signed values we call deltas.
            // after we read in the delta table, all the data that follows is
            // broken down into 1, 2, or 4-bit "words".  each of these words
            // selects which delta value to use, which we add to whatever our
            // pcm was just sitting at (hence "delta") and then loop.

            case CONTROL_DPCM1:
                inptr += process_dpcm(inptr, (char *)frame, framesize,
                                      &accumulator, 1);
            break;

            case CONTROL_DPCM2:
                inptr += process_dpcm(inptr, (char *)frame, framesize,
                                      &accumulator, 2);
            break;

            case CONTROL_DPCM4:
                inptr += process_dpcm(inptr, (char *)frame, framesize,
                                      &accumulator, 4);
            break;

            case CONTROL_RAW:
                memcpy(frame, inptr, framesize);
                inptr += framesize;
                // resync the dpcm level to the last raw sample.  tiny block
                // sizes can give an empty frame, in which case there is no
                // last sample to read.
                if (framesize > 0)
                    accumulator = (unsigned int)((int)frame[framesize - 1] - SILENCE);
            break;

            // shouldn't ever get here, i don't think.
            default:
                fprintf(stderr, "Error decoding SFLX file (%d).\n", control);
            break;
        }

        // now we need to rescale if this segment was sampled at a lower rate.
        // only the framesize samples decoded above are real input; stretching
        // them yields the blocksize bytes we emit below.
        if (block_header & HALF_FRAME)
            interpolate2(frame, framesize);
        else if (block_header & QUARTER_FRAME)
            interpolate4(frame, framesize);

        // finally, store the data to outptr
        memcpy(outptr, frame, blocksize);
        outptr += blocksize;
    }

    return outgoing;
}

// and a small main routine to demonstrate
int main(int argc, char** argv) {
    if (argc != 3) {
        fprintf(stderr, "Reads SFLX from file <arg1> and writes PCM to file <arg2>.\n");
        exit(-1);
    }

    FILE *fh_in = fopen(argv[1], "r"), *fh_out = fopen(argv[2], "w");
    assert(fh_in && fh_out);

    struct stat in_stat;
    assert(!stat(argv[1], &in_stat));

    char *in_buf = malloc(in_stat.st_size);
    assert(in_buf);
    fread(in_buf, in_stat.st_size, 1, fh_in);
    SFLX_HEADER* header = (SFLX_HEADER*)in_buf;
    char *out_buf = malloc(header->block_count * header->blocksize);
    assert(out_buf);
    sflx(in_buf, out_buf);
    fwrite(out_buf, header->blocksize, header->block_count, fh_out);
    fclose(fh_in);
    fclose(fh_out);

    return 0;
}
