I am attempting to build a parser for BBCode in JavaScript that will allow me to transpile a string with BBCode in it to a string with HTML. I have in my head how it is all supposed to work and I even have two of the parser steps built.
Right now the entire process of the parser can be described as
- Get input
- Break input into tokens (tokenize)
- Add information about the tokens (lex)
- Build the AST from the tokens (parse)
- Clean up the AST based on grammar rules (clean)
- Evaluate AST and transform to HTML (evaluate)
- Return the HTML string
I have a general idea of how to do all of this in my head except for step four.
When I reached step four I ran into a problem when building the AST. The problem was how would I go about recursively building this tree. I have in the past recursively built two dimensional arrays but a variable depth tree is way out of my scope of abilities.
In my head I think that the tree should look something like this:
// Hello, [b]World![/b]
{
    "text": "Hello, ",
    "tag": {
        "type": "b",
        "text": "World!"
    }
}
But when trying to generate this I have an issue with recursively building this down.
A more complex example would be as follows:
// [c=red]Hello Tom, [/c][b][c=green]how are you?[/c][/b]
{
    "tag": {
        type: "c",
        "parameters": "red",
        "text": "Hello Tom, "
        "tag": {
            "type": "b",
            "tag": {
                "type": "c",
                "parameters": "green",
                "text": "how are you?"
            }
        }
    }
}
The main issue I run across is keeping my place while building down without accidentally overwriting the entire tree.
Currently the code I am using is:
var bbcode = {};
bbcode._tokens = {
    'TO_DEL': '[',
    'TC_DEL': ']',
    'TE_DEL': '/',
    'EQ_DEL': '='
};
bbcode._tags = ['c', 'b'];
bbcode.parse = function(bbcode) {
    var tokens = this._tokenize(bbcode);
        tokens = this._lex(tokens);
    var ast = this._parse(tokens);
    console.log(JSON.stringify(ast, null, 4));
    //return tokens;
};
bbcode._isToken = function(token) {
    for (var k in this._tokens) {
        if (this._tokens[k] === token) {
            return true;
        }
    }
    return false;
};
bbcode._isTag = function(token) {
    return (this._tags.indexOf(token) > -1) ? true : false;
};
bbcode._getType = function(token) {
    for (var k in this._tokens) {
        if (this._tokens[k] === token) {
            return k;
        }
    }   
};
bbcode._next = function(tokens, curr) {
    return tokens[curr + 1][0];
};
bbcode._previous = function(tokens, curr) {
    return tokens[curr - 1][0];
};
bbcode._tokenize = function(bbcode) {
    var tree = [];
    var temp = '';
    for (var i = 0; i < bbcode.length; i++) {
        if (this._isToken(bbcode[i])) {
            if (temp.length > 0) {
                tree.push(temp);
                temp = '';
            }
            tree.push(bbcode[i]);
        } else {
            temp += bbcode[i];
        }
    }
    return tree;
};
bbcode._lex = function(tokens) {
    var tree = [];
    for (var i = 0; i < tokens.length; i++) {
        if (this._isToken(tokens[i])) {
            tree.push([this._getType(tokens[i]), tokens[i]]);
        } else if (this._isTag(tokens[i])) {
            tree.push(['BB_TAG', tokens[i]]);
        } else {
            tree.push(['BB_STRING', tokens[i]]);
        }
    }
    return tree;
};
/*****************************************************************************/
/* I need help with the block below                                          */
/*****************************************************************************/
bbcode._parse = function(tokens) {
    var tree = {};
    for (var i = 0; i < tokens.length; i++) {
        if (tokens[i][0] === 'BB_STRING') {
            if (tree['text']) {
                tree['text'] += tokens[i][1];
            } else {
                tree['text'] = tokens[i][1];
            }
        } else if (tokens[i][0] === 'TO_DEL') {
            if (this._next(tokens, i) === 'BB_TAG') {
                tree['tag'] = {};
            } else {
                if (tree['text']) {
                    tree['text'] += tokens[i][1];
                } else {
                    tree['text'] = tokens[i][1];
                }
            }
        }
    }
    return tree;
};
/*****************************************************************************/
 
    