/* Tokenizer for JavaScript code */
var tokenizeJavaScript = (function () {
    // Consume characters from the stream up to and including the first
    // occurrence of `end` that is not preceded by an escaping backslash,
    // or up to the end of the line, whichever comes first.
    //
    // Returns false when `end` was found. When the line ran out instead,
    // returns whether the last character read was an unescaped backslash
    // (false for an empty remainder).
    function nextUntilUnescaped(source, end) {
        var afterBackslash = false;
        for (;;) {
            if (source.endOfLine())
                return afterBackslash;
            var ch = source.next();
            if (!afterBackslash && ch == end)
                return false;
            // A backslash only escapes when it is not itself escaped.
            afterBackslash = (ch == "\\") && !afterBackslash;
        }
    }
    // Lookup table from JavaScript keyword to its token description.
    // Every entry carries a `type`, which gives the parser the
    // information it needs, and a `style`, which the highlighter uses to
    // pick the CSS style for the token. The a/b/c grouping of keywords is
    // deliberately rough: it only has to be good enough to parse correct
    // code correctly (how incorrect code is parsed matters little).
    var keywords = (function () {
        var table = {};
        function describe(type, style) {
            return {type: type, style: "js-" + style};
        }
        // Point every listed word at one shared description object.
        function share(words, description) {
            for (var i = 0; i < words.length; i++)
                table[words[i]] = description;
        }
        // Give every listed word its own description, typed after the
        // word itself.
        function individual(words) {
            for (var i = 0; i < words.length; i++)
                table[words[i]] = describe(words[i], "keyword");
        }
        // "keyword a": takes a parenthesised expression, then a
        // statement (if)
        share(["if", "while", "with"], describe("keyword a", "keyword"));
        // "keyword b": takes just a statement (else)
        share(["else", "do", "try", "finally"], describe("keyword b", "keyword"));
        // "keyword c": optionally takes an expression, and forms a
        // statement (return)
        share(["return", "break", "continue", "new", "delete", "throw"],
              describe("keyword c", "keyword"));
        // Word-shaped operators are styled as keywords.
        share(["in", "typeof", "instanceof"], describe("operator", "keyword"));
        individual(["var", "function", "catch", "for", "switch", "case", "default"]);
        share(["true", "false", "null", "undefined", "NaN", "Infinity"],
              describe("atom", "atom"));
        return table;
    })();
    // Some helper regexps.
    // Characters that can make up an operator token. "^" and "~" are
    // included so the bitwise operators are not mistaken for the start of
    // a word. A leading "/" is dispatched separately by jsToken (comment,
    // regexp or operator), so it is not listed here.
    var isOperatorChar = /[+\-*&%=<>!?|^~]/;
    // A single hexadecimal digit, either case.
    var isHexDigit = /[0-9A-Fa-f]/;
    // A character that can be part of an identifier or keyword (\w
    // already covers the underscore).
    var isWordChar = /[\w$]/;
    // Wrapper around jsToken that helps maintain parser state between
    // tokens: `inside` records whether we are in the middle of a
    // multi-line comment ("/*") or a string continued across lines (the
    // quote character), and `regexp` whether the next token could be a
    // regular expression.
    //
    // Returns a function (source, setState) that reads one token from
    // `source` and returns it. setState is called with a fresh
    // jsTokenState only when one of the two pieces of state changed.
    function jsTokenState(inside, regexp) {
        return function (source, setState) {
            var newInside = inside;
            var type = jsToken(inside, regexp, source, function (c) {
                newInside = c;
            });
            var kind = type.type;
            // A regexp may follow an operator, a "keyword c" or opening /
            // separating punctuation. RegExp.test keeps this a plain
            // boolean; String.match would yield a fresh array (or null)
            // that never compares equal to the previous flag, forcing a
            // needless state replacement after every punctuation token.
            var newRegexp = kind == "operator" || kind == "keyword c" || /^[\[{}\(,;:]$/.test(kind);
            if (newRegexp !== Boolean(regexp) || newInside != inside)
                setState(jsTokenState(newInside, newRegexp));
            return type;
        };
    }
    // The token reader, inteded to be used by the tokenizer from
    // tokenize.js (through jsTokenState). Advances the source stream
    // over a token, and returns an object containing the type and style
    // of that token.
    function jsToken(inside, regexp, source, setInside) {
        function readHexNumber() {
            source.next(); // skip the 'x'
            source.nextWhileMatches(isHexDigit);
            return {type: "number", style: "js-atom"};
        }
        function readNumber() {
            source.nextWhileMatches(/[0-9]/);
            if (source.equals(".")) {
                source.next();
                source.nextWhileMatches(/[0-9]/);
            }
            if (source.equals("e") || source.equals("E")) {
                source.next();
                if (source.equals("-"))
                    source.next();
                source.nextWhileMatches(/[0-9]/);
            }
            return {type: "number", style: "js-atom"};
        }
        // Read a word, look it up in keywords. If not found, it is a
        // variable, otherwise it is a keyword of the type found.
        function readWord() {
            source.nextWhileMatches(isWordChar);
            var word = source.get();
            var known = keywords.hasOwnProperty(word) && keywords.propertyIsEnumerable(word) && keywords[word];
            return known ? {type: known.type, style: known.style, content: word} :
            {type: "variable", style: "js-variable", content: word};
        }
        function readRegexp() {
            nextUntilUnescaped(source, "/");
            source.nextWhileMatches(/[gi]/);
            return {type: "regexp", style: "js-string"};
        }
        // Mutli-line comments are tricky. We want to return the newlines
        // embedded in them as regular newline tokens, and then continue
        // returning a comment token for every line of the comment. So
        // some state has to be saved (inside) to indicate whether we are
        // inside a /* */ sequence.
        function readMultilineComment(start) {
            var newInside = "/*";
            var maybeEnd = (start == "*");
            while (true) {
                if (source.endOfLine())
                    break;
                var next = source.next();
                if (next == "/" && maybeEnd) {
                    newInside = null;
                    break;
                }
                maybeEnd = (next == "*");
            }
            setInside(newInside);
            return {type: "comment", style: "js-comment"};
        }
        function readOperator() {
            source.nextWhileMatches(isOperatorChar);
            return {type: "operator", style: "js-operator"};
        }
        function readString(quote) {
            var endBackSlash = nextUntilUnescaped(source, quote);
            setInside(endBackSlash ? quote : null);
            return {type: "string", style: "js-string"};
        }
        // Fetch the next token. Dispatches on first character in the
        // stream, or first two characters when the first is a slash.
        if (inside == "\"" || inside == "'")
            return readString(inside);
        var ch = source.next();
        if (inside == "/*")
            return readMultilineComment(ch);
        else if (ch == "\"" || ch == "'")
            return readString(ch);
        // with punctuation, the type of the token is the symbol itself
        else if (/[\[\]{}\(\),;\:\.]/.test(ch))
            return {type: ch, style: "js-punctuation"};
        else if (ch == "0" && (source.equals("x") || source.equals("X")))
            return readHexNumber();
        else if (/[0-9]/.test(ch))
            return readNumber();
        else if (ch == "/") {
            if (source.equals("*")) {
                source.next();
                return readMultilineComment(ch);
            }
            else if (source.equals("/")) {
                nextUntilUnescaped(source, null);
                return {type: "comment", style: "js-comment"};
            }
            else if (regexp)
                return readRegexp();
            else
                return readOperator();
        }
        else if (isOperatorChar.test(ch))
            return readOperator();
        else
            return readWord();
    }
    // The external interface to the tokenizer.
    return function (source, startState) {
        return tokenizer(source, startState || jsTokenState(false, true));
    };
})();
 
  |