/*****************************************************************************
 *
 * token.c
 *
 *  Tokenization.
 *
 *  The tokenizer always returns unsnapped tokens.
 *
 *  We avoid the traditional tokenizer problems of ``giant comment'' and
 *  ``giant string'' by using a dynamic token buffer.
 *
 *  All tokens are stacked into the token buffer.  If you need the token
 *  to be persistent, you have to save it somewhere else.
 *
 *****************************************************************************/

#include "m4.h"

/*****************************************************************************
 *
 *  typGetComTch
 *
 *      Scan and consume a comment token, returning typQuo
 *      because comments and quotes are essentially the same thing.
 *      tch contains the open-comment.
 *
 *      Comments do not nest.
 *
 *****************************************************************************/

TYP STDCALL
typGetComTch(TCH tch)
{
    AddArgTch(tch);                     /* Save the comment start */
    do {
        tch = tchGet();
        AddArgTch(tch);
        if (tch == tchMagic) {
            /* Ooh, regurgitating a magic token - these consist of two bytes */
            tch = tchGet();
            if (tch == tchEof) {
                Die("EOF in comment");
            }
            AddArgTch(tch);
        }
    } while (!fRcomTch(tch));
    return typQuo;
}

/*****************************************************************************
 *
 *  typGetQuoTch
 *
 *      Scan and consume a quote token, returning typQuo.
 *      tch contains the open-quote.
 *
 *****************************************************************************/

TYP STDCALL
typGetQuoTch(TCH tch)
{
    int iDepth = 1;
    for (;;) {
        tch = tchGet();
        if (tch == tchMagic) {
            /* SOMEDAY -- Should unget so that Die won't see past EOF */

            /* Ooh, regurgitating a magic token - these consist of two bytes */
            tch = tchGet();
            if (tch == tchEof) {
                Die("EOF in quote");
            }
            AddArgTch(tchMagic);        /* Add the magic prefix */
                                        /* Fallthrough will add tch */
        } else if (fLquoTch(tch)) {
            ++iDepth;
        } else if (fRquoTch(tch)) {
            if (--iDepth == 0) {
                break;                  /* Final Rquo found */
            }
        }
        AddArgTch(tch);
    }
    return typQuo;
}

/*****************************************************************************
 *
 *  typGetIdentTch
 *
 *      Scan and consume an identifier token, returning typId.
 *      tch contains the first character of the identifier.
 *
 *****************************************************************************/

TYP STDCALL
typGetIdentTch(TCH tch)
{
    do {
        AddArgTch(tch);
        tch = tchGet();
    } while (fIdentTch(tch));
    UngetTch(tch);
    return typId;
}

/*****************************************************************************
 *
 *  typGetMagicTch
 *
 *      Scan and consume a magic token, returning the token type.
 *      Magics are out-of-band gizmos that get inserted into the
 *      input stream via the tchMagic escape.
 *
 *****************************************************************************/

TYP STDCALL
typGetMagicTch(TCH tch)
{
    AddArgTch(tch);
    tch = tchGet();
    Assert(fValidMagicTch(tch));
    AddArgTch(tch);
    return typMagic;
}

/*****************************************************************************
 *
 *  typGetPuncTch
 *
 *      Scan and consume a punctuation token, returning the token type.
 *
 *      It is here that comments are recognized.
 *
 *
 *  LATER - It is here where consecutive typPunc's are coalesced.
 *  This would speed up top-level scanning.
 *  Be careful not to coalesce a comma!
 *  Lparen is okay because xtok handles that one.
 *  Whitespace is also okay because xtok handles those too.
 *
 *****************************************************************************/

TYP STDCALL
typGetPuncTch(TCH tch)
{
    AddArgTch(tch);
    return typPunc;
}

/*****************************************************************************
 *
 *  typGetPtok
 *
 *      Scan and consume a snapped token, returning the token type.
 *
 *****************************************************************************/

TYP STDCALL
typGetPtok(PTOK ptok)
{
    TCH tch;
    TYP typ;

    OpenArgPtok(ptok);

    tch = tchGet();

    if (fInitialIdentTch(tch)) {
        typ = typGetIdentTch(tch);
    } else if (fLcomTch(tch)) {
        typ = typGetComTch(tch);
    } else if (fLquoTch(tch)) {
        typ = typGetQuoTch(tch);
    } else if (fMagicTch(tch)) {
        typ = typGetMagicTch(tch);
    } else {
        typ = typGetPuncTch(tch);
    }
    CloseArgPtok(ptok);
    SnapArgPtok(ptok);
    return typ;
}