/**
 * Title:        Comedia Utils
 * Description:  Project contains some general purpose non-visual beans.
 * Beans do not require any special libraries.
 * Copyright:    Copyright (c) 2001
 * Company:      Capella Development Group
 * @author Sergey Seroukhov
 * @version 1.0
 */

package org.comedia.util.scanner;

/**
 * Performs lexical scanning for XML-like languages.
 * <p>
 * Example of scanner usage:
 * <p><pre>
 * System.out.println("*********** Xml Scanner Test *************");
 *
 * CXmlScanner scanner = new CXmlScanner();
 * scanner.setBuffer("<?xml <!-- comment -->param+0.1=\"xxx\">\"www\" +=11 labmda</ a/>");
 * scanner.setShowEol(true);
 * scanner.setShowSpace(true);
 * scanner.setShowComment(true);
 *
 * // Tests string conversions
 * String str = "The test \'string\'";
 * System.out.println("Start string: " + str);
 * str = scanner.wrapString(str);
 * System.out.println("Wrapped string: " + str);
 * str = scanner.unwrapString(str);
 * System.out.println("Unwrapped string: " + str);
 *
 * System.out.println();
 * System.out.println("Initial string: " + scanner.getBuffer());
 *
 * while (scanner.lex() != EOF) {
 *   switch (scanner.getTokenType()) {
 *     case UNKNOWN: System.out.print("Type: Unknown "); break;
 *     case COMMENT: System.out.print("Type: Comment "); break;
 *     case KEYWORD: System.out.print("Type: Keyword "); break;
 *     case TYPE: System.out.print("Type: Type "); break;
 *     case IDENT: System.out.print("Type: Ident "); break;
 *     case ALPHA: System.out.print("Type: Alpha "); break;
 *     case OPERATOR: System.out.print("Type: Operator "); break;
 *     case BRACE: System.out.print("Type: Brace "); break;
 *     case SEPARATOR: System.out.print("Type: Separator "); break;
 *     case EOL: System.out.print("Type: Eol "); break;
 *     case LF: System.out.print("Type: Lf "); break;
 *     case SPACE: System.out.print("Type: Space "); break;
 *     case INT: System.out.print("Type: Int "); break;
 *     case FLOAT: System.out.print("Type: Float "); break;
 *     case STRING: System.out.print("Type: String "); break;
 *     case BOOL: System.out.print("Type: Bool "); break;
 *     case EOF: System.out.print("Type: Eof "); break;
 *   }
 *   System.out.println("Value: '" + scanner.getToken()
 *     + "' Pos: " + scanner.getPosition() + " Line: " + scanner.getLineNo());
 * }
 * </pre>
 * The result:
 * <p><pre>
 * *********** Xml Scanner Test *************
 * Start string: The test 'string'
 * Wrapped string: "The test 'string'"
 * Unwrapped string: The test 'string'
 *
 * Initial string: <?xml <!-- comment -->param+0.1="xxx">"www" +=11 labmda</ a/>
 * Value: '<?' Pos: 0 Line: 0
 * Type: Ident Value: 'xml' Pos: 2 Line: 0
 * Type: Space Value: ' ' Pos: 5 Line: 0
 * Type: Comment Value: '<!-- comment -->' Pos: 6 Line: 0
 * Type: Ident Value: 'param' Pos: 22 Line: 0
 * Value: '+' Pos: 27 Line: 0
 * Type: Float Value: '0.1' Pos: 28 Line: 0
 * Type: Operator Value: '=' Pos: 31 Line: 0
 * Type: String Value: '"xxx"' Pos: 32 Line: 0
 * Value: '>' Pos: 37 Line: 0
 * Type: Unknown Value: '"www"' Pos: 38 Line: 0
 * Type: Space Value: ' ' Pos: 43 Line: 0
 * Type: Unknown Value: '+=11' Pos: 44 Line: 0
 * Type: Space Value: ' ' Pos: 48 Line: 0
 * Type: Unknown Value: 'labmda' Pos: 49 Line: 0
 * Value: '</' Pos: 55 Line: 0
 * Type: Space Value: ' ' Pos: 57 Line: 0
 * Type: Ident Value: 'a' Pos: 58 Line: 0
 * Value: '/>' Pos: 59 Line: 0
 * </pre>
 */
public class CXmlScanner extends CScanner {
  /**
   * The flag which shows that parsing is currently inside a tag, i.e. a
   * tag opening delimiter has been read and no closing delimiter yet.
   */
  private boolean insideTag = false;

  /**
   * Constructs this class with default parameters.
   */
  public CXmlScanner() {
    super();
  }

  /**
   * Checks whether the scanner is currently positioned inside a tag.
   * @return <code>true</code> if parsing is inside a tag and
   *   <code>false</code> otherwise.
   */
  public boolean isInsideTag() {
    return insideTag;
  }

  /**
   * Sets a new input buffer and resets buffer pointers. The "inside tag"
   * state is cleared as well, so scanning starts outside of any tag.
   * @param s a new input string.
   */
  public void setBuffer(String s) {
    insideTag = false;
    super.setBuffer(s);
  }

  /**
   * Returns the buffer character located <code>offset</code> positions after
   * the current buffer position without consuming it.
   * @param offset a non-negative distance from the current position.
   * @return the character at that position or <code>'\0'</code> when the
   *   position lies beyond the end of the buffer.
   */
  private char peek(int offset) {
    int index = bufferPos + offset;
    return (index < bufferLen) ? buffer.charAt(index) : '\0';
  }

  /**
   * Consumes the body of a comment up to and including the closing
   * "--&gt;" marker and appends it to the token. When the marker is missing
   * the rest of the buffer is consumed, so an unterminated comment does not
   * leak its last characters as separate tokens. Every line feed consumed
   * here increments the line counter.
   * @param curr a "Holder" whose token receives the comment text.
   */
  private void scanCommentBody(Lexem curr) {
    while (bufferPos < bufferLen) {
      char c = buffer.charAt(bufferPos);
      if (c == '-' && peek(1) == '-' && peek(2) == '>') {
        curr.token += "-->";
        bufferPos += 3;
        return;
      }
      curr.token += c;
      bufferPos++;
      if (c == '\n') {
        bufferLine++;
      }
    }
  }

  /**
   * Gets a low-level token. Presents the main parsing process.
   * <p>
   * The token is first started by <code>innerStartLex</code>; if that call
   * already classified it, the type is returned unchanged. Otherwise
   * the first token character and up to three look-ahead characters decide
   * between a comment, a tag delimiter, an operator, a string, or an
   * identifier/number. Outside of a tag everything up to the next white
   * space, line break or '&lt;' is glued into one token of unknown type.
   * @param curr a "Holder" which contains the extracted token.
   * @return the extracted token type represented by a special constant.
   */
  protected int lowRunLex(Lexem curr) {
    innerStartLex(curr);
    if (curr.tokenType != UNKNOWN) {
      return curr.tokenType;
    }

    char first = curr.token.charAt(0);
    char next = peek(0);

    // Checks for comment: "<!--" ... "-->"
    if (first == '<' && next == '!' && peek(1) == '-' && peek(2) == '-') {
      curr.token += "!--";
      bufferPos += 3;
      scanCommentBody(curr);
      return (curr.tokenType = COMMENT);
    }

    // Checks for start tag delimiter: "<", "<?", "<!" or "</"
    if (first == '<') {
      if (next == '?' || next == '!' || next == '/') {
        curr.token += next;
        bufferPos++;
      }
      insideTag = true;
      return (curr.tokenType = BRACE);
    }

    // Checks for end tag delimiter: ">"
    if (first == '>') {
      insideTag = false;
      return (curr.tokenType = BRACE);
    }

    // Checks for two-character end tag delimiters: "?>" and "/>"
    if (next == '>' && (first == '?' || first == '/')) {
      curr.token += next;
      bufferPos++;
      insideTag = false;
      return (curr.tokenType = BRACE);
    }

    // Operators and strings are recognized only inside a tag
    if (insideTag && first == '=') {
      return (curr.tokenType = OPERATOR);
    }
    if (insideTag && (first == '\'' || first == '\"')) {
      return innerProcString(curr);
    }

    // Checks for numbers and identifiers
    innerProcIdent(curr);
    if (insideTag) {
      if (curr.tokenType == IDENT) {
        return (curr.tokenType = KEYWORD);
      }
      if (curr.tokenType == UNKNOWN) {
        curr.tokenType = SEPARATOR;
      }
      return curr.tokenType;
    }

    // Outside of a tag: the text run up to the next blank, line break or
    // tag start becomes a single token of unknown type.
    while (bufferPos < bufferLen) {
      char c = buffer.charAt(bufferPos);
      if (isWhite(c) || c == '\n' || c == '\r' || c == '<') {
        break;
      }
      curr.token += c;
      bufferPos++;
    }
    return (curr.tokenType = UNKNOWN);
  }

  /**
   * Converts a value from ordinary into XML-like escape format. The five
   * characters &amp;, ", ', &gt; and &lt; are replaced with their named
   * entities; no surrounding quotes are added.
   * @param s a string in ordinary (local) presentation.
   * @return a result string in XML-like escape format.
   */
  public static String wrapValue(String s) {
    StringBuilder result = new StringBuilder(s.length() + 16);
    for (int p = 0; p < s.length(); p++) {
      char c = s.charAt(p);
      switch (c) {
        case '&': result.append("&amp;"); break;
        case '\"': result.append("&quot;"); break;
        case '\'': result.append("&apos;"); break;
        case '>': result.append("&gt;"); break;
        case '<': result.append("&lt;"); break;
        default: result.append(c);
      }
    }
    return result.toString();
  }

  /**
   * Converts a string from ordinary into XML-like escape format
   * enclosed in single quotes.
   * @param s a string in ordinary (local) presentation.
   * @return a result string in XML-like escape format.
   */
  public static String wrapString(String s) {
    return "\'" + wrapValue(s) + "\'";
  }

  /**
   * Converts a hexadecimal string into an integer. Characters which are not
   * hexadecimal digits (0-9, A-F, a-f) are skipped; overflow is not checked.
   * @param s a hexadecimal string.
   * @return a converted integer value.
   */
  public static int convertHex2Int(String s) {
    int result = 0;
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      int digit;
      if (c >= '0' && c <= '9') {
        digit = c - '0';
      } else if (c >= 'A' && c <= 'F') {
        digit = c - 'A' + 10;
      } else if (c >= 'a' && c <= 'f') {
        digit = c - 'a' + 10;
      } else {
        continue;
      }
      result = (result << 4) | digit;
    }
    return result;
  }

  /**
   * Appends the character(s) for a numeric character reference. Code points
   * above the Basic Multilingual Plane are appended as a surrogate pair
   * instead of being truncated to one char; values which are not valid
   * Unicode code points are dropped.
   * @param out a builder which receives the decoded character.
   * @param codePoint a decoded numeric reference value.
   */
  private static void appendReference(StringBuilder out, int codePoint) {
    if (Character.isValidCodePoint(codePoint)) {
      out.appendCodePoint(codePoint);
    }
  }

  /**
   * Converts a value from XML-like escape format into ordinary (local)
   * presentation. Recognized are the named entities amp, quot, apos, gt
   * and lt (case-insensitively) as well as decimal (&amp;#NNN;) and
   * hexadecimal (&amp;#xHHH;) character references. Unknown entities and
   * malformed decimal references are dropped from the result. An entity
   * which is not closed by ';' extends to the end of the string.
   * @param s a string in XML-like escape format.
   * @return a result string in ordinary (local) presentation.
   */
  public static String unwrapValue(String s) {
    int len = s.length();
    StringBuilder result = new StringBuilder(len);
    int p = 0;
    while (p < len) {
      char c = s.charAt(p++);
      if (c != '&') {
        result.append(c);
        continue;
      }

      // Extracts the entity name: everything up to ';' or the end of input
      int end = s.indexOf(';', p);
      String escape;
      if (end < 0) {
        escape = s.substring(p);
        p = len;
      } else {
        escape = s.substring(p, end);
        p = end + 1;
      }
      escape = escape.toLowerCase();

      if (escape.equals("amp")) result.append('&');
      else if (escape.equals("quot")) result.append('\"');
      else if (escape.equals("apos")) result.append('\'');
      else if (escape.equals("gt")) result.append('>');
      else if (escape.equals("lt")) result.append('<');
      else if (escape.startsWith("#x"))
        appendReference(result, convertHex2Int(escape.substring(2)));
      else if (escape.startsWith("#")) {
        try {
          appendReference(result, Integer.parseInt(escape.substring(1)));
        } catch (NumberFormatException ignored) {
          // A malformed decimal reference is deliberately dropped, the same
          // way as an unknown named entity.
        }
      }
    }
    return result.toString();
  }

  /**
   * Converts a string from XML-like escape format limited
   * with quotes into ordinary (local) presentation. The string is first
   * passed through <code>CScanner.unwrapString</code> and the result is
   * then decoded with {@link #unwrapValue(String)}.
   * @param s a string in XML-like escape format.
   * @return a result string in ordinary (local) presentation.
   */
  public static String unwrapString(String s) {
    return unwrapValue(CScanner.unwrapString(s));
  }

  /**
   * The main function for test purposes. Prints the result of the string
   * conversions and then every token of a sample buffer with its type,
   * position and line number.
   * @param args command line arguments (not used).
   */
  public static void main(String[] args) {
    System.out.println("*********** Xml Scanner Test *************");

    CXmlScanner scanner = new CXmlScanner();
    scanner.setBuffer("<?xml <!-- comment -->param+0.1=\"xxx\">\"www\" +=11 labmda</ a/>");
    scanner.setShowEol(true);
    scanner.setShowSpace(true);
    scanner.setShowComment(true);

    // Tests string conversions
    String str = "The test & \'string\'";
    System.out.println("Start string: " + str);
    str = CXmlScanner.wrapString(str);
    System.out.println("Wrapped string: " + str);
    str = CXmlScanner.unwrapString("&#169;" + str + "&#xA9;");
    System.out.println("Unwrapped string: " + str);

    System.out.println();
    System.out.println("Initial string: " + scanner.getBuffer());

    while (scanner.lex() != EOF) {
      switch (scanner.getTokenType()) {
        case UNKNOWN: System.out.print("Type: Unknown "); break;
        case COMMENT: System.out.print("Type: Comment "); break;
        case KEYWORD: System.out.print("Type: Keyword "); break;
        case TYPE: System.out.print("Type: Type "); break;
        case IDENT: System.out.print("Type: Ident "); break;
        case ALPHA: System.out.print("Type: Alpha "); break;
        case OPERATOR: System.out.print("Type: Operator "); break;
        case BRACE: System.out.print("Type: Brace "); break;
        case SEPARATOR: System.out.print("Type: Separator "); break;
        case EOL: System.out.print("Type: Eol "); break;
        case LF: System.out.print("Type: Lf "); break;
        case SPACE: System.out.print("Type: Space "); break;
        case INT: System.out.print("Type: Int "); break;
        case FLOAT: System.out.print("Type: Float "); break;
        case STRING: System.out.print("Type: String "); break;
        case BOOL: System.out.print("Type: Bool "); break;
        case EOF: System.out.print("Type: Eof "); break;
        default: System.out.print("Type: ??? "); break;
      }
      System.out.println("Value: '" + scanner.getToken()
        + "' Pos: " + scanner.getPosition() + " Line: " + scanner.getLineNo());
    }
  }
}
