/******************************************************************************
CountedInput extends the BufferedInputStream by providing character and line
counting plus a few additional methods to scan some input structures.

----------------------------------------------------
Copyright (c) Gunnar Gotshalks. All Rights Reserved.

Permission to use, copy, modify, and distribute this software
and its documentation for NON-COMMERCIAL purposes and
without fee is hereby granted. 
******************************************************************************/

package FlexOr.io;
import java.io.*;

/**
CountedInput is meant to be used to process ASCII files.  It provides a base
from which specific scanning files can be constructed.

To help programmers provide some input statistics in application programs we
provide charCount to count the total number of characters input from a file,
charNumber to give the position of the character within the current line,
and lineNumber of the current line.  Scan and skip operations for whitespace
and the rest of the current line are provided.

<P>The Java file input API read method always returns a character, including
an unbounded number of EOF characters when the end of file is reached.
Consequently, all input files are implicitly unbounded in length giving the
following logical structure for a file.
<PRE>  file = input[0..N-1 , N, ...] </PRE>
where a file contains the <TT>N</TT> data characters <TT>input[0..N-1]</TT> and
an unbounded number of EOF characters in <TT>input[N, N+1, ...]</TT>.

<P>The EOL character (end-of-line) always counts as one data character.  See the
method <TT>read()</TT>.


<DL><B>Definitions</B>
<DT>charCount<DD>The net number of characters read by the user taking
backtracking (mark/reset) into account.
<DT>markCharCount<DD>The net number of characters read by the user at
the last mark.  Value is 0 before the first call to mark.
<DT>#userReadCalls<DD>The number of times the user has called read() since the
last mark or reset.
<DT>lineNumber<DD>One plus the net number of end-of-lines read by the user
taking backtracking (mark/reset) into account.
<DT>#linesReadSinceMark<DD>The number of EOLs read by the user since the
last mark or reset.
<DT>position<DD>The index of the next character to read.  Initial value is 0.
</DL>

<P><B>Class invariant:</B> 
<PRE>    charCount = min(markCharCount + #userReadCalls, #input)
&nbsp;   lineNumber = markLineNumber + #linesReadSinceMark
&nbsp;   0 <= position
</PRE>
<P>
@author Gunnar Gotshalks
@version 1.0 1999 Jan 10
*/

public class CountedInput extends BufferedInputStream {

/** Code of the carriage-return character ('\r'). */

private static final int CR = 13;

/** Code of the line-feed character ('\n'). */

private static final int LF = 10;

/******************************************************************************
Creating an instance of CountedInput opens the file named by the String.
There is no open required by the user.

<P><PRE><B>Requires:</B>
&nbsp;   True
<B>Ensures:</B>
&nbsp;   charCount = 0 and lineNumber = 1 and position = 0 and
&nbsp;   charNumber = 0;
</PRE>
@param fileName name of the file to open for reading.
@exception FileNotFoundException when the file named by fileName cannot be
opened.
*/

public CountedInput (String fileName) throws FileNotFoundException {
  super(new FileInputStream(fileName));
}

/******************************************************************************
To help programmers provide input statistics in application programs we
provide charCount to count the total number of characters input from a file,
charNumber to provide the position of the current character in the current
line, and lineNumber to count the input lines.
---------------------------*/
/** Net number of data characters -- exclusive of backtracking -- read from the
file.  End-of-line counts as one character even if \r\n is its representation.
End-of-file is not a data character.
*/

protected int theCharCount = 0;

/** Position of the current character in the current line.  Set back to 0 each
time an end-of-line is read. */

protected int theCharNumber = 0;

/** One plus the net number of end-of-lines -- exclusive of backtracking --
read from the file.  End-of-line counts as one even if \r\n is its
representation.  End-of-file is not an end of line character.
*/

protected int theLineNumber = 1;

/**
Return the net number of data characters read from the file.
<P><PRE><B>Requires:</B>
&nbsp;   True</PRE>
@return charCount

*/

public int charCount() { return theCharCount; }

/**
Return the position of the current character in the current line.
<P><PRE><B>Requires:</B>
&nbsp;   True</PRE>
@return charNumber

*/

public int charNumber() { return theCharNumber; }

/**
Return the current line number -- one plus the net number of end-of-lines read
from the file.
<P><PRE><B>Requires:</B>
&nbsp;   True </PRE>
@return lineNumber
*/

public int lineNumber() { return theLineNumber; }

/******************************************************************************
Need to keep the previous read character to handle the case of '\r\n'
sequence which is counted as one character.  To speed up the read operation
we have the current character global for the object.
---------------------------*/
/** Value is the actual data character read at the time of the previous call
to read.  If \r was the actual data character read on the previous call it
was returned to the user as Char.EOL.  Reading EOF leaves this value unchanged.
*/

protected int prevChar = Char.EOF;

/** Value is the result returned by the most recent call to read.  If Char.EOL
was returned on the previous read, the actual data character read may have been
a carriage return (\r).  Value is Char.EOF before the first call to read.
*/
protected int currentChar = Char.EOF;

/**
Return the current character, that is the result of the most recent call to
read narrowed to a char.
<P><PRE><B>Requires:</B>
&nbsp;   True </PRE>
@return currentChar
*/

public char currentChar() { return (char) currentChar; }

/******************************************************************************
read()

No need to test for EOF before reading as Java returns EOF an unbounded number
of times.

Need to have different cases depending upon how end-of-line is represented but
end-of-line always counts as 1 character.

In a Macintosh file it is '\r' (13), in a Unix file it is '\n' (10), in a
Windows file it is '\r\n'.  This is the case that requires either a special
look ahead one character or keeping track of the previous character.  It
requires that after recognizing '\r' we must skip a following '\n'.

EOF does not count as a data character.
*/
/**
Read the next character in the file.

<P>An IOException raised by the underlying stream is reported on System.err
and the read is treated as end-of-file, so that every scanning loop in this
class that waits for EOL or EOF terminates instead of spinning on a stale
character.

<P><PRE><B>Requires:</B>
&nbsp;   True
<B>Ensures:</B>
&nbsp;   position = 1 + old position
&nbsp;   (  input[old position] = EOF and charCount = old charCount
&nbsp;        and lineNumber = old lineNumber
&nbsp;    or
&nbsp;      input[old position] = EOL  and charCount = 1 + old charCount
&nbsp;        and charNumber = 0  and lineNumber = 1 + oldLineNumber
&nbsp;    or
&nbsp;      input[old position]  notin {EOL, EOF} and charCount = 1 + old charCount
&nbsp;        and charNumber = 1 + old charNumber  and lineNumber = old lineNumber
&nbsp;    )
</PRE>
@return input[position-1], or Char.EOF when the underlying read fails.
*/

public int read() {
  try {
    currentChar = super.read();
    switch (currentChar) {
      case CR :
        // A lone '\r' or the first half of "\r\n": report it as one EOL.
        prevChar = currentChar;
        currentChar = Char.EOL;
        countEndOfLine();
        break;
      case LF :
        if (prevChar == CR) {
          // Second half of "\r\n": already counted, so skip it and deliver
          // the character that follows.  prevChar is now '\n', so the
          // nested call cannot recurse a second time.
          prevChar = currentChar;
          currentChar = read();
        } else {
          prevChar = currentChar;
          countEndOfLine();
        }
        break;
      case -1 :
        // End of stream: not a data character, nothing is counted.
        currentChar = Char.EOF;
        break;
      default :
        prevChar = currentChar;
        theCharCount++;
        theCharNumber++;
    }
  } catch (IOException e) {
    System.err.println("Trouble reading" + e);
    // Report EOF rather than the stale previous character so callers stop.
    currentChar = Char.EOF;
  }

  return currentChar;
}

/** Update the counters for one end-of-line: one more data character, one more
line, and the position within the line starts again at 0. */

private void countEndOfLine() {
  theCharCount++;
  theCharNumber = 0;
  theLineNumber++;
}

/*****************************************************************************
Reads the rest of the current line, starting with the current character (the
result of the last read).  The next read will read the first character on the
next line, if any, or read EOF.  If the last read character was EOL or EOF
then no characters are read -- rest of line is empty.

<P>The buffer starts with room for 100 characters and grows as needed, so
longer lines are accepted.

<P><PRE><B>Requires:</B>
&nbsp;   True
<B>Ensures:</B>
&nbsp;    returned characters intersect {EOF, EOL} = null
&nbsp;    currentChar in {EOF, EOL}
</PRE>
@return the current character followed by the characters read up to, but not
including, the terminating EOL or EOF.
*/

public String readLine() {
  StringBuffer buffer = new StringBuffer(100);
  while ((currentChar != Char.EOF) && (currentChar != Char.EOL)) {
    buffer.append((char) currentChar);
    read();
  }
  return buffer.toString();
}

/*****************************************************************************
Skip characters, starting with the current character, until the current
character is not whitespace.  EOF is not a whitespace character.  Using the
whitespace definition in Character.isWhitespace.

<P><PRE><B>Requires:</B>
&nbsp;   True
<B>Ensures:</B>
&nbsp;    skipped characters intersect nonWhitespace = null
</PRE>
@return the first non whitespace character found, which becomes currentChar.
*/

public int skipWhitespace() {
  while (Character.isWhitespace((char) currentChar)) {
    read();
  }
  return currentChar;
}

/*****************************************************************************
Skip to and return the first character on the next line.  If the current
character is already EOL or EOF nothing is skipped before the final read.

<P><PRE><B>Requires:</B>
&nbsp;   True
<B>Ensures:</B>
&nbsp;    skipped characters before the terminator intersect {EOL, EOF} = null
</PRE>
@return the result of the read that follows the terminating EOL or EOF.
*/

public int skipToNextLine() {
  while ((currentChar != Char.EOL) && (currentChar != Char.EOF)) {
    read();
  }
  return read();
}

/*****************************************************************************
Span whitespace, starting with the current character.

<P><PRE><B>Requires:</B>
&nbsp;   True
<B>Ensures:</B>
&nbsp;    returned characters intersect nonWhiteSpace = null
&nbsp;    currentChar is not whitespace
</PRE>
@return the run of whitespace characters that was spanned; empty when the
current character is not whitespace.
*/

public String spanWhitespace() {
  StringBuffer sb = new StringBuffer();
  while (Character.isWhitespace((char) currentChar)) {
    sb.append((char) currentChar);
    read();
  }
  return sb.toString();
}

/*****************************************************************************
Span non whitespace, starting with the current character.  The span also ends
at EOF: EOF is not whitespace and read returns it an unbounded number of
times, so without this stop the loop would never terminate.

<P><PRE><B>Requires:</B>
&nbsp;   True
<B>Ensures:</B>
&nbsp;    returned characters intersect (whiteSpace union {EOF}) = null
&nbsp;    currentChar is whitespace or EOF
</PRE>
@return the run of non whitespace characters that was spanned; empty when the
current character is whitespace or EOF.
*/

public String spanNonWhitespace() {
  StringBuffer sb = new StringBuffer();
  while ((currentChar != Char.EOF)
         && !Character.isWhitespace((char) currentChar)) {
    sb.append((char) currentChar);
    read();
  }
  return sb.toString();
}

/*****************************************************************************
Objects and methods for backtracking.
--------------------------*/
/** The maximum lookahead for the last mark call.  This field hides the field
of the same name inherited from BufferedInputStream; mark stores the same
value in both. */

protected int marklimit;

/** The net number of characters read at the time of the last mark call. */

protected int markCharCount;

/** The character position at the time of the last mark call. */

protected int markCharNumber;

/** The line number at the time of the last mark call. */

protected int markLineNumber;

/** The value of prevChar at the time of the last mark call.  Needed so that a
'\r\n' pair straddling the mark is still counted as one end-of-line after a
reset. */

protected int markPrevChar = Char.EOF;

/** The value of currentChar at the time of the last mark call. */

protected int markCurrentChar = Char.EOF;

/** Mark position for backtracking with reset.  Marklimit is the maximum read
ahead before reset cannot be used.

<P><PRE><B>Requires:</B>
&nbsp;   True
<B>Ensures:</B>
&nbsp;   position = old position  and charNumber = old charNumber
&nbsp;   charCount = old charCount  and  lineNumber = old lineNumber
&nbsp;   markPosition = position  and markCharNumber = charNumber
&nbsp;   markCharCount = charCount  and  markLineNumber = lineNumber
&nbsp;   markPrevChar = prevChar  and  markCurrentChar = currentChar
</PRE>
@param marklimit the maximum number of bytes that may be read before the mark
becomes invalid.
*/

public void mark(int marklimit) {
  super.mark(marklimit);
  this.marklimit = marklimit;
  markCharCount = theCharCount;
  markCharNumber = theCharNumber;
  markLineNumber = theLineNumber;
  markPrevChar = prevChar;
  markCurrentChar = currentChar;
}

/*****************************************************************************
Reset position to last mark point.  The counters and the previous/current
character are restored only when the underlying reset succeeds.

<P><PRE><B>Requires:</B>
&nbsp;   mark(marklimit) to have been called
<B>Ensures:</B>
&nbsp;   markPosition = old markPosition
&nbsp;   markCharCount = old markCharCount
&nbsp;   markCharNumber = old markCharNumber
&nbsp;   markLineNumber = old markLineNumber
&nbsp;   markPosition = position
&nbsp;   markCharCount = charCount  and  markLineNumber = theLineNumber
&nbsp;   markCharNumber = charNumber
&nbsp;   markPrevChar = prevChar  and  markCurrentChar = currentChar
</PRE>
@exception IOException  When position-markPosition > marklimit
or mark(marklimit) has not been called.
*/

public void reset() throws IOException {
  super.reset();
  theCharCount = markCharCount;
  theCharNumber = markCharNumber;
  theLineNumber = markLineNumber;
  prevChar = markPrevChar;
  currentChar = markCurrentChar;
}

}