| 
0102
 03
 04
 05
 06
 07
 08
 09
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 
 | package algs55; // section 5.5
import stdlib.*;
import algs52.TST;
/* ***********************************************************************
 *  Compilation:  javac LZW.java
 *  Execution:    java LZW - < input.txt   (compress)
 *  Execution:    java LZW + < input.txt   (expand)
 *  Dependencies: BinaryIn.java BinaryOut.java TST.java
 *
 *  Compress or expand binary input from standard input using LZW.
 *
 *  WARNING: STARTING WITH ORACLE JAVA 6, UPDATE 7 the SUBSTRING
 *  METHOD TAKES TIME AND SPACE LINEAR IN THE SIZE OF THE EXTRACTED
 *  SUBSTRING (INSTEAD OF CONSTANT SPACE AND TIME AS IN EARLIER
 *  IMPLEMENTATIONS).
 *
 *  See <a href = "http://java-performance.info/changes-to-string-java-1-7-0_06/">this article</a>
 *  for more details.
 *
 **************************************************************************/
/**
 * LZW compression and expansion using fixed-width codewords.
 *
 * <p>Both operations read from the static {@code binaryIn} and write to the
 * static {@code binaryOut}; {@link #main(String[])} wires those streams up
 * before dispatching. Codewords are {@code W} bits wide, the first {@code R}
 * codewords stand for the single-character strings, and codeword {@code R}
 * itself is reserved as the end-of-file marker.
 */
public class LZW {
  // Source and sink shared by compress() and expand(); assigned in main().
  private static BinaryIn binaryIn;
  private static BinaryOut binaryOut;
  private static final int R = 256;        // number of input chars
  private static final int L = 4096;       // number of codewords = 2^W
  private static final int W = 12;         // codeword width

  /**
   * Compresses everything readable from {@code binaryIn} and writes the
   * resulting sequence of {@code W}-bit codewords to {@code binaryOut},
   * terminated by the EOF codeword {@code R}. Closes {@code binaryOut}.
   *
   * <p>An empty input produces a stream holding only the EOF codeword.
   */
  public static void compress() {
    String input = binaryIn.readString();
    TST<Integer> st = new TST<>();
    // Seed the table: every single-character string maps to its own code.
    for (int i = 0; i < R; i++)
      st.put("" + (char) i, i);
    int code = R+1;  // R is codeword for EOF
    while (input.length() > 0) {
      String s = st.longestPrefixOf(input);  // Find max prefix match s.
      binaryOut.write(st.get(s), W);         // Print s's encoding.
      int t = s.length();
      // Extend the table with s plus the next input character, as long as
      // there is a next character and codewords are still available.
      if (t < input.length() && code < L)
        st.put(input.substring(0, t + 1), code++);
      input = input.substring(t);            // Scan past s in input.
    }
    binaryOut.write(R, W);
    binaryOut.close();
  }

  /**
   * Expands a stream of {@code W}-bit codewords read from {@code binaryIn}
   * (as produced by {@link #compress()}) and writes the decoded text to
   * {@code binaryOut}. Decoding stops at the EOF codeword {@code R}.
   * Closes {@code binaryOut}.
   *
   * <p>If the very first codeword is EOF the original message was empty:
   * nothing is written and no further codewords are read.
   */
  public static void expand() {
    String[] st = new String[L];
    int i; // next available codeword value
    // initialize symbol table with all 1-character strings
    for (i = 0; i < R; i++)
      st[i] = "" + (char) i;
    st[i++] = "";                        // (unused) lookahead for EOF
    int codeword = binaryIn.readInt(W);
    // Empty message: compress() emitted only the EOF marker, so there is
    // nothing to decode and no further codeword to read.
    if (codeword == R) {
      binaryOut.close();
      return;
    }
    String val = st[codeword];
    while (true) {
      binaryOut.write(val);
      codeword = binaryIn.readInt(W);
      if (codeword == R) break;
      String s = st[codeword];
      // The codeword being read is the one about to be defined, so its
      // string is not in the table yet; it is rebuilt as val followed by
      // val's own first character.
      if (i == codeword) s = val + val.charAt(0);   // special case hack
      // Record the new entry while codewords remain.
      if (i < L) st[i++] = val + s.charAt(0);
      val = s;
    }
    binaryOut.close();
  }

  /**
   * Test harness. The incoming {@code args} are overwritten with a
   * hard-coded mode and the streams are bound to fixed files: as written,
   * it compresses {@code data/genomeTiny.txt} into {@code /tmp/genomeTiny.bin}.
   * Swap the commented-out line in to expand the binary file instead.
   *
   * @param args ignored; replaced by the hard-coded mode below
   */
  public static void main(String[] args) {
    String txtFile = "data/genomeTiny.txt";
    String binFile = "/tmp/genomeTiny.bin";
    //args = new String[] { "+" }; binaryIn = new BinaryIn(binFile); binaryOut = new BinaryOut();
    args = new String[] { "-" }; binaryIn = new BinaryIn(txtFile); binaryOut = new BinaryOut(binFile);
    if      (args[0].equals("-")) compress();
    else if (args[0].equals("+")) expand();
    else throw new Error("Illegal command line argument");
  }
}
 |