/** 
  * Dictionary generator for the text/HTML word counter.
  * @author Robert J Morton <robmorton@clara.net>
  * @version 04 September 2000
  * @copyright Sep 2000 Robert J Morton (all rights reserved) */

/* This program accepts a word from from wc.class. If the word is not currently in the 
   dictionary, it adds it to the dictionary. If the word is already in the dictionary,
   it increments its number of occurrences figure. It produces the dictionary as a file
   called dic.txt. This lists words and their numbers of occurrences in both alphabetical
   order and in descending order of occurrences. */

import java.io.*;                            // for saving dictionary

class dic {
   private int N = 0;                        // current number of entries
   private int n = 0;                        // current entry number
   private dicEnt AE[];                      // reference to an array of dictionary entries for alphabetic dictionary
   private dicEnt NE[];                      // reference to an array of dictionary entries for occurrences dictionary
   private String W;                         // for word which is being submitted to the dictionary
   private String w;                         // the above cast to lower case

   dic() {                                   //Construct the dictionary array
      AE = new dicEnt[20000];                //vast array of references to dictionary entries
      NE = new dicEnt[20000];                //ditto for sorting by number of occurrences
   }


   void submit(String q) {                   // submit a word to the dictionary together with its capitalisation status
      W = q;                                 // make new submitted word visible to all methods
      w = q.toLowerCase();                   // forced lower case version of the submitted word
      if(find()) {                           // if word already in the dictionary 
         if(Character.isUpperCase(AE[n].getWord().charAt(0))   // if word already in dictionary starts with a capital
         && Character.isLowerCase(W.charAt(0)))                // and identical submitted word starts with a small letter
            AE[n].putWord(W);                // the small letter version takes precidence, so store it
         AE[n].incr();                       // increment its number of occurrence
      } else if(N < AE.length) {             // else word does not yet appear in dictionary
         for(int i = N; i > n; i--)          // so if still room in dictionary entries reference array
            AE[i] = AE[i - 1];               // move all higher entries up by one
         AE[n] = new dicEnt(W); N++;         // insert the new entry
      } else
         System.out.println("DICTIONARY ENTRY REFERENCES ARRAY OVERFLOW!");
   }


   private boolean find() {                  // FIND WORD IN DICTIONARY BY BINARY SLICE SEARCH
      if(N == 0) return false;               // if nothing in dictionary, go straight to enter new word
      int x;                                 // 3-state string comparison variable         
      n = N >> 1;                            // start with the keyword half way up the index
      int j = n;                             // jump size
      while((j >>= 1) > 0) {                 // while the jump size, having been halved, > zero 
         x = w.compareTo(AE[n].getword());   // compare submitted word with nth word in dictionary
         if(x > 0)                           // if w > retrieved word, we are too far down the index
            n += j;                          // so split the partition
         else if(x < 0)                      // if w < retrieved word, we are too far up the index
            n -= j;                          // so split the partition
         else return true;                   // return if w = retrieved keyword
      }                                      // Now too close to matching entry to continue binary search
      boolean u = false;                     // true indicates going up!
      boolean d = false;                     // true indicates going down!
      while(!(u && d)) {                     // while not yet reversed direction along index
         x = w.compareTo(AE[n].getword());   // compare submitted word with nth word in dictionary
         if(x > 0) {                         // w > retrieved word, we are too far down the index
            if(++n == N)                     // if moving up then overshoots end of index,
               break;                        // return with n pointing at blank entry above current highest
            if(d)                            // if we have just reversed direction of search
               break;                        // return with n pointing at the position where w must be inserted
            u = true;                        // indicate that we have just moved up the index
         } else if(x < 0) {                  // w < retrieved word, we are too far up the index
            if(u)                            // if overshot while going up the index, 
               break;                        // return with n pointing at the position where w must be inserted
            if(--n < 0) {                    // if moving down overshoots start of index, 
               n = 0; break;                 // return with n pointing at the first entry
            }
            d = true;                        // indicate that we have just moved down the index
         } else                              // if submitted word = nth word in dictionary (exact match found),
            return true;                     // return true with n pointing at the matching dictionary entry
      }
      return false;                          // keyword cannot be found so return false
   }


   void save() {                             // SAVE DICTIONARY TO FILE dic.txt
      for(int i = 0; i < N; i++)             // copy all dictionary entry references to 
         NE[i] = AE[i];                      // the 'number of occurrences' array.
      System.out.print("Sorting dictionary in descending order of word occurrences ...");
      qs(0, N - 1);                          // do a C.A.R. Hoare QuickSort on NE[]
      System.out.println(" done.");          // advise console when sort is finished
      try {                                  // for capturing file save IO Exceptions 
         System.out.print("Saving dictionary to file 'dic.txt' ... ");
         BufferedWriter o
            = new BufferedWriter(            // create a buffered file output stream
              new OutputStreamWriter(        // to dic.txt
              new FileOutputStream("dic.txt")
              ));
         String s = "WORDS ALPHABETICAL   OCCURRENCES"  // column headings for file
                  + "     "
                  + "WORDS BY OCCURRENCE  OCCURRENCES";

         o.write(s, 0, s.length());          // write the column headings
         o.newLine();                        // ensure a system-native new-line
         for(int i = 0; i < N; i++) {        // for each entry in the dictionary
            s = AE[i].format()               // format [next] alphabetic entry
              + "     "                      // add a separator
              + NE[N - i - 1].format();      // format [next] occurrence entry
            o.write(s, 0, s.length());       // write the line to the file
            o.newLine();                     // ensure a system-native new-line
         }
         o.newLine();                        // ensure a system-native new-line
         s = "TOTAL VOCABULARY = " + N + "words.";
         o.write(s, 0, s.length());          // write 'total words' line to file
         o.newLine();                        // ensure a system-native new-line
         o.close();                          // close the dictionary file
         System.out.println(" done.");       // advise colsole that dictionary stored
      } catch(IOException e) {               // advise console of location and nature
         System.out.println("Save: " + e);   // of any malfunctions while saving file
      }
   }


   private void qs(int LO, int HI) {         // AN ADAPTATION OF C.A.R. HOARE'S QUICK-SORT ALGORITHM
      int lo = LO;                           // set moving lo to LO end of partition
      int hi = HI;                           // set moving hi to HI end of partition
      if (HI > LO) {                         // if the partition contains anything
         int mid = NE[(LO + HI) >> 1].getOcc();      // get content of mid element of partition
         while(lo <= hi) {                   // loop through array until indices cross
            while(lo < HI && NE[lo].getOcc() < mid)  // while lowest keyword < midway keyword 
               lo++;                         // push lower sort boundary up by one element
            while(hi > LO && NE[hi].getOcc() > mid)  // while highest keyword > midway keyword 
               hi--;                         // pull upper sort boundary down by one element
            if(lo <= hi) {                   // IF LOW INDEX <= HIGH INDEX SWAP THEIR 'CONTENTS'
               dicEnt x = NE[lo];            // get index of lo element
               NE[lo] = NE[hi];              // put index of hi element in lo element
               NE[hi] = x;                   // put index of lo element in hi element
               lo++;                         // push lower sort boundary up by one element
               hi--;                         // pull upper sort boundary down by one element
            }
         }
         if(LO < hi) qs(LO, hi);             // if hi not yet reached start of file sort lower partition
         if(lo < HI) qs(lo, HI);             // if lo not yet reached end of file sort upper partition
      }
   }

}



class dicEnt {                               // Class of dictionary entries
   private String W;                         // the word as it appears
   private String w;                         // the word in lower case for sorting purposes
   private int o;		             // number of occurrences of word within all files

   dicEnt(String s) {                        // Construct a new dictionary entry
      W = s;                                 // write submitted new word into class instance
      w = s.toLowerCase();                   // store the word in lower case form
      o = 1;                                 // this is the first occurrence of this word 
   }

   void incr() {o++;}                        // increment the word's occurrence count
   String getWord() {return W;}              // return the word as is
   String getword() {return w;}              // return the word in lower case
   int getOcc() {return o;}                  // return its number of occurrences so far
   void putWord(String W) {this.W = W;}      // overwrite word in this entry with W

   String format() {                         // convert this dictionary entry to a formatted string
      String s = W;                          // the word string
      for(int i = s.length(); i < 21; i++)   // extend it with spaces 
         s += " ";                           // to make it 21 characters long
      String t = Integer.toString(o);        // convert its number of occurrences to a string
      for(int i = t.length(); i < 11; i++)   // pad it with leading spaces
         t = " " + t;                        // to make it 11 characters long
      return s + t;                          // return the concatenated strings
   }

/*  Robert J Morton, the author of this program, 
    is a poor but Right Honourable Fellow of the
    Ancient and Noble Order of the Long-term Unemployed.

    Offers of work please to: robmorton@clara.net  */
}

This page's parent within this Web Site. About this Web Site. Its home page. Email its Author.