/**
* Dictionary generator for the text/HTML word counter.
* @author Robert J Morton <robmorton@clara.net>
* @version 04 September 2000
* @copyright Sep 2000 Robert J Morton (all rights reserved) */
/* This program accepts a word from from wc.class. If the word is not currently in the
dictionary, it adds it to the dictionary. If the word is already in the dictionary,
it increments its number of occurrences figure. It produces the dictionary as a file
called dic.txt. This lists words and their numbers of occurrences in both alphabetical
order and in descending order of occurrences. */
import java.io.*; // for saving dictionary
class dic {
private int N = 0; // current number of entries
private int n = 0; // current entry number
private dicEnt AE[]; // reference to an array of dictionary entries for alphabetic dictionary
private dicEnt NE[]; // reference to an array of dictionary entries for occurrences dictionary
private String W; // for word which is being submitted to the dictionary
private String w; // the above cast to lower case
dic() { //Construct the dictionary array
AE = new dicEnt[20000]; //vast array of references to dictionary entries
NE = new dicEnt[20000]; //ditto for sorting by number of occurrences
}
void submit(String q) { // submit a word to the dictionary together with its capitalisation status
W = q; // make new submitted word visible to all methods
w = q.toLowerCase(); // forced lower case version of the submitted word
if(find()) { // if word already in the dictionary
if(Character.isUpperCase(AE[n].getWord().charAt(0)) // if word already in dictionary starts with a capital
&& Character.isLowerCase(W.charAt(0))) // and identical submitted word starts with a small letter
AE[n].putWord(W); // the small letter version takes precidence, so store it
AE[n].incr(); // increment its number of occurrence
} else if(N < AE.length) { // else word does not yet appear in dictionary
for(int i = N; i > n; i--) // so if still room in dictionary entries reference array
AE[i] = AE[i - 1]; // move all higher entries up by one
AE[n] = new dicEnt(W); N++; // insert the new entry
} else
System.out.println("DICTIONARY ENTRY REFERENCES ARRAY OVERFLOW!");
}
private boolean find() { // FIND WORD IN DICTIONARY BY BINARY SLICE SEARCH
if(N == 0) return false; // if nothing in dictionary, go straight to enter new word
int x; // 3-state string comparison variable
n = N >> 1; // start with the keyword half way up the index
int j = n; // jump size
while((j >>= 1) > 0) { // while the jump size, having been halved, > zero
x = w.compareTo(AE[n].getword()); // compare submitted word with nth word in dictionary
if(x > 0) // if w > retrieved word, we are too far down the index
n += j; // so split the partition
else if(x < 0) // if w < retrieved word, we are too far up the index
n -= j; // so split the partition
else return true; // return if w = retrieved keyword
} // Now too close to matching entry to continue binary search
boolean u = false; // true indicates going up!
boolean d = false; // true indicates going down!
while(!(u && d)) { // while not yet reversed direction along index
x = w.compareTo(AE[n].getword()); // compare submitted word with nth word in dictionary
if(x > 0) { // w > retrieved word, we are too far down the index
if(++n == N) // if moving up then overshoots end of index,
break; // return with n pointing at blank entry above current highest
if(d) // if we have just reversed direction of search
break; // return with n pointing at the position where w must be inserted
u = true; // indicate that we have just moved up the index
} else if(x < 0) { // w < retrieved word, we are too far up the index
if(u) // if overshot while going up the index,
break; // return with n pointing at the position where w must be inserted
if(--n < 0) { // if moving down overshoots start of index,
n = 0; break; // return with n pointing at the first entry
}
d = true; // indicate that we have just moved down the index
} else // if submitted word = nth word in dictionary (exact match found),
return true; // return true with n pointing at the matching dictionary entry
}
return false; // keyword cannot be found so return false
}
void save() { // SAVE DICTIONARY TO FILE dic.txt
for(int i = 0; i < N; i++) // copy all dictionary entry references to
NE[i] = AE[i]; // the 'number of occurrences' array.
System.out.print("Sorting dictionary in descending order of word occurrences ...");
qs(0, N - 1); // do a C.A.R. Hoare QuickSort on NE[]
System.out.println(" done."); // advise console when sort is finished
try { // for capturing file save IO Exceptions
System.out.print("Saving dictionary to file 'dic.txt' ... ");
BufferedWriter o
= new BufferedWriter( // create a buffered file output stream
new OutputStreamWriter( // to dic.txt
new FileOutputStream("dic.txt")
));
String s = "WORDS ALPHABETICAL OCCURRENCES" // column headings for file
+ " "
+ "WORDS BY OCCURRENCE OCCURRENCES";
o.write(s, 0, s.length()); // write the column headings
o.newLine(); // ensure a system-native new-line
for(int i = 0; i < N; i++) { // for each entry in the dictionary
s = AE[i].format() // format [next] alphabetic entry
+ " " // add a separator
+ NE[N - i - 1].format(); // format [next] occurrence entry
o.write(s, 0, s.length()); // write the line to the file
o.newLine(); // ensure a system-native new-line
}
o.newLine(); // ensure a system-native new-line
s = "TOTAL VOCABULARY = " + N + "words.";
o.write(s, 0, s.length()); // write 'total words' line to file
o.newLine(); // ensure a system-native new-line
o.close(); // close the dictionary file
System.out.println(" done."); // advise colsole that dictionary stored
} catch(IOException e) { // advise console of location and nature
System.out.println("Save: " + e); // of any malfunctions while saving file
}
}
private void qs(int LO, int HI) { // AN ADAPTATION OF C.A.R. HOARE'S QUICK-SORT ALGORITHM
int lo = LO; // set moving lo to LO end of partition
int hi = HI; // set moving hi to HI end of partition
if (HI > LO) { // if the partition contains anything
int mid = NE[(LO + HI) >> 1].getOcc(); // get content of mid element of partition
while(lo <= hi) { // loop through array until indices cross
while(lo < HI && NE[lo].getOcc() < mid) // while lowest keyword < midway keyword
lo++; // push lower sort boundary up by one element
while(hi > LO && NE[hi].getOcc() > mid) // while highest keyword > midway keyword
hi--; // pull upper sort boundary down by one element
if(lo <= hi) { // IF LOW INDEX <= HIGH INDEX SWAP THEIR 'CONTENTS'
dicEnt x = NE[lo]; // get index of lo element
NE[lo] = NE[hi]; // put index of hi element in lo element
NE[hi] = x; // put index of lo element in hi element
lo++; // push lower sort boundary up by one element
hi--; // pull upper sort boundary down by one element
}
}
if(LO < hi) qs(LO, hi); // if hi not yet reached start of file sort lower partition
if(lo < HI) qs(lo, HI); // if lo not yet reached end of file sort upper partition
}
}
}
class dicEnt { // Class of dictionary entries
private String W; // the word as it appears
private String w; // the word in lower case for sorting purposes
private int o; // number of occurrences of word within all files
dicEnt(String s) { // Construct a new dictionary entry
W = s; // write submitted new word into class instance
w = s.toLowerCase(); // store the word in lower case form
o = 1; // this is the first occurrence of this word
}
void incr() {o++;} // increment the word's occurrence count
String getWord() {return W;} // return the word as is
String getword() {return w;} // return the word in lower case
int getOcc() {return o;} // return its number of occurrences so far
void putWord(String W) {this.W = W;} // overwrite word in this entry with W
String format() { // convert this dictionary entry to a formatted string
String s = W; // the word string
for(int i = s.length(); i < 21; i++) // extend it with spaces
s += " "; // to make it 21 characters long
String t = Integer.toString(o); // convert its number of occurrences to a string
for(int i = t.length(); i < 11; i++) // pad it with leading spaces
t = " " + t; // to make it 11 characters long
return s + t; // return the concatenated strings
}
/* Robert J Morton, the author of this program,
is a poor but Right Honourable Fellow of the
Ancient and Noble Order of the Long-term Unemployed.
Offers of work please to: robmorton@clara.net */
}
This page's parent within this Web Site. About this Web Site. Its home page. Email its Author.