Spell checking with hashtable

1 Hashtable
1.1 Summary

You will design and implement a hashtable Structure. The application of the structure
will include spell checking and word counting. Words in an inputFile will be sequentially
investigated and compared to a comprehensive list of correctly spelled words – a lexicon.
The lexicon will be stored in a hash table structure to provide for an efficient search time.
Major factors to consider for the design should be theoretical efficiency (both time and
space), practical efficiency, and appropriate memory management. Although some tips are
provided below (and have been provided in class), the determination of how to make the
structure efficient is largely your charge.

1.2 Programming Languages

You can submit your projects using C++. You may choose a different programming language. However there are caveats as not all programming languages have the same characteristics. For example in C++ you cannot usepre-existing such as vectors, stacks, lists, trees etc.
To help facilitate io and timing, you may use ctime, string, ifstream, iostream, and string
stream.

1.3 Input Requirements

The program will take three command line arguments (Do not prompt the user).
./p5 [dictionaryFile] [inputFile]
Argument 2 is inputFile: the name of a text file to be spell-checked. Argument 1 is dictionaryFile: the name of a text file that contains all correctly spelled words (a lexicon).

1.4 Structural and Operational Requirements

  1. Hashtable Implementation
    • Collision resolution or mitigation should be considered.
  • Initialize the outer hash to a “reasonable” size. Consider the following: the interior
    hashes will be quadratic in the size of the number of collisions per bucket, thus
    you should initialize the outer hash to a size large enough to keep the number of
    collisions low, e.g. the size of the lexicon should be a good guess here.
    • Constructing the hash table should be done as follows:
    (a) Initialize the outer hashtable and randomly draw a hash function for use with the outer table. This first draw is kept usually without question or confirmation of “goodness”.
    (b) Hash the lexicon into the outer table and count the number of collisions in
    each bucket.
    (c) For each bucket, use the quadratic-space method discussed in class: intialize
    each inner table to square size of the number of collisions for that bucket.
    Keep randomly drawing a hash function for each bucket until no inner collisions.
  1. Application: Spell Checking
    • The program sequentially investigates each word in inputFile and confirm that
    this word exists in the lexicon. If so, it is assumed the word is spelled correctly;
    if not, the word is spelled incorrectly.
    • The program will keep a count of the number of misspelled words in inputFile.

Solution 

linux file 

Hash.cpp

#include <iostream>

#include <cstdlib>

#include <iomanip>

#include <string>

#include <fstream>

#include <assert.h>

#include <time.h>

using namespace std;

string replace(string str1, string str2, string str3)

{

size_t position = 0;

for ( position = str1.find(str2); position != std::string::npos; position = str1.find(str2,position) )

{

str1.replace(position ,1, str3);

}

return(str1);

}

classhashClass{

private :

staticconstinttableSize = 10;

structwordStruct {

string word;

wordStruct* next;

};

wordStruct* Hashtable[tableSize];

public:

hashClass();

int Hash(string key);

voidaddword(string word);

int numbers(int index);

boolfindword(string word);

voidprinttable();

intgettableSize();

};

voidhashClass::printtable(){

for(int i=0; i <tableSize ; i++)

{

cout<<“—————–\n”;

cout<< “index = ” << i <<endl;

cout<<Hashtable[i]->word <<endl;

cout<< “# of items ” << numbers(i) <<endl;

cout<< “—————–\n”;

}

}

hashClass::hashClass(){

for(int i=0;i<tableSize ; i++){

Hashtable[i] = new wordStruct;

Hashtable[i]->word = “empty”;

Hashtable[i]->next = NULL ;

}

}

voidhashClass::addword(string word){

string s;

s= replace(word, ” “, “”);

s = replace(s, “.”, “”);

s = replace(s, “:”, “”);

s = replace(s, “;”, “”);

s = replace(s, “,”, “”);

int index = Hash(s);

char a[1000];

if(Hashtable[index]->word == “empty”){

Hashtable[index]->word = s;

}

else

{

wordStruct* ptr = Hashtable[index];

wordStruct* n = new wordStruct;

n->word = s;

n->next = NULL;

while(ptr->next != NULL){ ptr = ptr->next;}

ptr->next = n;

}

//cout<< s <<endl;

}

inthashClass::Hash(string key){

int hash = 0;

int index;

for(int i=0;i<key.length();i++){

if((int)key[i] == 32) continue;

hash = hash + (int)key[i];

}

index = hash % tableSize;

//cout<< key << “of word index is: “<< index <<endl;

return index;

}

boolhashClass::findword(string word)

{

string s =word;

s = replace(s, ” “, “”);

s = replace(s, “.”, “”);

s = replace(s, “:”, “”);

s = replace(s, “;”, “”);

s = replace(s, “,”, “”);

int index = Hash(s);

bool found = false;

wordStruct* ptr = Hashtable[index];

while(ptr != NULL){

if(ptr->word == s) {found = true;}

ptr = ptr->next;}

//if(!found) {cout<<endl;cout<<s<<“- word not found\n”;}

return found;

}

inthashClass::numbers(int index){

int count = 0;

if(Hashtable[index]->word == “empty”)

{

return count;

}

else{

wordStruct* ptr = Hashtable[index];

while(ptr->next != NULL)

{

count++;

ptr = ptr->next;

}

}

return count+1;

}

inthashClass::gettableSize(){

returntableSize;

}

int main (intargc, char **argv) {

char a[1000000], b[40];

inti,z, counter=0;

string line , temp,s1,s2;

hashClasswordtable;

//cout<< “Creating Hash table with bucket size = “<<wordtable.gettableSize()<<endl;

ifstreammyfile, dic;

ofstreamwritefile;

dic.open(argv[1]);

//cout<< “Importing Words from Dictionarry file to Hash Class. \nThis may take several minutes…”<<endl;

constclock_tbegin_time = clock();

do{

getline(dic, temp);

wordtable.addword(temp);

// cout<< temp <<” entered in database with index number = ” <<wordtable.Hash(temp) <<endl;

}while(!(dic.eof()));

dic.close();

float s = float( clock () – begin_time ) /  CLOCKS_PER_SEC;

// cout<< “Checking words from inputFile “<<endl;

myfile.open(argv[2]);

do{

getline(myfile, temp);

line += temp;

line += “\n”;

}while(!(myfile.eof()));

strcpy(a,line.c_str());

for(i=0;a[i] != NULL;i++){

if(a[i]==’\n’) a[i]=32;

}

constclock_t begin_time1 = clock();

z=0;

for(i=0;a[i] != NULL;i++){

b[z] = a[i];

z++;

if(a[i] == 32) {

b[z] = NULL ;

z=0;

temp=b;

//if (temp.size () > 0)  temp.resize (temp.size () – 1);

if(!wordtable.findword(temp) && temp[0] != 32) counter++;

}

}

float p = float( clock () – begin_time1 ) /  CLOCKS_PER_SEC;

cout<<“\n\n”;

cout<< “\tNumber of misspelled words: ” << counter <<endl;

cout<< “\ttime to create perfect Hashtable: ” << s <<endl;

cout<< “\ttime to check for Misspelled words: ” << p <<endl;

cout<<endl;

system(“pause”);

return 0;

} 

VC file 

Hash.cpp

#include <iostream>

#include <cstdlib>

#include <iomanip>

#include <string>

#include <fstream>

#include <assert.h>

#include <time.h>

using namespace std;

string replace(string str1, string str2, string str3)

{

size_t position = 0;

for ( position = str1.find(str2); position != std::string::npos; position = str1.find(str2,position) )

{

str1.replace(position ,1, str3);

}

return(str1);

}

classhashClass{

private :

staticconstinttableSize = 10;

structwordStruct {

string word;

wordStruct* next;

};

wordStruct* Hashtable[tableSize];

public:

hashClass();

int Hash(string key);

voidaddword(string word);

int numbers(int index);

boolfindword(string word);

voidprinttable();

intgettableSize();

};

voidhashClass::printtable(){

for(int i=0; i <tableSize ; i++)

{

cout<<“—————–\n”;

cout<< “index = ” << i <<endl;

cout<<Hashtable[i]->word <<endl;

cout<< “# of items ” << numbers(i) <<endl;

cout<< “—————–\n”;

}

}

hashClass::hashClass(){

for(int i=0;i<tableSize ; i++){

Hashtable[i] = new wordStruct;

Hashtable[i]->word = “empty”;

Hashtable[i]->next = NULL ;

}

}

voidhashClass::addword(string word){

string s;

s= replace(word, ” “, “”);

s = replace(s, “.”, “”);

s = replace(s, “:”, “”);

s = replace(s, “;”, “”);

s = replace(s, “,”, “”);

int index = Hash(s);

char a[1000];

if(Hashtable[index]->word == “empty”){

Hashtable[index]->word = s;

}

else

{

wordStruct* ptr = Hashtable[index];

wordStruct* n = new wordStruct;

n->word = s;

n->next = NULL;

while(ptr->next != NULL){ ptr = ptr->next;}

ptr->next = n;

}

//cout<< s <<endl;

}

inthashClass::Hash(string key){

int hash = 0;

int index;

for(int i=0;i<key.length();i++){

if((int)key[i] == 32) continue;

hash = hash + (int)key[i];

}

index = hash % tableSize;

//cout<< key << “of word index is: “<< index <<endl;

return index;

}

boolhashClass::findword(string word)

{

string s =word;

s = replace(s, ” “, “”);

s = replace(s, “.”, “”);

s = replace(s, “:”, “”);

s = replace(s, “;”, “”);

s = replace(s, “,”, “”);

int index = Hash(s);

bool found = false;

wordStruct* ptr = Hashtable[index];

while(ptr != NULL){

if(ptr->word == s) {found = true;}

ptr = ptr->next;}

//if(!found) {cout<<endl;cout<<s<<“- word not found\n”;}

return found;

}

inthashClass::numbers(int index){

int count = 0;

if(Hashtable[index]->word == “empty”)

{

return count;

}

else{

wordStruct* ptr = Hashtable[index];

while(ptr->next != NULL)

{

count++;

ptr = ptr->next;

}

}

return count+1;

}

inthashClass::gettableSize(){

returntableSize;

}

int main (intargc, char **argv) {

char a[1000000], b[40];

inti,z, counter=0;

string line , temp,s1,s2;

hashClasswordtable;

//cout<< “Creating Hash table with bucket size = “<<wordtable.gettableSize()<<endl;

ifstreammyfile, dic;

ofstreamwritefile;

dic.open(“dictionaryFile.txt”);

//cout<< “Importing Words from Dictionarry file to Hash Class. \nThis may take several minutes…”<<endl;

constclock_tbegin_time = clock();

do{

getline(dic, temp);

wordtable.addword(temp);

// cout<< temp <<” entered in database with index number = ” <<wordtable.Hash(temp) <<endl;

}while(!(dic.eof()));

dic.close();

float s = float( clock () – begin_time ) /  CLOCKS_PER_SEC;

// cout<< “Checking words from inputFile “<<endl;

myfile.open(“inputFile.txt”);

do{

getline(myfile, temp);

line += temp;

line += “\n”;

}while(!(myfile.eof()));

strcpy(a,line.c_str());

for(i=0;a[i] != NULL;i++){

if(a[i]==’\n’) a[i]=32;

}

constclock_t begin_time1 = clock();

z=0;

for(i=0;a[i] != NULL;i++){

b[z] = a[i];

z++;

if(a[i] == 32) {

b[z] = NULL ;

z=0;

temp=b;

//if (temp.size () > 0)  temp.resize (temp.size () – 1);

if(!wordtable.findword(temp) && temp[0] != 32) counter++;

}

}

float p = float( clock () – begin_time1 ) /  CLOCKS_PER_SEC;

cout<<“\n\n”;

cout<< “\tNumber of misspelled words: ” << counter <<endl;

cout<< “\ttime to create perfect Hashtable: ” << s <<endl;

cout<< “\ttime to check for Misspelled words: ” << p <<endl;

cout<<endl;

system(“pause”);

return 0;

}