/***********************************************************************
* Program:
*    Assignment 16, Assignment 16
*    Brother Ercanbrack, CS235
* Author:
*    Derek Hildreth
* Summary:
*    Searches through the full Book of Mormon and returns the top 100
*    words used and how many times each word occurred.  Uses map.
*
* ********************************************************************
* ****NOTE for Bro. Ercanbrack:***************************************
*    The original submission never matched the expected output word
*    for word: it was one off in the overall words processed and, for
*    some reason, "below" was on the list of top 100.
*
* ****Review notes:***************************************************
*    - The read loop now tests the extraction itself, so the last
*      word of a file that does not end in whitespace is counted.
*    - Words are ranked from a sorted list of (word, count) pairs, so
*      words that share a frequency are no longer dropped.
*    - Tokens that contain no letters or hyphens are skipped instead
*      of being counted as an empty word.
*    - A token is still truncated at its first "--", so
*      "Lamanites--Written" counts only "lamanites" (unchanged).
*    These may or may not account for the mismatch described above.
***********************************************************************/
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
using namespace std;

namespace
{
//****How many of the most frequent words are printed.****//
const size_t TOP_WORD_LIMIT = 100;

//****A word paired with the number of times it was seen.****//
typedef pair < string, int > WordCount;

/***********************************************************************
* Reduces one whitespace-delimited token to the word that is counted.
*    - letters are kept and lowercased
*    - single hyphens are kept ("well-known")
*    - every other character (punctuation, digits) is dropped
*    - the result is cut off at the first "--" ("Mormon--" -> "mormon")
* Returns an empty string when nothing countable is left.
***********************************************************************/
string normalizeWord(const string& token)
{
   string cleaned;
   for (string::size_type i = 0; i < token.length(); i++)
   {
      // <cctype> functions require a value representable as unsigned
      // char; a negative plain char would be undefined behavior.
      const unsigned char ch = static_cast<unsigned char>(token[i]);
      if (isalpha(ch))
         cleaned += static_cast<char>(tolower(ch));
      else if (ch == '-')
         cleaned += '-';
   }

   const string::size_type doubleDash = cleaned.find("--");
   if (doubleDash != string::npos)
      cleaned.erase(doubleDash);

   return cleaned;
}

/***********************************************************************
* Ordering for the ranking: higher counts first; words with the same
* count are ordered alphabetically so the output is deterministic.
***********************************************************************/
bool moreFrequent(const WordCount& lhs, const WordCount& rhs)
{
   if (lhs.second != rhs.second)
      return lhs.second > rhs.second;
   return lhs.first < rhs.first;
}
}

/***********************************************************************
* This is Main!
* Reads the file named by argv[1] (or prompts for a name when no
* argument is given), counts every normalized word, and prints the
* total number of words followed by the most frequent words.
* Returns 1 when the file cannot be opened, 0 otherwise.
***********************************************************************/
int main(int argc, char* argv[])
{
   string inFileName;

   //****If there is an argument after a.out, use it; otherwise ask.****//
   if (argc > 1)
   {
      inFileName = argv[1];
   }
   else
   {
      cout << "Enter input filename: ";
      // Reading into a string avoids the fixed-size buffer overflow
      // (and the leak) of reading into a new char[80].
      cin >> inFileName;
   }

   ifstream file(inFileName.c_str());

   //****Checks to see that file is valid.****//
   if (file.fail())
   {
      cout << "Invalid" << endl;
      return 1;
   }

   //****Read the file into the word -> count map****//
   map < string, int > frequencies;
   int wordCount = 0;
   string token;

   // Testing the extraction itself (rather than eof()) counts the final
   // word even when the file does not end with whitespace.
   while (file >> token)
   {
      const string word = normalizeWord(token);
      if (word.empty())
         continue;   // token was only punctuation/digits

      ++frequencies[word];
      ++wordCount;
   }

   //****Rank the words by frequency****//
   // A map keyed by count would keep only one word per count, so the
   // pairs are copied out and sorted instead.
   vector < WordCount > ranked(frequencies.begin(), frequencies.end());
   const size_t shown = min(ranked.size(), TOP_WORD_LIMIT);
   partial_sort(ranked.begin(), ranked.begin() + shown, ranked.end(),
                moreFrequent);

   cout << "\nNumber of words processed: " << wordCount << endl;
   cout << "100 most common words found and their frequencies: " << endl;

   for (size_t i = 0; i < shown; i++)
   {
      cout << setw(23) << setfill(' ') << ranked[i].first
           << " - " << ranked[i].second << endl;
   }

   return 0;
}