+1 (315) 557-6473 

Create A Program To Count Words Program Using Hash Table In C Assignment Solution.


Instructions

Objective
Write program to search DNA sequence in C and MIPS assembly language.

Requirements and Specifications

Given a text file as input, we will count the number of unique words that appear. We want to count a word the first time we see it, then we never count the same word again. An open hash table is a great Data Structure to solve this problem.
for each word in the file do if word not in Hash Table then Count Unique Word Add to Hash Table end if Count Total Words end for Print Total Words in File Print Unique Words in File Print Hash Table Stats.
We need to define what a ”word” is. There may be many special characters and numbers in a file. Here are the rules for defining a word:
  • Words are only broken by Space, Newline, Return, or End Of File
  • Case Doesn’t matter, convert all words to lowercase
  • Special Characters and numbers get ignored
Imagine we have the following sentence: ”They aren’t going to buy 17 cats.”
We would call the words in this file:
  1. they T became lower case
  2. arent single quote is ignored
  3. going
  4. to
  5. buy
  6. cats period is ignored
Notice that 17 was completely ignored because it contains no letters, only numbers.
Your program will take two command line inputs. The first is the size of the array as an integer. This tells you how much to make the array forming the foundation of your Open Hash Table. The second is the file to read.
You may assume the user will never call the program with bad inputs. We want to see how well the Hash Table did. Once the program is over, print out some statistics about the table. For each row, print out how many values were placed in that row. Then print out the average
number of values per row.
Screenshots of output
Word count program using Hash table in C
Word count program using Hash table in C 1
Word count program using Hash table in C 2
Word count program using Hash table in C 3
Word count program using Hash table in C 4
Word count program using Hash table in C 5
Source Code
Hash.c
/**
 * Name: ?????
 * Date: April 28, 2022
 * File: hash.c
 */
#include "hash.h"
// Create a New Empty Hash
// Size of the underlying array is required
struct OpenHash* newOpenHash(int size)
{
    struct OpenHash* oHash;
    int i;
    // allocate space for structure
    oHash = (struct OpenHash*) malloc(sizeof(struct OpenHash));
    if (oHash == NULL)
    {
        printf("Error allocating hash table\n");
        exit(EXIT_FAILURE);
    }
    oHash->size = size; // set size
    // allocate space for array of entries
    oHash->entry = (struct Entry**) malloc(size * sizeof(struct Entry *));
    if (oHash->entry == NULL)
    {
        printf("Error allocating hash table entries\n");
        exit(EXIT_FAILURE);
    }
    // initialize all entries to null
    for (i = 0; i < size; i++)
        oHash->entry[i] = NULL;
    return oHash; // return initialized table
}
// Hash A String
// See Psuedocode below for guidelines
int hash(char* string, int N)
{
    int total = 0;
    int i = 0;
    char c;
    while (string[i] != 0)
    {
        c = string[i];
        total += (unsigned) (c) & 0xFF;
        total *= 101;
        total = total % N;
        i++;
    }
    return total;
}
// Is the item stringa member of the hash table H
// Return 1 for Yes and 0 for No
char member(char* string, struct OpenHash* H)
{
    struct Entry *entry;
    int h = hash(string, H->size); // get string hash
    entry = H->entry[h];
    while (entry != NULL) // search in chained list for entry
    {
        if (!strcmp(string, entry->word)) // if word is found in entry
            return 1;
        entry = entry->next; // else advance to next entry in chained list
    }
    return 0; // not found
}
// Insert string into hash table H
void insert(char* string, struct OpenHash* H)
{
    struct Entry *entry, *currPtr, *prevPtr;
    int h = hash(string, H->size); // get string hash
    // create new entry with the string
    entry = (struct Entry*) malloc(sizeof(struct Entry));
    if (entry == NULL)
    {
        deleteOpenHash(H);
        printf("Error allocating hash table entry\n");
        exit(EXIT_FAILURE);
    }
    entry->next = NULL;
    // allocate space for word
    entry->word = (char *) malloc(strlen(string) + 1);
    if (entry == NULL)
    {
        free(entry);
        deleteOpenHash(H);
        printf("Error allocating hash table entry word\n");
        exit(EXIT_FAILURE);
    }
    strcpy(entry->word, string); // copy word to entry
    if (H->entry[h] == NULL) // if there were no entries in row, add at start
        H->entry[h] = entry;
    else
    {
        prevPtr = NULL;
        currPtr = H->entry[h];
        while (currPtr != NULL) // search for last entry in row chain
        {
            if (!strcmp(string, currPtr->word)) // if word is found in entry
            {
                free(entry->word); // discard entry
                free(entry);
                return; // don't add
            }
            prevPtr = currPtr; // update previous pointer
            currPtr = currPtr->next; // advance to next entry in chained list
        }
        prevPtr->next = entry; // insert at end of chain
    }
}
// Hash Stats
// Print stats required for final project
// See below for what stats this should print
void hashStats(struct OpenHash* H)
{
    int numValues;
    int i;
    struct Entry *entry;
    float sumValues;
    printf("Hash Size: %d\n", H->size);
    sumValues = 0.0;
    for (i = 0; i < H->size; i++) // go through all rows
    {
        numValues = 0;
        entry = H->entry[i]; // get first entry
        while(entry != NULL) // if not null
        {
            numValues++; // increment values in row
            entry = entry->next;
        }
        sumValues += numValues; // add to total number of values
        printf("Row %d contains %d values.\n", i, numValues);
    }
    printf("Average Length: %.2f\n", sumValues / H->size);
}
// Deletes the open hash entries and frees allocated memory
void deleteOpenHash(struct OpenHash *H)
{
    struct Entry *entry, *temp;
    int i;
    if (H != NULL)
    {
        for (i = 0; i < H->size; i++)
        {
            entry = H->entry[i];
            while (entry != NULL)
            {
                temp = entry;
                entry = entry->next; // advance to next entry in chained list
                free(temp->word); // free allocated memory for word
                free(temp); // free entry
            }
        }
        free(H->entry); // free table entry array
        free(H); // free table
    }
}
Hash.h
/**
 * Name: ?????
 * Date: April 28, 2022
 * File: hash.h
 */
#ifndef HASH_H
#define HASH_H
#include
#include
#include
// Definition of the hash table entry
struct Entry
{
    char *word; // saved word
    struct Entry *next; // chained entries
};
// Definition of the open hash structure
struct OpenHash
{
    struct Entry **entry; // hash table entries
    int size; // hash table size
};
// Create a New Empty Hash
// Size of the underlying array is required
struct OpenHash* newOpenHash(int size);
// Hash A String
// See Psuedocode below for guidelines
int hash(char* string, int N);
// Is the item stringa member of the hash table H
// Return 1 for Yes and 0 for No
char member(char* string, struct OpenHash* H) ;
// Insert string into hash table H
void insert(char* string, struct OpenHash* H) ;
// Hash Stats
// Print stats required for final project
// See below for what stats this should print
void hashStats(struct OpenHash* H);
// Deletes the open hash entries
void deleteOpenHash(struct OpenHash *H);
// Deletes the open hash entries and frees allocated memory
void saveHash(char *filename, struct OpenHash *H);
#endif /* HASH_H */
wordCount.c
/**
 * Name: ?????
 * Date: April 28, 2022
 * File: wordCounts.c
 */
#include
#include
#include
#include
#include "hash.h"
// Load a given text file
char *loadFile(char *filename);
// Normalize word by removing special characters, converting to lowercase
void normalizeWord(char *word);
int main(int argc, char **argv)
{
    int size;
    char *contents;
    char *word;
    struct OpenHash *table;
    int uniqueWords;
    int totalWords;
    if (argc != 3) // if bad number of arguments
    {
        printf("Usage:\n");
        printf(" %s tableSize inputFile\n", argv[0]);
        return 0;
    }
    size = atoi(argv[1]); // get size
    contents = loadFile(argv[2]); // load file
    table = newOpenHash(size); // create hash table
    uniqueWords = 0; // initialize counters to zero
    totalWords = 0;
    word = strtok(contents, " \t\r\n"); // separate word by spaces
    while (word != NULL) // for each word in the file do
    {
        normalizeWord(word); // normalize word
        if (word[0] != 0) // if not empty string
        {
            if (!member(word, table)) // if word not in Hash Table then
            {
                uniqueWords++; // Count Unique Word
                insert(word, table); // Add to Hash Table
            }
            totalWords++; // Count Total Words
        }
        word = strtok(NULL, " \t\r\n"); // separate word by spaces
    }
    printf("Total Words: %d\n", totalWords); // Print Total Words in File
    printf("Unique Words: %d\n", uniqueWords); // Print Unique Words in File
    hashStats(table); //Print Hash Table Stats
    free(contents); // free file contents
    deleteOpenHash(table); // remove all entries in table
    return 0;
}
// Load a given text file
char *loadFile(char *filename)
{
    FILE *file;
    size_t fileSize;
    char *contents;
    file = fopen(filename, "rt"); // open file as read only
    if (file == NULL)
    {
        printf("Error opening file '%s'\n", filename);
        exit(EXIT_FAILURE);
    }
    fseek(file, 0, SEEK_END); // move to end of file
    fileSize = ftell(file); // get file size
    fseek(file, 0, SEEK_SET); // move to start of file
    contents = (char *) malloc(fileSize); // create buffer for file
    if (contents == NULL)
    {
        printf("Error allocating buffer for file\n");
        exit(EXIT_FAILURE);
    }
    fread(contents, 1, fileSize, file); // read all file contents
    fclose(file);
    return contents;
}
// Normalize word by removing special characters, converting to lowercase
void normalizeWord(char *word)
{
    int i, j;
    int len = strlen(word);
    for (i = 0; i < len; i++)
        word[i] = tolower(word[i]); // convert to lowercase
    i = 0;
    while (word[i] != 0) // check all characters in string
    {
        if (!isalpha(word[i])) // if special char or digit
        {
            for (j = i; j < len; j++) // remove char from word
                word[j] = word[j + 1];
            len--; // decrement word length
        }
        else
            i++;
    }
}