Instructions
Objective
Write program to search DNA sequence in C and MIPS assembly language.
Requirements and Specifications
Given a text file as input, we will count the number of unique words that appear. We want to count a word the first time we see it, then we never count the same word again. An open hash table is a great Data Structure to solve this problem.
for each word in the file do if word not in Hash Table then Count Unique Word Add to Hash Table end if Count Total Words end for Print Total Words in File Print Unique Words in File Print Hash Table Stats.
We need to define what a ”word” is. There may be many special characters and numbers in a file. Here are the rules for defining a word:
- Words are only broken by Space, Newline, Return, or End Of File
- Case Doesn’t matter, convert all words to lowercase
- Special Characters and numbers get ignored
Imagine we have the following sentence: ”They aren’t going to buy 17 cats.”
We would call the words in this file:
- they T became lower case
- arent single quote is ignored
- going
- to
- buy
- cats period is ignored
Notice that 17 was completely ignored because it contains no letters, only numbers.
Your program will take two command line inputs. The first is the size of the array as an integer. This tells you how much to make the array forming the foundation of your Open Hash Table. The second is the file to read.
You may assume the user will never call the program with bad inputs. We want to see how well the Hash Table did. Once the program is over, print out some statistics about the table. For each row, print out how many values were placed in that row. Then print out the average
number of values per row.
Screenshots of output






Source Code
Hash.c
/**
* Name: ?????
* Date: April 28, 2022
* File: hash.c
*/
#include "hash.h"
// Create a New Empty Hash
// Size of the underlying array is required
struct OpenHash* newOpenHash(int size)
{
struct OpenHash* oHash;
int i;
// allocate space for structure
oHash = (struct OpenHash*) malloc(sizeof(struct OpenHash));
if (oHash == NULL)
{
printf("Error allocating hash table\n");
exit(EXIT_FAILURE);
}
oHash->size = size; // set size
// allocate space for array of entries
oHash->entry = (struct Entry**) malloc(size * sizeof(struct Entry *));
if (oHash->entry == NULL)
{
printf("Error allocating hash table entries\n");
exit(EXIT_FAILURE);
}
// initialize all entries to null
for (i = 0; i < size; i++)
oHash->entry[i] = NULL;
return oHash; // return initialized table
}
// Hash A String
// See Psuedocode below for guidelines
int hash(char* string, int N)
{
int total = 0;
int i = 0;
char c;
while (string[i] != 0)
{
c = string[i];
total += (unsigned) (c) & 0xFF;
total *= 101;
total = total % N;
i++;
}
return total;
}
// Is the item stringa member of the hash table H
// Return 1 for Yes and 0 for No
char member(char* string, struct OpenHash* H)
{
struct Entry *entry;
int h = hash(string, H->size); // get string hash
entry = H->entry[h];
while (entry != NULL) // search in chained list for entry
{
if (!strcmp(string, entry->word)) // if word is found in entry
return 1;
entry = entry->next; // else advance to next entry in chained list
}
return 0; // not found
}
// Insert string into hash table H
void insert(char* string, struct OpenHash* H)
{
struct Entry *entry, *currPtr, *prevPtr;
int h = hash(string, H->size); // get string hash
// create new entry with the string
entry = (struct Entry*) malloc(sizeof(struct Entry));
if (entry == NULL)
{
deleteOpenHash(H);
printf("Error allocating hash table entry\n");
exit(EXIT_FAILURE);
}
entry->next = NULL;
// allocate space for word
entry->word = (char *) malloc(strlen(string) + 1);
if (entry == NULL)
{
free(entry);
deleteOpenHash(H);
printf("Error allocating hash table entry word\n");
exit(EXIT_FAILURE);
}
strcpy(entry->word, string); // copy word to entry
if (H->entry[h] == NULL) // if there were no entries in row, add at start
H->entry[h] = entry;
else
{
prevPtr = NULL;
currPtr = H->entry[h];
while (currPtr != NULL) // search for last entry in row chain
{
if (!strcmp(string, currPtr->word)) // if word is found in entry
{
free(entry->word); // discard entry
free(entry);
return; // don't add
}
prevPtr = currPtr; // update previous pointer
currPtr = currPtr->next; // advance to next entry in chained list
}
prevPtr->next = entry; // insert at end of chain
}
}
// Hash Stats
// Print stats required for final project
// See below for what stats this should print
void hashStats(struct OpenHash* H)
{
int numValues;
int i;
struct Entry *entry;
float sumValues;
printf("Hash Size: %d\n", H->size);
sumValues = 0.0;
for (i = 0; i < H->size; i++) // go through all rows
{
numValues = 0;
entry = H->entry[i]; // get first entry
while(entry != NULL) // if not null
{
numValues++; // increment values in row
entry = entry->next;
}
sumValues += numValues; // add to total number of values
printf("Row %d contains %d values.\n", i, numValues);
}
printf("Average Length: %.2f\n", sumValues / H->size);
}
// Deletes the open hash entries and frees allocated memory
void deleteOpenHash(struct OpenHash *H)
{
struct Entry *entry, *temp;
int i;
if (H != NULL)
{
for (i = 0; i < H->size; i++)
{
entry = H->entry[i];
while (entry != NULL)
{
temp = entry;
entry = entry->next; // advance to next entry in chained list
free(temp->word); // free allocated memory for word
free(temp); // free entry
}
}
free(H->entry); // free table entry array
free(H); // free table
}
}
Hash.h
/**
* Name: ?????
* Date: April 28, 2022
* File: hash.h
*/
#ifndef HASH_H
#define HASH_H
#include
#include
#include
// Definition of the hash table entry
struct Entry
{
char *word; // saved word
struct Entry *next; // chained entries
};
// Definition of the open hash structure
struct OpenHash
{
struct Entry **entry; // hash table entries
int size; // hash table size
};
// Create a New Empty Hash
// Size of the underlying array is required
struct OpenHash* newOpenHash(int size);
// Hash A String
// See Psuedocode below for guidelines
int hash(char* string, int N);
// Is the item stringa member of the hash table H
// Return 1 for Yes and 0 for No
char member(char* string, struct OpenHash* H) ;
// Insert string into hash table H
void insert(char* string, struct OpenHash* H) ;
// Hash Stats
// Print stats required for final project
// See below for what stats this should print
void hashStats(struct OpenHash* H);
// Deletes the open hash entries
void deleteOpenHash(struct OpenHash *H);
// Deletes the open hash entries and frees allocated memory
void saveHash(char *filename, struct OpenHash *H);
#endif /* HASH_H */
wordCount.c
/**
* Name: ?????
* Date: April 28, 2022
* File: wordCounts.c
*/
#include
#include
#include
#include
#include "hash.h"
// Load a given text file
char *loadFile(char *filename);
// Normalize word by removing special characters, converting to lowercase
void normalizeWord(char *word);
int main(int argc, char **argv)
{
int size;
char *contents;
char *word;
struct OpenHash *table;
int uniqueWords;
int totalWords;
if (argc != 3) // if bad number of arguments
{
printf("Usage:\n");
printf(" %s tableSize inputFile\n", argv[0]);
return 0;
}
size = atoi(argv[1]); // get size
contents = loadFile(argv[2]); // load file
table = newOpenHash(size); // create hash table
uniqueWords = 0; // initialize counters to zero
totalWords = 0;
word = strtok(contents, " \t\r\n"); // separate word by spaces
while (word != NULL) // for each word in the file do
{
normalizeWord(word); // normalize word
if (word[0] != 0) // if not empty string
{
if (!member(word, table)) // if word not in Hash Table then
{
uniqueWords++; // Count Unique Word
insert(word, table); // Add to Hash Table
}
totalWords++; // Count Total Words
}
word = strtok(NULL, " \t\r\n"); // separate word by spaces
}
printf("Total Words: %d\n", totalWords); // Print Total Words in File
printf("Unique Words: %d\n", uniqueWords); // Print Unique Words in File
hashStats(table); //Print Hash Table Stats
free(contents); // free file contents
deleteOpenHash(table); // remove all entries in table
return 0;
}
// Load a given text file
char *loadFile(char *filename)
{
FILE *file;
size_t fileSize;
char *contents;
file = fopen(filename, "rt"); // open file as read only
if (file == NULL)
{
printf("Error opening file '%s'\n", filename);
exit(EXIT_FAILURE);
}
fseek(file, 0, SEEK_END); // move to end of file
fileSize = ftell(file); // get file size
fseek(file, 0, SEEK_SET); // move to start of file
contents = (char *) malloc(fileSize); // create buffer for file
if (contents == NULL)
{
printf("Error allocating buffer for file\n");
exit(EXIT_FAILURE);
}
fread(contents, 1, fileSize, file); // read all file contents
fclose(file);
return contents;
}
// Normalize word by removing special characters, converting to lowercase
void normalizeWord(char *word)
{
int i, j;
int len = strlen(word);
for (i = 0; i < len; i++)
word[i] = tolower(word[i]); // convert to lowercase
i = 0;
while (word[i] != 0) // check all characters in string
{
if (!isalpha(word[i])) // if special char or digit
{
for (j = i; j < len; j++) // remove char from word
word[j] = word[j + 1];
len--; // decrement word length
}
else
i++;
}
}