Instructions
Requirements and Specifications
- Before writing any code, reflect on the task requirements and constraints. Mentally explore different approaches and algorithms, considering their potential performance and costs. The metrics of merit are static code length, dynamic execution time, and storage requirements. There are often trade-offs between these parameters.
- Once a promising approach is chosen, a high level language (HLL) implementation (e.g., in C) can deepen its understanding. The HLL implementation is more flexible and convenient for exploring the solution space and should be written before constructing the assembly version where design changes are more costly and difficult to make. For P1-1, you will write a C assignment implementation of the program.
- Once a working C version is created, it’s time to “be the compiler” and see how it translates to MIPS assembly. This is an opportunity to see how HLL constructs are supported on a machine platform (at the ISA level). This level requires the greatest programming effort; but it also uncovers many new opportunities to increase performance and efficiency. You will write the assembly version for P1-2.





Source Code
/*
ECE 2035 Project P1-1
Please fill in the following
Student name:
Current date:
This is the only file that should be modified for the C implementation
of Project 1.
This program initializes a DNA sequence of 10,240 random characters and
a pattern of 3 to 7 random characters, all characters are from the DNA
alphabet {A, T, G, C}.
For Project P1-1, you will implement the function Match, called by main.
The function is defined at the end of this file.
*/
#include
#include
#include
// DO NOT include any additional libraries.
#define DEBUG 0 // RESET THIS TO 0 BEFORE SUBMITTING YOUR CODE
#define SEQLEN 10240
#define MAXPATLEN 10
#define NUMCHAR 4
char Alphabet[] = "ACTG";
int MatchIndices[SEQLEN];
int main(int argc, char *argv[]) {
char Seq[SEQLEN], Pat[MAXPATLEN];
int I, PatLen;
void Print_Seq(char *Seq, int SeqLen);
void Print_Pat(char *Pat, int PatLen);
void Match(char *Pat, int PatLen, char *Seq, int SeqLen);
if (DEBUG){
printf("Sample debugging print statement.\n");
}
srand((unsigned int) time(NULL)); // seed random number generator
PatLen = (rand() % MAXPATLEN) + 1; // compute pattern length
for (I = 0; I < SEQLEN; I++)
Seq[I] = Alphabet[rand() % NUMCHAR]; // create sequence
for (I = 0; I < PatLen; I++)
Pat[I] = Alphabet[rand() % NUMCHAR]; // create pattern
Print_Pat(Pat, PatLen); // print pattern
Print_Seq(Seq, SEQLEN); // print sequence
Match(Pat, PatLen, Seq, SEQLEN); // match pattern in sequence
printf("Pattern detected at the following locations:\n");
I = 0;
while (MatchIndices[I] != -1)
printf("Base pair %d in the sequence\n", MatchIndices[I++]);
return 0;
}
/* Print Sequence
This routine prints the sequence. */
void Print_Seq(char *Seq, int SeqLen) {
int I;
printf("The sequence is ...\n");
for (I = 0; I < SeqLen; I++) {
putchar(Seq[I]);
if (I % 80 == 79)
printf("\n");
}
}
/* Print Pattern
This routine prints the match pattern. */
void Print_Pat(char *Pat, int PatLen) {
int I;
printf("The pattern is ... \"");
for (I = 0; I < PatLen; I++)
putchar(Pat[I]);
printf("\"\n");
}
/* Match
This routine find indices of occurrences of a variable length DNA
pattern in a DNA sequence and stores them in the global array MatchIndices.
It stores a -1 at MatchIndices[n] (where n = number of occurrences of the pattern) to indicate the end of the sequence of indices. */
void Match(char Pat[], int PatLen, char Seq[], int SeqLen) {
// insert your code here
int indSeq; // index in dna sequence
int indPat; // index in pattern
int indMat; // index in matched indices
int matched; // 1 if pattern is matched, 0 otherwise
indMat = 0; // start at initial position in match indices array;
for (indSeq = 0; indSeq <= SeqLen - PatLen; indSeq++) // search pattern in all sequence positions
{
matched = 1; // assume pattern matches at start
for (indPat = 0; indPat < PatLen && matched; indPat++) // match all characters in pattern, continue while chars are equal
{
if (Seq[indSeq + indPat] != Pat[indPat]) // if not equal,
matched = 0; // pattern is not matched
}
if (matched) // if all pattern characters were matched
{
MatchIndices[indMat++] = indSeq; // save current sequence index as a match and increment matches
}
}
MatchIndices[indMat] = -1; // save ending -1
return;
}
Assembly Language
# DNA Search
#
# This program computes the indices of all (possibly overlapping) occurrences
# of a pattern string in an input DNA sequence, given in the array starting
# at the label Input. It stores the indices in the output array starting at
# label Indices.
#
# Please fill in the following
# Student name:
# Date:
.data
Input: .alloc 600
Indices: .alloc 4800
.text
DNAsearch: addi $1, $0, Input # point to input base
swi 548 # create DNA search
# your code goes here
srl $3, $2, 16 # get length from returned value
andi $2, $2, 0xFFFF # clear upper word to leave only the pattern
addi $4, $0, 1 # load a 1
sll $5, $3, 1 # length *2
sllv $4, $4, $5 # calculate 2 ^ length*2
addi $4, $4, -1 # calculate 2 ^ length*2 - 1, which is the bit mask for the given length
lw $5, 0($1) # load first word from input
lw $6, 4($1) # load second word from input
addi $1, $1, 8 # advance to third word
sll $6, $6, 16 # move second word to upper part
or $5, $5, $6 # combine both words into a single 32 bit one
addi $6, $0, 4800 # number of input characters
sub $6, $6, $3 # num - length
addi $6, $6, 1 # num - length + 1 = maximum index
addi $7, $0, Indices # point to start of indices
addi $8, $0, -1 # load -1
sw $8, 0($7) # store initial index as -1
addi $8, $0, 0 # start in position 0 in input
searchLoop: addi $9, $0, 8 # start check count
matchLoop: and $10, $4, $5 # leave only pattern length bits of input
bne $10, $2, nextMatch # if they are not equal, is not a match
sw $8, 0($7) # else, is a match, store index
addi $7, $7, 4 # move to next position in indices
nextMatch: srl $5, $5, 2 # shift right twice to advance to next character
addi $8, $8, 1 # increment position in input
beq $8, $6, endSearch # if i = max index end the search
addi $9, $9, -1 # decrement count
bne $9, $0, matchLoop # if not zero, do another match
lw $10, 0($1) # load next word from input
sll $10, $10, 16 # move second word to upper part
or $5, $5, $10 # combine both words into a single 32 bit one
addi $1, $1, 4 # advance to next word
j searchLoop # repeat the loop to search more matches
endSearch: addi $10, $0, -1 # load -1
sw $10, 0($7) # store last index as -1
addi $2, $0, Indices # store the output
swi 580 #
jr $31 # return to caller