Processing CSV Files in C
/*
File: hw4Sort.c
Author: ///
Date: April 27, 2017
Statement:
"Your statement that the program is entirely your work and that you have
neither developed your code together with any other person nor copied program
code from any other person, nor permitted your code to be copied or otherwise
used by any other person, nor have you copied, modified, or otherwise used
program code that you have found in any external source, including but not
limited to, online sources"
*/
#include
#include
#include
#define MAX_LINE 512 // maximum line length
#define SORT_BY_LOCID 0
#define SORT_BY_LAT 1
typedef struct airPdata {
int seqNumber; //The process output's sequence number
char *LocID; //Airport's "Short Name", ie MCO
char *fieldName; //Airport Name
char *city; //Associated City
float longitude; //Longitude
float latitude; //Latitude
}airPdata;
/* Struct for making a linked list */
typedef struct lListAirPdata
{
airPdata *data;
struct lListAirPdata *next;
}lListAirPdata;
/* struct for making an AVL */
typedef struct AVLTree
{
airPdata *key;
struct AVLTree *left;
struct AVLTree *right;
struct AVLTree *parent;
int balance;
}AVLTree;
typedef int (*CmpFunc)(airPdata *a,airPdata *b); /* comparison function definition */
int max(int a, int b)
{
return a > b ? a : b;
}
/* safe version of malloc to detect when malloc fails */
void *safe_malloc(size_t size)
{
void *data;
data=malloc(size);
if(data==NULL)
{
printf("Out of memory!\n");
exit(1);
}
return data;
}
/* Print the program usage */
void print_usage(char **argv)
{
printf("Usage:\n\t%s\tfilename.ext sortParameter\n",argv[0]);
printf("\nDescription:\n\tfilename.ext\tinput filename\n");
printf("\tsortParameter\tmust be either \"a\" for alphabetical sort\n");
printf("\t \tor \"n\" for north bound exit\n");
}
/* Validates the command line arguments, if an error is found it doesn't return */
void validate_arguments(int argc,char **argv)
{
if(argc<2 || argc>3)
{
printf("hw4Sort ERRROR: Invalid number of arguments.\n");
print_usage(argv);
exit(1);
}
else if(argc==2)
{
if(!strcmp(argv[1],"-h"))
{
print_usage(argv);
exit(0);
}
else if(argv[1][1]=='\0')
{
printf("hw4Sort ERRROR: Filename missing.\n");
print_usage(argv);
exit(1);
}
else
{
printf("hw4Sort ERRROR: sortParameter not found.\n");
print_usage(argv);
exit(1);
}
}
}
/* validate a numeric string, it returns 1 if the string contains only digits
before the endchar, endpos contains the position after the last digit.
Otherwise, it returns 0, and endpos is not modified.
*/
int valid_number(char *str,char endchar,int *endpos)
{
int i=0;
while(str[i]!='\0' && str[i]!=endchar)
{
if(str[i]<'0' || str[i]>'9')
return 0;
i++;
}
if(str[i]!=endchar)
return 0;
*endpos=i;
return 1;
}
/* converts a DD-MM-SS.MASD string to a decimal degree float, if the string
doesn't have the correct format it returns 0.0
*/
float sexag2decimal(char *degreeString)
{
int n,start,end;
int degrees,minutes;
float ssmas;
float decimal;
int sign;
if(degreeString==NULL)
return 0.0;
start=0;
if(!valid_number(°reeString[start],'-',&end))
return 0.0;
degreeString[start+end]=0;
degrees=atoi(°reeString[start]);
if(degrees>180) // valid global degrees 0-180
return 0.0;
if(degrees>99) // valid US degrees 0-99
return 0.0;
start+=end+1;
if(!valid_number(°reeString[start],'-',&end))
return 0.0;
degreeString[start+end]=0;
minutes=atoi(°reeString[start]);
if(minutes>59) // valid minutes 0-59
return 0.0;
start+=end+1;
n=start+strlen(°reeString[start]);
if(degreeString[n-1]=='N' || degreeString[n-1]=='E')
sign=+1;
else if(degreeString[n-1]=='S' || degreeString[n-1]=='W')
sign=-1;
else
return 0.0;
degreeString[n-1]=0;
n=start;
if(!valid_number(°reeString[start],'.',&end))
return 0.0;
start+=end+1;
if(!valid_number(°reeString[start],'\0',&end))
return 0.0;
ssmas=atof(°reeString[n]);
if(ssmas>59.9999) // valid seconds.milliarcseconds 0-59.0-9999
return 0.0;
decimal = sign*(degrees + minutes/60.0 + ssmas/3600.0);
return decimal;
}
/* insert a new node to linked list */
void insert(airPdata *data,lListAirPdata *list)
{
lListAirPdata *node;
node=(lListAirPdata*)safe_malloc(sizeof(lListAirPdata));
node->next=list->next; // insert at the start
node->data=data;
list->next=node;
}
/* print the contents of the list */
void print(lListAirPdata *list)
{
lListAirPdata *node;
if(list==NULL)
return;
node=list->next;
printf("code,name,city,lat,lon\n");
while(node)
{
printf("%s,",node->data->LocID);
printf("%s,",node->data->fieldName);
printf("%s,",node->data->city);
printf("%.4f,",node->data->latitude);
printf("%.4f\n",node->data->longitude);
node=node->next;
}
}
/* free the contents of the list */
void free_list(lListAirPdata *list)
{
lListAirPdata *node,*tmp;
if(list==NULL)
return;
node=list->next;
while(node)
{
tmp=node;
node=node->next;
free(tmp->data->LocID);
free(tmp->data->fieldName);
free(tmp->data->city);
free(tmp->data);
free(tmp);
}
}
/* Create a new AVL tree node */
AVLTree *new_tree_node(airPdata *key,AVLTree *parent)
{
AVLTree *node;
node=(AVLTree *)safe_malloc(sizeof(AVLTree));
node->left=NULL;
node->right=NULL;
node->key=key;
node->parent=parent;
node->balance=0;
return node;
}
/* Calculate the AVL tree node height */
int tree_height(AVLTree *n)
{
if (n == NULL)
return -1;
return 1 + max(tree_height(n->left), tree_height(n->right));
}
/* Set the AVL tree node balance */
void set_tree_balance(AVLTree *n)
{
n->balance = tree_height(n->right) - tree_height(n->left);
}
/* Rotate the AVL tree node to the left */
AVLTree *tree_rotate_l(AVLTree *a) {
AVLTree *b = a->right;
b->parent = a->parent;
a->right = b->left;
if (a->right != NULL)
a->right->parent = a;
b->left = a;
a->parent = b;
if (b->parent != NULL)
{
if (b->parent->right == a)
b->parent->right = b;
else
b->parent->left = b;
}
set_tree_balance(a);
set_tree_balance(b);
return b;
}
/* Rotate the AVL tree node to the left */
AVLTree *tree_rotate_r(AVLTree *a) {
AVLTree *b = a->left;
b->parent = a->parent;
a->left = b->right;
if (a->left != NULL)
a->left->parent = a;
b->right = a;
a->parent = b;
if (b->parent != NULL)
{
if (b->parent->right == a)
b->parent->right = b;
else
b->parent->left = b;
}
set_tree_balance(a);
set_tree_balance(b);
return b;
}
/* Rotate the AVL tree node to the left and then to the right */
AVLTree *tree_rotate_lr(AVLTree *n) {
n->left = tree_rotate_l(n->left);
return tree_rotate_r(n);
}
/* Rotate the AVL tree node to the right and then to the left */
AVLTree *tree_rotate_rl(AVLTree *n) {
n->right = tree_rotate_r(n->right);
return tree_rotate_l(n);
}
/* Rebalance the AVL tree node, moving the root if necessary */
void tree_rebalance(AVLTree *n,AVLTree **root)
{
set_tree_balance(n);
if (n->balance == -2) {
if (tree_height(n->left->left) >= tree_height(n->left->right))
n = tree_rotate_r(n);
else
n = tree_rotate_lr(n);
}
else if (n->balance == 2) {
if (tree_height(n->right->right) >= tree_height(n->right->left))
n = tree_rotate_l(n);
else
n = tree_rotate_rl(n);
}
if (n->parent != NULL) {
tree_rebalance(n->parent,root);
}
else {
*root = n;
}
}
/* Insert a new leaf in the the AVL tree, use the give compare function for
comparing values */
int tree_insert(airPdata *key,AVLTree **root,CmpFunc compare)
{
AVLTree *n,*parent;
int goLeft;
if (*root == NULL)
*root = new_tree_node(key, NULL);
else
{
n = *root;
while (1)
{
if (!compare(n->key,key))
return 0;
parent = n;
goLeft = compare(n->key,key)>0; // n->key > key;
n = goLeft ? n->left : n->right;
if (n == NULL)
{
if (goLeft)
parent->left = new_tree_node(key, parent);
else
parent->right = new_tree_node(key, parent);
tree_rebalance(parent,root);
break;
}
}
}
return 1;
}
/* Free all allocated nodes on the tree */
void free_tree(AVLTree *tree)
{
if(tree==NULL)
return;
free_tree(tree->right);
free_tree(tree->left);
free(tree);
}
/* save the tree in the airPdata linked list */
void tree_dump(AVLTree *tree,lListAirPdata *list)
{
if(tree==NULL)
return;
tree_dump(tree->right,list);
insert(tree->key,list);
tree_dump(tree->left,list);
}
/* Sort the airport list using an AVL tree and using the given comparison
function, the linked list is modified and in return, it will contain the sorted
data */
void sort(lListAirPdata *airports,CmpFunc compare)
{
lListAirPdata *node,*tmp;
AVLTree *tree=NULL;
if(airports==NULL)
return;
node=airports->next;
while(node)
{
tmp=node;
tree_insert(node->data,&tree,compare);
node=node->next;
free(tmp);
}
airports->next=NULL;
tree_dump(tree,airports);
free_tree(tree); // free allocated space
}
/* Comparison function to compare by location ID */
int compare_by_LocID(airPdata *a,airPdata *b)
{
return strcmp(a->LocID,b->LocID);
}
void sortByLocID(lListAirPdata *airports)
{
sort(airports,compare_by_LocID);
}
/* Comparison function to compare by location latitude */
int compare_by_latitude(airPdata *a,airPdata *b)
{
return (int)((a->latitude-b->latitude)*10000);
}
void sortByLatitude(lListAirPdata *airports)
{
sort(airports,compare_by_latitude);
}
/* Determines if the given location ID is a string of 3 or 4 letters without
numbers, if so it returns 1, otherwiese return 0 */
int valid_locID(char *locID)
{
int i,n;
n=strlen(locID);
if(n!=3 && n!=4)
return 0;
for(i=0; i='0' && locID[i]<='9')
return 0;
return 1;
}
/* load the given file and parse its contents saving the parsed content in a
linked list of lListAirPdata structures */
void load_file(char *filename,lListAirPdata *list)
{
FILE *file;
char *line;
int i,start,ntoken,valid;
airPdata *data;
file=fopen(filename,"rt");
if(file==NULL)
{
printf("hw4Sort ERRROR: File \"%s\" not found.\n",filename);
exit(1);
}
list->next=NULL;
line=(char*)safe_malloc(MAX_LINE);
while(!feof(file))
{
fgets(line,MAX_LINE,file);
i=start=ntoken=0;
valid=1;
while(line[i] && valid)
{
if(line[i]==',' || line[i]=='\n')
{
line[i]='\0';
switch(ntoken)
{
case 1: if((valid=valid_locID(&line[start]))!=0)
{
data=(airPdata *)safe_malloc(sizeof(airPdata));
data->LocID=(char*)safe_malloc(i-start+1);
strcpy(data->LocID,&line[start]);
}
break;
case 2: data->fieldName=(char*)safe_malloc(i-start+1);
strcpy(data->fieldName,&line[start]);
break;
case 3: data->city=(char*)safe_malloc(i-start+1);
strcpy(data->city,&line[start]);
break;
case 8: data->latitude=sexag2decimal(&line[start]);
break;
case 9: data->longitude=sexag2decimal(&line[start]);
break;
}
start=i+1;
ntoken++;
}
i++;
}
if(valid)
insert(data,list);
}
free(line);
fclose(file);
}
int main(int argc,char **argv)
{
int sort_type;
lListAirPdata list;
validate_arguments(argc,argv);
if(argv[2][1]=='\0' && (argv[2][0]=='a' || argv[2][0]=='A'))
sort_type=SORT_BY_LOCID;
else if(argv[2][1]=='\0' && (argv[2][0]=='n' || argv[2][0]=='N'))
sort_type=SORT_BY_LAT;
else
{
printf("hw4Sort ERRROR: sortParameter is not valid.\n");
print_usage(argv);
exit(1);
}
load_file(argv[1],&list);
if(sort_type==SORT_BY_LOCID)
sortByLocID(&list);
else
sortByLatitude(&list);
print(&list);
free_list(&list);
return 0;
}