/* This routine uses C/C++ CSV Parser API http://sourceforge.net/p/cccsvparser/wiki/Home/ */ #include #include #include #include #include "csvparser.h" #include "csvparser.c" #ifdef __cplusplus extern "C" { #endif /* * This routine replaces a substring with another * Preconditions: * s, old and new are valid non-NULL pointers * strlen(old) >= 1 * * s is the string we want to modify * * old is the substring that needs to be replaced * * new is the substring we want to replace the old substring * * returns the new string or Null in case of error. * */ char *replace(const char *s, const char *old, const char *new){ char *ret; char *sr; int i; int count = 0; int newlen = strlen(new); int oldlen = strlen(old); if (newlen != oldlen) { for (i = 0; s[i] != '\0'; ) { if (memcmp(&s[i], old, oldlen) == 0){ count++; i += oldlen; } else{ i++; } } } else{ i = strlen(s); } ret = malloc(i + 1 + count * (newlen - oldlen)); if (ret == NULL) return NULL; sr = ret; while (*s) { if (memcmp(s, old, oldlen) == 0) { memcpy(sr, new, newlen); sr += newlen; s += oldlen; } else{ *sr++ = *s++; } } *sr = '\0'; return ret; } /* This subroutine parses the inputString based on given delimiter, and returns the value of specifield fieldNumber into returnField. The subroutine returns: 0 in case of success 1 in case of negative or zero field Number 2 in case of out of bound field Number 3 in case of bad input string or unknow errors */ int parseCSV(char *returnField, char *inputString, char *delimiter, int fieldNumber) { //check if return field number is less than one if(fieldNumber < 1) return 1; //set delimeter to a default value if it is blank if( strlen(delimiter) == 0 ) delimiter = ","; /* Create a new instance of CsvParser, that parses a given string Returns an instance of CsvParser inputString - String that contains CSV formatted text, to be parsed delimiter - pointer to a single char to be considered as delimiter. If NULL is passed, using default, which is a comma (,) char firstLineIsHeader - Use 0 for start reading rows from the first line. Use non-zero value for starting to read rows from the second line */ // string, delimiter, first_line_is_header? CsvParser *csvparser = CsvParser_new_from_string(inputString, delimiter, 0); CsvRow *row; /* Get the next row from the CSV Returns an instance of CsvRow, or NULL in case of EOF or error csvParser - an instance of CsvParser CsvRow *CsvParser_getRow(CsvParser *csvParser); */ if((row = CsvParser_getRow(csvparser)) ) { int fieldCount = CsvParser_getNumFields(row); //check if the field number is out of bound if(fieldCount < fieldNumber){ CsvParser_destroy_row(row); CsvParser_destroy(csvparser); return 2; }//end if char **rowFields = CsvParser_getFields(row); //Appx starts from 1 instead on 0 fieldNumber = fieldNumber - 1; //escape line feed and carriage return if they exist if(strchr(rowFields[fieldNumber],'\n')||strchr(rowFields[fieldNumber],'\r')){ char *field = NULL; char *str = NULL; //Assume for at most 100 replacement str = malloc ( strlen(rowFields[fieldNumber])+401); strcpy(str,rowFields[fieldNumber]); if(strchr(rowFields[fieldNumber],'\r')!= NULL){ field = replace(str, "\r", "\\\\r\\\\"); strcpy(str, field); } if(strchr(rowFields[fieldNumber],'\n')!=NULL){ field = replace(str, "\n", "\\\\n\\\\"); } //check if we successfully replaced if(field != NULL){ strcpy(returnField, field); free(str); free(field); } else{ free(str); free(field); CsvParser_destroy_row(row); CsvParser_destroy(csvparser); return 3; } } else{ strcpy(returnField, rowFields[fieldNumber]); } //clean up CsvParser_destroy_row(row); free(rowFields); }//end if //failed to read a row from the given string else{ //if the string is not blank, something is wrong if(strlen(inputString) > 0){ CsvParser_destroy_row(row); CsvParser_destroy(csvparser); return 3; }//end if }//end else CsvParser_destroy(csvparser); return 0; }//end parseCSV /* This subroutine parses the inputString based on given delimiter, and set the fieldCount to the number of fields in the string. The subroutine returns: 0 in case of success 3 in case of bad input string or unknow errors */ int CSVFieldCount(int *fieldCount, char *inputString, char *delimiter) { *fieldCount = 0; //set delimeter to a default value if it is blank if(strlen(delimiter) == 0) delimiter = ","; /* Create a new instance of CsvParser, that parses a given string Returns an instance of CsvParser inputString - String that contains CSV formatted text, to be parsed delimiter - pointer to a single char to be considered as delimiter. If NULL is passed, using default, which is a comma (,) char firstLineIsHeader - Use 0 for start reading rows from the first line. Use non-zero value for starting to read rows from the second line */ // string, delimiter, first_line_is_header? CsvParser *csvparser = CsvParser_new_from_string(inputString, delimiter, 0); CsvRow *row; /* Get the next row from the CSV Returns an instance of CsvRow, or NULL in case of EOF or error csvParser - an instance of CsvParser CsvRow *CsvParser_getRow(CsvParser *csvParser); */ if((row = CsvParser_getRow(csvparser)) ) { *fieldCount = CsvParser_getNumFields(row); CsvParser_destroy_row(row); }//end if //failed to read a row from the given string else{ //if the string is not blank, something is wrong if(strlen(inputString) > 0){ CsvParser_destroy_row(row); CsvParser_destroy(csvparser); return 3; }//end if }//end else CsvParser_destroy(csvparser); return 0; }//end CSVFieldCount /* * This subroutine parses the inputString based on given delimiter, and writes it to the given file. * The subroutine returns: * 0 in case of success * 1 in case of invalid filePathName * 3 in case of bad input string or unknow errors * **/ int parseCSVToFile(char *filePathName, char *inputString, char *delimiter, char *errorMsg) { strcpy(errorMsg, "Success!"); //check if return field number is less than one if(strlen(filePathName) == 0){ strcpy(errorMsg, "Blank File Name"); return 1; } //set delimeter to a default value if it is blank if( strlen(delimiter) == 0 ) delimiter = ","; //Create the file for writing FILE *outputFile; if((outputFile = fopen( filePathName, "w"))==NULL){ //printf("Error Msg: %s",strerror(errno)); //strcpy(errorMsg, strerror(errno)); return 1; } /* * Create a new instance of CsvParser, that parses a given string * Returns an instance of CsvParser * * inputString - String that contains CSV formatted text, to be parsed * delimiter - pointer to a single char to be considered as delimiter. If NULL is passed, using default, which is a comma (,) char * firstLineIsHeader - Use 0 for start reading rows from the first line. Use non-zero value for starting to read rows from the second line **/ // string, delimiter, first_line_is_header? CsvParser *csvparser = CsvParser_new_from_string(inputString, delimiter, 0); CsvRow *row; /* * Get the next row from the CSV * Returns an instance of CsvRow, or NULL in case of EOF or error * * csvParser - an instance of CsvParser * * CsvRow *CsvParser_getRow(CsvParser *csvParser); **/ if((row = CsvParser_getRow(csvparser)) ) { int fieldCount = CsvParser_getNumFields(row); char **rowFields = CsvParser_getFields(row); int i = 0; for(i = 0; i < fieldCount; i++){ //escape line feed and carriage return if they exist if(strchr(rowFields[i],'\n')||strchr(rowFields[i],'\r')){ char *field = NULL; char *str = NULL; //Assume for at most 100 replacement str = malloc ( strlen(rowFields[i])+401); strcpy(str,rowFields[i]); if(strchr(rowFields[i],'\r')!= NULL){ // printf("\nfound cr\n"); field = replace(str, "\r", "\\\\r\\\\"); strcpy(str, field); } if(strchr(rowFields[i],'\n')!=NULL){ field = replace(str, "\n", "\\\\n\\\\"); //printf("\nfound line feed\n"); } //check if we successfully replaced if(field != NULL){ fputs(field, outputFile); free(str); free(field); } else{ strcpy(errorMsg, "Error during line escaping"); free(str); free(field); CsvParser_destroy_row(row); CsvParser_destroy(csvparser); fclose(outputFile); return 3; } } else{ fputs(rowFields[i], outputFile); } fputc('\n', outputFile); } //clean up CsvParser_destroy_row(row); free(rowFields); }//end if //failed to read a row from the given string else{ //if the string is not blank, something is wrong if(strlen(inputString) > 0){ strcpy(errorMsg, "Parse Error"); CsvParser_destroy_row(row); CsvParser_destroy(csvparser); fclose(outputFile); return 3; }//end if }//end else CsvParser_destroy(csvparser); fclose(outputFile); //errorMsg = ""; return 0; }//end parseCSVToFile /* * This subroutine parses the inputFile based on given delimiter, and writes it to the given file. * The output file has each field in one line and records are being separated by "$%&*!" string. * * The subroutine returns: * 0 in case of success * 1 in case of invalid filePathName * 3 in case of bad input string or unknow errors * * errorMsg will be Success! in case of successfull termination. * */ int parseCSVFileToFile(char *filePathName, char *inputFile, char *delimiter, char *errorMsg) { strcpy(errorMsg, "Success!"); //check if return field number is less than one if(strlen(filePathName) == 0){ strcpy(errorMsg, "Blank Output File Name"); return 1; } if(strlen(inputFile) == 0){ strcpy(errorMsg, "Blank Input File Name"); return 1; } //set delimiter to a default value if it is blank if( strlen(delimiter) == 0 ) delimiter = ","; //Create the file for writing FILE *outputFile; if((outputFile = fopen( filePathName, "w"))==NULL){ //printf("Error Msg: %s",strerror(errno)); strcpy(errorMsg, "Failed to open Output File"); return 1; } /* * Create a new instance of CsvParser, that parses a given file * Returns an instance of CsvParser * * inputFile - File path name that contains CSV formatted text, to be parsed * delimiter - pointer to a single char to be considered as delimiter. If NULL is passed, using default, which is a comma (,) char * firstLineIsHeader - Use 0 for start reading rows from the first line. Use non-zero value for starting to read rows from the second line **/ // csvFile, delimiter, first_line_is_header? CsvParser *csvparser = CsvParser_new(inputFile, delimiter, 0); CsvRow *row; /* * Get the next row from the CSV * Returns an instance of CsvRow, or NULL in case of EOF or error * * csvParser - an instance of CsvParser * * CsvRow *CsvParser_getRow(CsvParser *csvParser); **/ while ((row = CsvParser_getRow(csvparser)) ) { int fieldCount = CsvParser_getNumFields(row); char **rowFields = CsvParser_getFields(row); int i = 0; //write every field in one line for (i = 0 ; i < fieldCount ; i++) { if(strchr(rowFields[i],'\n')||strchr(rowFields[i],'\r')){ char *field = NULL; char *str = NULL; //Assume for at most 100 replacement str = malloc ( strlen(rowFields[i])+401); strcpy(str,rowFields[i]); if(strchr(rowFields[i],'\r')!= NULL){ field = replace(str, "\r", "\\\\r\\\\"); strcpy(str, field); } if(strchr(rowFields[i],'\n')!=NULL){ field = replace(str, "\n", "\\\\n\\\\"); } // if(field != NULL){ fputs(field, outputFile); free(str); free(field); } else{ strcpy(errorMsg, "Error during line escaping"); free(str); free(field); CsvParser_destroy_row(row); CsvParser_destroy(csvparser); fclose(outputFile); return 3; } } else{ fputs(rowFields[i], outputFile); } fputc('\n', outputFile); } //end of record - add record separator string fputs("$%&*!", outputFile); fputc('\n', outputFile); //clean up CsvParser_destroy_row(row); free(rowFields); }//end while CsvParser_destroy(csvparser); fclose(outputFile); return 0; }//end parseCSVFileToFile /* main function for testing */ int main(int argc, char *argv[]){ int k = 0; char *input="\"12\r\n\r3\",\"456\n\",787,\"99\r99\""; printf("input: %s",input); // for(k=1;k < argc; k++){ // strcat(input, argv[k]); // } int i = 0; int fieldCount = 0; i = CSVFieldCount( &fieldCount, input, ","); if( i == 0){ printf("\nFIELD COUNT %d:", fieldCount); }else{ printf("\nError %d", i); return i; } char field[32000]; int j=0; memset( field, ' ', 32000 ); for( j = 1; j<= fieldCount; j++){ i = parseCSV( field, input, "," , j); if( i == 0){ printf("\n%d:%s",j,field); }else{ printf("\nError %d", i); return i; } } char *fileName = "/tmp/csvParsedOut.txt"; char errorMsg[100]; j=parseCSVToFile( fileName, input, ",", errorMsg); if(j != 0) printf("\nError:%d",j); //char *inputFile = "/usr/local/appx/data/DMA/BIB/Portdata/BIBLGRPC.csv"; //j=parseCSVFileToFile(fileName, inputFile, ",", errorMsg); //printf("\nError: %s",errorMsg); //if(j!=0) printf("\n return code : %d",j); return 0; } #ifdef __cplusplus } #endif