String Parser

String Parser

 String Parser
A string parser is an application that takes a block of text and extracts the string residing between
2 user defined tags. If these tags occur multiple times in the text, then the parser extracts
multiple strings.
For instance assume TestData.txt contains the following data;
<note>
<to>Tove</to>
<to>bonney</to>
<from>Jani</from>
<heading>Reminder</heading>
<body>Don’t forget me this weekend!</body>
</note>
And my Start and End Tags are <to> and </to>. The StringParser library should find
Tove
bonney
A string parser design should be flexible in that it should be able to use any Start and
End Tag.

Library
The string parser is so versatile that it should be packaged in a library so that it can be easily integrated into other applications. This approach has the following advantages:
1. Easy to integrate into multiple projects.
2. Easy to maintain. Any changes made to the parser library are made in a
single codebase regardless of the number of applications using it.

  3. It makes it easier to divvy up projects in a team. The library is defined by its
    interface, in this case a header file that describes the functionality it provides. The header file is a contract describing the library functions, how they are accessed, what is returned, and what is expected. It completely defines all communication possibilities between the library and its clients.
    For these 3 reasons alone, much of the code produced in the world is provided as a library.

What you get

In the projects section of the course website you will find 3 incomplete projects. The following shows the solution as it appears in Eclipse. The includes directory should be in a top-level, stand-alone folder, but we are running up against a limitation of Eclipse’s ‘Workspace’ concept, so it is located in the 327_proj3_test folder. Note that FileIO.h, StringParserClass.h, and constants.h are located there.

Assignment

This project links 327_proj3_fileIO Library correctly to the 327_proj3_Test
application. Please implement the 327_proj3_StringParser library and link it to the
327_proj3_Test application.
Please fill in all required content in;
• FileIO.cpp
• StringParserClass.cpp
• 327_proj3_test.cpp
Note that 327_proj3_test.cpp requires command-line parameters to be passed in
when the program is invoked. There should be 4 of them:
• the first is the filename to read data from
• the second is the first tag to search for
• the third is the second tag to search for
• and the fourth is the output file to write all the found data to

see below for sample run

FileIO.cpp 

#include <iostream>

#include <fstream>

#include “../327_proj3_test/includes/FileIO.h”

#include “../327_proj3_test/includes/constants.h”

using namespace std;

intKP_FileIO::getFileContents(conststd::string &filename, std::string &contents)

{

//TODO fill in

}

intKP_FileIO::writeVectortoFile(conststd::string filename,std::vector<std::string>&myEntryVector)

{

//TODO fill in

} 

StringParserClass.cpp

#include <string>

#include <string.h>

//TODO Fill in 

327_proj3_test.cpp

#include <iostream>

#include <algorithm>

#include <vector>

#include <string>

#include <iterator>

#include <string.h>

#include “../includes/constants.h”

#include “Stringtopointer.h”

#include “../includes/StringParserClass.h”

#include “../includes/FileIO.h”

#include “test.h”

using namespace std;

//two utility functions for looping through a vector and printing
//its contents to std::cout (the console)

//prints a single entry followed by a line break
void outputvectorrow(const std::string &i){
    std::cout << i << std::endl;
}

voiddumpVecToStdOut(vector<string>myVector){

for_each(myVector.begin(), myVector.end(), outputvectorrow);

}

inttypicalrun(string &inputfile,string&startTag,string&endTag,string&outputfile){

vector<string>myStrings;

intiret = SUCCESS;

//open file, if not there ask for a different file or exit

std::string filecontents;

iret = KP_FileIO::getFileContents(inputfile.c_str(),filecontents );

if (iret != SUCCESS)

returniret;

//we cant manipulate String contents easily, so make a copy in a dynamically allocated array

//incidently this class shows the principles of RAII, allocated memory is automatically

//deallocated in the destructor

String_to_pointerstp(filecontents);

char* pChar = stp.getPointerToStringBegginning();

//create an instance of the stringparser

KP_StringParserClass::StringParserClassmyClass;

//what tags are we searching for?

iret = myClass.setTags(startTag.c_str(),endTag.c_str());

if (iret != SUCCESS)

returniret;

//pull out the data

iret = myClass.getDataBetweenTags(pChar, myStrings);

if (iret != SUCCESS)

returniret;

//serialize to file

iret = KP_FileIO::writeVectortoFile(outputfile,myStrings);

if (iret != SUCCESS)

returniret;

//dump to standard output

//dumpVecToStdOut(myStrings);

}

int main(intargc, char *argv[]){

vector<string>myStrings;

intiret = SUCCESS;

//TODO verify that correct number of params are entered

//TODO otherwise return output WRONG_NUMB_ARGS and return FAIL_WRONG_NUMBER_ARGS

//harvest all the user info

stringinputfile = argv[1];

stringstartTag = argv[2];

stringendTag = argv[3];

stringoutputfile = argv[4];

//lets test the inputs

testfileIO(inputfile, outputfile);

//now lets test the string parsing

testStringParser(startTag,endTag);

cout<<“Score is:”<<getScore()<<endl;

typicalrun(inputfile,startTag,endTag,outputfile);

} 

Stringtopointer.cpp

#include <string.h>

#include “Stringtopointer.h”

//one extra char is reserved for the terminating '\0' when sizing the copy
const int SPACE_FOR_TERMINATING_NULL_CHAR = 1;

//stores a copy of myString; no char array is allocated here (pChar starts at 0),
//it is built by resetPointer() when a pointer is requested
String_to_pointer::String_to_pointer(const std::string &myString): pChar(0), aString(myString) {
}

//releases the dynamically allocated char array, if there is one
String_to_pointer::~String_to_pointer()
{
    this->clear();
}

//very dangerous to expose private data!

char* String_to_pointer::getPointerToStringBegginning(){

resetPointer();

returnpChar;

}

voidString_to_pointer::clear(){

if(pChar)

delete [] pChar;

pChar = 0;

}

voidString_to_pointer::resetPointer(){

clear();

//how many chars (+1 for the null!)

intlen = strlen(aString.c_str())+SPACE_FOR_TERMINATING_NULL_CHAR;

if(len>SPACE_FOR_TERMINATING_NULL_CHAR){

pChar = new char[len];

strncpy(pChar,aString.c_str(),len);

*(pChar+len)=0;

}

}

voidString_to_pointer::changeString(conststd::string &myString){

aString = myString;

clear();

} 

Stringtopointer.h

#ifndef STRINGTOPOINTER_H_

#define STRINGTOPOINTER_H_

#include <string>

/**

* Helper class to convert from string to dynamically allocated array

* memory is automatically deleted in the destructor

*

*/

class String_to_pointer {
public:
    //stores a copy of myString
    String_to_pointer(const std::string &myString);

    //calls clear
    virtual ~String_to_pointer();

    //the object owns a raw char array that its destructor deletes, so a
    //memberwise copy would delete the same array twice; copying is disabled
    String_to_pointer(const String_to_pointer &) = delete;
    String_to_pointer& operator=(const String_to_pointer &) = delete;

    //very dangerous to expose private data!
    char* getPointerToStringBegginning();

    //replaces the stored string
    void changeString(const std::string &myString);

private:
    void clear();   //deallocates memory
    void resetPointer();

    char* pChar;            //dynamically allocated copy, 0 when none is held
    std::string aString;    //the string the copy is made from
};

#endif /* STRINGTOPOINTER_H_ */ 

test.cpp

 #include “test.h”

#include <iostream>

#include <fstream>

#include <sstream>

#include <string>

#include <time.h>

#include “../includes/constants.h”

#include “Stringtopointer.h”

#include “../includes/StringParserClass.h”

#include “../includes/FileIO.h”

using namespace std;

//point values that the individual checks can award
const int ZERO_POINTS = 0;  //for retests
const int ONE_POINTS = 1;
const int TWO_POINTS = 2;
const int THREE_POINTS = 3;
const int FIVE_POINTS = 5;
const int TEN_POINTS = 10;
const int FIFTEEN_POINTS = 15;

//tracks how many points you will get out of 100
//yeah, yeah its a global
int total_points = 0;

//this is a function template, its mostly here as a helper for me
//the T and U are generic params, I can substitute any type for them
//they must be comparable with == and printable with <<
//
//Compares expectedVal with actualVal. On a match it prints "SUCCESS <testnumb>",
//adds pts to total_points and returns true. Otherwise it prints
//"FAIL <testnumb>" together with both values and returns false.
template<typename T, typename U>
bool EXPECT_EQ(T expectedVal, U actualVal, std::string testnumb = "", int pts = ONE_POINTS){
    bool bout = (expectedVal == actualVal);
    if (bout){
        std::cout << "SUCCESS " + testnumb;
        total_points += pts;
    }
    else
        std::cout << "FAIL " + testnumb << " Expected:" << expectedVal << "  got:" << actualVal;
    std::cout << std::endl;
    return bout;
}

//a path that is used for the open-failure checks
const std::string TEST_DATA_NON_EXISTANT    = "./data/notthere/nonexistantfile";
const std::string TEST_DATA_SMALL_OUT       = "./output/testdata_small.out";
//what is expected when the entries "A", "small", "lizard" are written and read back
const std::string TEST_DATA_SMALL_OUT_VALUE = "Asmalllizard";

voidtestfileIO(conststd::string &userEnteredInputFile, conststd::string &userEnteredOutputFile){

string contents;

string filename;

vector<string> mv;

mv.push_back(“A”);

mv.push_back(“small”);

mv.push_back(“lizard”);

//test read from user supplied input file

EXPECT_EQ (SUCCESS, KP_FileIO::getFileContents(userEnteredInputFile, contents),”25″,FIVE_POINTS);

//TODO probably should confirm that it also has right contents

//test read and write to a bogus file

filename = TEST_DATA_NON_EXISTANT;

EXPECT_EQ (COULD_NOT_OPEN_FILE_TO_READ, KP_FileIO::getFileContents(filename, contents),”1″,FIVE_POINTS);

EXPECT_EQ (COULD_NOT_OPEN_FILE_TO_WRITE, KP_FileIO::writeVectortoFile(filename, mv),”2″,FIVE_POINTS);

//test write real

EXPECT_EQ (SUCCESS, KP_FileIO::writeVectortoFile(userEnteredOutputFile, mv),”3″,FIVE_POINTS);

//test read real

contents.clear();

EXPECT_EQ (SUCCESS, KP_FileIO::getFileContents(userEnteredOutputFile, contents),”4″,FIVE_POINTS);

EXPECT_EQ (true, TEST_DATA_SMALL_OUT_VALUE == contents,”5″,FIVE_POINTS);

}

//two complete <to>...</to> pairs
const std::string TEST_STRING                = "<li class=\"nav-twilight\"><a href=\"/mylittlepony/en_US/ponies/twilight-sparkle.cfm\".<to>Twilight Sparkle</to><to>Pinkie Pie</to></li>";
//same text but the closing </to> is replaced by a second <to>
const std::string TEST_STRING_NO_END_TAG     = "<li class=\"nav-twilight\"><a href=\"/mylittlepony/en_US/ponies/twilight-sparkle.cfm\".<to>Twilight Sparkle<to></li>";
//same text but the opening <to> is replaced by </to>
const std::string TEST_STRING_NO_START_TAG   = "<li class=\"nav-twilight\"><a href=\"/mylittlepony/en_US/ponies/twilight-sparkle.cfm\".</to>Twilight Sparkle</to></li>";

//the two strings expected from TEST_STRING, in order
const std::string TS_FIRST_NONCE             = "Twilight Sparkle";
const std::string TS_SECOND_NONCE            = "Pinkie Pie";

//a tag value that occurs in none of the test strings
//NOTE(review): the dashes were mangled in this listing; plain hyphens are presumed - confirm against the original project
const std::string BOGUS_TAG = "-------";

voidtestStringParser(conststd::string &startTag,conststd::string &endTag){

vector<string> mv;

KP_StringParserClass::StringParserClasssp;

//verify correct null behaviour

char* pChar =0;

EXPECT_EQ (ERROR_TAGS_NULL, sp.getDataBetweenTags(pChar, mv),”6″,FIVE_POINTS);

//verify correct null tag behaviour

EXPECT_EQ (ERROR_TAGS_NULL, sp.setTags(0, 0),”7″,FIVE_POINTS);

//set tags

String_to_pointerstp_start(startTag);

String_to_pointerstp_end(endTag);

EXPECT_EQ (SUCCESS, sp.setTags(stp_start.getPointerToStringBegginning(), stp_end.getPointerToStringBegginning()),”8″,FIVE_POINTS);

//now that tags are set verify that it fails if there is no data

EXPECT_EQ (ERROR_DATA_NULL, sp.getDataBetweenTags(pChar, mv),”9″,FIVE_POINTS);

//we cant manipulate String contents easily, so make a copy in a dynamically allocated array

//incidently this class shows the principles of RAII, allocated memory is automatically

//deallocated in the destructor

String_to_pointerstp(TEST_STRING);

pChar = stp.getPointerToStringBegginning();

EXPECT_EQ (SUCCESS, sp.getDataBetweenTags(pChar, mv),”10″,FIVE_POINTS);

EXPECT_EQ (2, mv.size(),”11″,FIVE_POINTS);

EXPECT_EQ (TS_FIRST_NONCE, mv[0],”12″,TWO_POINTS);

EXPECT_EQ (TS_SECOND_NONCE, mv[1],”13″,THREE_POINTS);

//make sure they clear the vector and not append to it

pChar = stp.getPointerToStringBegginning();

EXPECT_EQ (SUCCESS, sp.getDataBetweenTags(pChar, mv),”14″,ZERO_POINTS); //already tested

EXPECT_EQ (2, mv.size(),”15″,FIVE_POINTS);                                                //if one then it was cleared if 2 not

//try with no end tag

stp.changeString(TEST_STRING_NO_END_TAG);

pChar = stp.getPointerToStringBegginning();

EXPECT_EQ (SUCCESS, sp.getDataBetweenTags(pChar, mv),”16″,FIVE_POINTS);

EXPECT_EQ (0, mv.size(),”17″,FIVE_POINTS);

//try with no start tag

stp.changeString(TEST_STRING_NO_START_TAG);

pChar = stp.getPointerToStringBegginning();

EXPECT_EQ (SUCCESS, sp.getDataBetweenTags(pChar, mv),”18″,FIVE_POINTS);

EXPECT_EQ (0, mv.size(),”19″,FIVE_POINTS);

//make sure they made a deep copy of the tags

//the following 2 lines should not affect start and end tags in sp

stp_start.changeString(BOGUS_TAG);

stp_end.changeString(BOGUS_TAG);

//try with no start tag

stp.changeString(TEST_STRING);

pChar = stp.getPointerToStringBegginning();

mv.clear();

EXPECT_EQ (SUCCESS, sp.getDataBetweenTags(pChar, mv),”20″,ZERO_POINTS); //already tested

EXPECT_EQ (2, mv.size(),”21”,TEN_POINTS);

}

intgetScore(){

returntotal_points;

} 

test.h

#ifndef TEST_H_

#define TEST_H_

#include <string>

//runs the file read/write checks against the user supplied input and output files
void testfileIO(const std::string &userEnteredInputFile, const std::string &userEnteredOutputFile);

//runs the string parser checks using the user supplied start and end tags
void testStringParser(const std::string &startTag, const std::string &endTag);

//returns the points accumulated by the checks run so far
int getScore();

#endif /* TEST_H_ */ 

Solution

327_proj3_test.cpp

#include <iostream>

#include <algorithm>

#include <vector>

#include <string>

#include <iterator>

#include <string.h>

#include “../includes/constants.h”

#include “Stringtopointer.h”

#include “../includes/StringParserClass.h”

#include “../includes/FileIO.h”

#include “test.h”

using namespace std;

//two utility functions for looping through a vector and printing
//its contents to std::cout (the console)

//prints a single entry followed by a line break
void outputvectorrow(const std::string &i){
    std::cout << i << std::endl;
}

voiddumpVecToStdOut(vector<string>myVector){

for_each(myVector.begin(), myVector.end(), outputvectorrow);

}

inttypicalrun(string &inputfile,string&startTag,string&endTag,string&outputfile){

vector<string>myStrings;

intiret = SUCCESS;

//open file, if not there ask for a different file or exit

std::string filecontents;

iret = KP_FileIO::getFileContents(inputfile.c_str(),filecontents );

if (iret != SUCCESS)

returniret;

//we cant manipulate String contents easily, so make a copy in a dynamically allocated array

//incidently this class shows the principles of RAII, allocated memory is automatically

//deallocated in the destructor

String_to_pointerstp(filecontents);

char* pChar = stp.getPointerToStringBegginning();

//create an instance of the stringparser

KP_StringParserClass::StringParserClassmyClass;

//what tags are we searching for?

iret = myClass.setTags(startTag.c_str(),endTag.c_str());

if (iret != SUCCESS)

returniret;

//pull out the data

iret = myClass.getDataBetweenTags(pChar, myStrings);

if (iret != SUCCESS)

returniret;

//serialize to file

iret = KP_FileIO::writeVectortoFile(outputfile,myStrings);

if (iret != SUCCESS)

returniret;

//dump to standard output

//dumpVecToStdOut(myStrings);

}

int main(intargc, char *argv[]){

vector<string>myStrings;

intiret = SUCCESS;

//TODO verify that correct number of params are entered

//TODO otherwise return output WRONG_NUMB_ARGS and return FAIL_WRONG_NUMBER_ARGS

if (argc< 5)

{

return FAIL_WRONG_NUMBER_ARGS;

}

//harvest all the user info

stringinputfile = argv[1];

stringstartTag = argv[2];

stringendTag = argv[3];

stringoutputfile = argv[4];

//lets test the inputs

testfileIO(inputfile, outputfile);

//now lets test the string parsing

testStringParser(startTag,endTag);

cout<<“Score is:”<<getScore()<<endl;

typicalrun(inputfile,startTag,endTag,outputfile);

} 

FileIO.cpp

 #include <iostream>

#include <fstream>

#include <sstream>

#include “../327_proj3_test-master/includes/FileIO.h”

#include “../327_proj3_test-master/includes/constants.h”

using namespace std;

intKP_FileIO::getFileContents(conststd::string &filename, std::string &contents)

{

ifstreaminFile;

inFile.open(filename);//open the input file

if (!inFile.is_open())

{

return COULD_NOT_OPEN_FILE_TO_READ;

}

stringlineStr;

while (getline(inFile, lineStr))

{

contents += lineStr;

}

inFile.close();

return SUCCESS;

}

intKP_FileIO::writeVectortoFile(conststd::string filename,std::vector<std::string>&myEntryVector)

{

ofstream out(filename);

if (!out.is_open())

{

return COULD_NOT_OPEN_FILE_TO_WRITE;

}

for (std::vector<std::string>::iterator it = myEntryVector.begin(); it != myEntryVector.end(); it++)

{

out<< *it<<endl;

if (out.bad())

{

return COULD_NOT_OPEN_FILE_TO_WRITE;

}

}

out.close();

return SUCCESS;

} 

StringParserClass.cpp

#include <string>

#include <string.h>

#include “../327_proj3_test-master/includes/StringParserClass.h”

#include “../327_proj3_test-master/includes/constants.h”

// Creates a parser that holds no tags yet.
KP_StringParserClass::StringParserClass::StringParserClass()
    : pStartTag(nullptr),
      pEndTag(nullptr),
      areTagsSet(false)
{
}

// Frees the tag copies held by this object.
KP_StringParserClass::StringParserClass::~StringParserClass()
{
    this->cleanup();
}

intKP_StringParserClass::StringParserClass::setTags(const char* pStart, const char* pEnd)

{

if (pStart == 0 || pEnd == 0)

{

return ERROR_TAGS_NULL;

}

cleanup();

intlen = strlen(pStart) + 1;

pStartTag = new char[len];

strncpy_s(pStartTag, len, pStart, len);

*(pStartTag + len) = 0;

len = strlen(pEnd) + 1;

pEndTag = new char[len];

strncpy_s(pEndTag, len, pEnd, len);

*(pEndTag + len) = 0;

return SUCCESS;

}

// Collects every string that sits between the stored start tag and the next
// end tag in pDataToSearchThru.
// pDataToSearchThru: null terminated text to search; it is not modified.
// myVector: always emptied first, then filled with the strings found, in order.
// Returns ERROR_TAGS_NULL when no tags are stored, ERROR_DATA_NULL when the
// text pointer is null, otherwise SUCCESS (also when nothing was found).
int KP_StringParserClass::StringParserClass::getDataBetweenTags(char* pDataToSearchThru, std::vector<std::string> &myVector)
{
    myVector.clear();

    if (pStartTag == 0 || pEndTag == 0)
    {
        return ERROR_TAGS_NULL;
    }

    if (pDataToSearchThru == 0)
    {
        return ERROR_DATA_NULL;
    }

    const size_t startTagLen = strlen(pStartTag);
    const size_t endTagLen = strlen(pEndTag);

    char* cursor = pDataToSearchThru;
    char* start = 0;
    char* end = 0;

    while (findTag(cursor, start, end) == SUCCESS)
    {
        //first char after the start tag
        char* payload = start + startTagLen;

        //the reported end tag may lie in front of the start tag; only an end
        //tag that follows the start tag closes it, so look again from there
        if (end < payload)
        {
            end = strstr(payload, pEndTag);
            if (end == 0)
            {
                break;
            }
        }

        //with two empty tags the cursor would never move; stop instead of looping forever
        char* next = end + endTagLen;
        if (next == cursor)
        {
            break;
        }

        myVector.push_back(std::string(payload, end));
        cursor = next;
    }

    return SUCCESS;
}

voidKP_StringParserClass::StringParserClass::cleanup()

{

if (pStartTag != 0)

{

delete[]pStartTag;

pStartTag = 0;

}

if (pEndTag != 0)

{

delete[]pEndTag;

pEndTag = 0;

}

}

intKP_StringParserClass::StringParserClass::findTag(char *pTagToLookFor, char *&pStart, char *&pEnd)

{

if (pStartTag == 0 || pStartTag == 0)

{

return ERROR_TAGS_NULL;

}

pStart = strstr(pTagToLookFor, pStartTag);

pEnd = strstr(pTagToLookFor, pEndTag);

if (pStart == 0 || pEnd == 0)

{

return FAIL;

}

return SUCCESS;

}