To split a string on several alternative delimiters, each of which may be one or more characters long, I used the following Boost-based function:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <boost/algorithm/string.hpp> | |
#include <boost/algorithm/string/join.hpp> | |
#include <boost/algorithm/string/regex.hpp> | |
#include <boost/regex.hpp> | |
std::vector<std::string> StringUtils::split( | |
const std::string & str, | |
const std::vector<std::string> & delimiters) { | |
std::vector<std::string> tokens; | |
boost::algorithm::split_regex( | |
tokens, | |
str, | |
boost::regex( | |
boost::join(escapeDelimiters(delimiters), "|")) | |
); | |
return tokens; | |
} |
Yesterday I managed to get the same functionality using C++11 features instead of the Boost library. This is the resulting code, which uses C++11 regex and lambdas:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <regex> | |
#include <sstream> | |
std::vector<std::string> StringUtils::split( | |
const std::string & str, | |
const std::vector<std::string> & delimiters) { | |
std::regex rgx(join(escapeStrings(delimiters), "|")); | |
std::sregex_token_iterator | |
first{begin(str), end(str), rgx, -1}, | |
last; | |
return{first, last}; | |
} | |
std::vector<std::string> StringUtils::split(const std::string & str, | |
const std::string & delimiter) { | |
std::vector<std::string> delimiters = {delimiter}; | |
return split(str, delimiters); | |
} | |
/// Concatenates `tokens`, inserting `delimiter` between consecutive elements.
///
/// @param tokens     strings to concatenate, in order
/// @param delimiter  separator placed between elements (may be empty)
/// @return the joined string; an empty string when `tokens` is empty
std::string StringUtils::join(
    const std::vector<std::string> & tokens,
    const std::string & delimiter) {
  // front() and begin() + 1 are undefined on an empty vector, so handle that
  // case before touching any element.
  if (tokens.empty()) {
    return std::string();
  }

  std::stringstream stream;
  auto it = tokens.begin();
  stream << *it;  // first element goes in without a leading delimiter
  for (++it; it != tokens.end(); ++it) {
    stream << delimiter << *it;
  }
  return stream.str();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <gmock/gmock.h> | |
#include <vector> | |
#include "../code/StringUtils.h" | |
using namespace ::testing; | |
// escapeString() is expected to put a backslash in front of "*", "\" and each
// "|", and to leave "," unchanged.
TEST(StringUtils, CanEscapeOneString) {
  const auto escapedStar = StringUtils::escapeString("*");
  EXPECT_THAT(escapedStar, Eq("\\*"));

  const auto escapedBackslash = StringUtils::escapeString("\\");
  EXPECT_THAT(escapedBackslash, Eq("\\\\"));

  const auto escapedPipes = StringUtils::escapeString("||");
  EXPECT_THAT(escapedPipes, Eq("\\|\\|"));

  const auto escapedComma = StringUtils::escapeString(",");
  EXPECT_THAT(escapedComma, Eq(","));
}
// escapeStrings() is expected to escape each element independently and to
// keep the elements in their original order.
TEST(StringUtils, CanEscapeSeveralStringsEvenThoseUsedInRegularExpressions) {
  const auto escaped = StringUtils::escapeStrings({",", "*", "||", ";", "\\"});

  EXPECT_THAT(escaped, ElementsAre(",", "\\*", "\\|\\|", ";", "\\\\"));
}
// A single input may mix one-character and multi-character delimiters; every
// one of them must act as a split point.
TEST(StringUtils, CanSplitUsingMultipleTokens) {
  const std::vector<std::string> delimiters{",", "*", "??"};

  const auto tokens = StringUtils::split("1*2??3,4", delimiters);

  EXPECT_THAT(tokens, ElementsAre("1", "2", "3", "4"));
}
// isAnInteger() must accept a plain digit string and reject input containing
// spaces, a decimal point, or a leading non-digit character.
TEST(StringUtils, CanTellIfAGivenStringRepresentsAnInteger) {
  const bool singleDigit = StringUtils::isAnInteger("1");
  EXPECT_TRUE(singleDigit);

  const bool spaceSeparated = StringUtils::isAnInteger("15 7 8");
  EXPECT_FALSE(spaceSeparated);

  const bool decimalNumber = StringUtils::isAnInteger("1.5");
  EXPECT_FALSE(decimalNumber);

  const bool leadingSymbol = StringUtils::isAnInteger("!15");
  EXPECT_FALSE(leadingSymbol);
}
// join() must place the delimiter between elements only, for one-character,
// multi-character and empty delimiters alike.
TEST(StringUtils, CanJoinAVectorOfStrings) {
  const std::vector<std::string> numbers{"1", "2", "3"};

  EXPECT_THAT(StringUtils::join(numbers, "|"), Eq("1|2|3"));
  EXPECT_THAT(StringUtils::join(numbers, " "), Eq("1 2 3"));
  EXPECT_THAT(StringUtils::join(numbers, ", "), Eq("1, 2, 3"));
  EXPECT_THAT(StringUtils::join(numbers, ""), Eq("123"));
}
// Round trip: splitting on a delimiter and joining with the same delimiter
// must reproduce the original string.
TEST(StringUtils, CanSplitStringsAndJoinThemAgain) {
  const std::string original = "1|2|3";
  const std::string delimiter = "|";

  const auto tokens = StringUtils::split(original, delimiter);

  EXPECT_THAT(StringUtils::join(tokens, delimiter), Eq(original));
}
No comments:
Post a Comment