When all else fails, there's always a good ol' fashioned state machine:
// Sample input for the capitalization example: a comma-separated list of upper-case country names.
public const string Input = "BAHAMAS, BAHRAIN, BANGLADESH, BONAIRE SINT EUSTATIUS, BOSNIA HERZEGOVINA";
/// <summary>
/// Title-cases a comma-separated list of names with a single-pass state machine:
/// the first letter of every word is upper-cased and the remaining letters are lower-cased.
/// </summary>
/// <remarks>
/// Only letters, commas that end a word, and the first space that ends a word are copied to
/// the output. Everything else is dropped: whitespace following a comma, repeated spaces,
/// and any character that is not a letter, space or comma (digits, hyphens, apostrophes, ...).
/// Casing uses the invariant culture so the result does not depend on the current thread
/// culture (e.g. the Turkish dotted/dotless 'i' rules).
/// </remarks>
/// <param name="input">The text to convert, e.g. <c>"BAHAMAS, BOSNIA HERZEGOVINA"</c>.</param>
/// <returns>The converted text, e.g. <c>"Bahamas,Bosnia Herzegovina"</c>.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="input"/> is null.</exception>
public static string DoCapitalize (string input)
{
    if (input == null)
    {
        throw new System.ArgumentNullException(nameof(input));
    }

    TheState theState = TheState.Beginning;
    var buffer = new StringBuilder(input.Length);

    foreach (char c in input)
    {
        CharacterState characterState = GetCharacterState(c);

        switch (theState)
        {
            // In all three "not inside a word" states the machine waits for the next letter,
            // which starts a new word and is therefore upper-cased. Any other character seen
            // while waiting is skipped without being written to the output.
            case TheState.Beginning:
            case TheState.BetweenNames:
            case TheState.BetweenWords:
                if (characterState == CharacterState.Letter)
                {
                    buffer.Append(char.ToUpperInvariant(c));
                    theState = TheState.InWord;
                }
                break;

            case TheState.InWord:
                if (characterState == CharacterState.Letter)
                {
                    buffer.Append(char.ToLowerInvariant(c));
                }
                else if (characterState == CharacterState.Comma)
                {
                    // A comma ends the current name.
                    buffer.Append(c);
                    theState = TheState.BetweenNames;
                }
                else if (characterState == CharacterState.Space)
                {
                    // A space ends the current word but not the current name.
                    buffer.Append(c);
                    theState = TheState.BetweenWords;
                }
                // CharacterState.Other: the character is dropped and the word continues.
                break;
        }
    }

    return buffer.ToString();

    // Classifies a single character for the state machine above.
    static CharacterState GetCharacterState(char c)
    {
        if (c == ' ')
        {
            return CharacterState.Space;
        }

        if (c == ',')
        {
            return CharacterState.Comma;
        }

        return char.IsLetter(c) ? CharacterState.Letter : CharacterState.Other;
    }
}
Note that the GetCharacterState function is a local function (this gets it out of the main program flow, but keeps it with the main function).
If you want to have a list of "noise words" (of, the, etc.), you could easily implement it. The code is not simple, but it is pretty easy to read, and it is O(N).
Test output from the stated input:
Bahamas,Bahrain,Bangladesh,Bonaire Sint Eustatius,Bosnia Herzegovina
y.Join — you should try string.Join.