Hi Trint,
This is a function that I've been meaning to write for some time so you've
provided a useful prod.
It's surprisingly tricky to work out yet easy once done. You were right to
ask for help.
I've given you the whole module below in case you want to see how it works
on the test cases. But if your strings are 'normal' you can simply plug the
SplitQuoted function into a module and off you go.
Regards,
Fergus
<code>
Public Module StringUtils

    '==================================================================
    'Runs SplitQuoted over a set of sample strings (plain, fully
    'quoted, empty fields, unbalanced quotes, embedded quotes) and
    'prints each result to the console via TestSplitQuoted.
    '==================================================================
    Public Sub Test ()
        TestSplitQuoted ("A,B,C,D")
        TestSplitQuoted ("""A,B,C,D""")
        TestSplitQuoted (",,")
        TestSplitQuoted (""","",""")
        TestSplitQuoted (",,"","",a,"","",,")
        TestSplitQuoted (",B,""C,D,")
        TestSplitQuoted ("A,"" B , C "",""D,,""")
        TestSplitQuoted ("A, ""B,C"" ,D")
        'NOTE(review): the next case is identical to the previous one -
        'possibly a whitespace variant that was lost in formatting.
        TestSplitQuoted ("A, ""B,C"" ,D")
        TestSplitQuoted ("A, """"B,C"""" ,D")
        TestSplitQuoted ("A"" , ""B"" , ""C"" , ""D")
        TestSplitQuoted ("""A , ""B"" , ""C"" , D""")
        TestSplitQuoted ("""A"" , ""B"" , ""C"" , ""D""")
        TestSplitQuoted ("""A, ""B,"" ""C,"" D""")
    End Sub

    '==================================================================
    'Splits sStr with SplitQuoted (default delimiter and quote) and
    'writes the result to the console: the input in [brackets] on the
    'first line, then every substring in <angle brackets> on the next
    'line, followed by a blank line.
    '==================================================================
    Public Sub TestSplitQuoted (sStr As String)
        'Build the header directly; the variable must not appear in
        'its own initialiser.
        Dim S As String = "[" & sStr & "] " & vbCrLf & " "
        Dim aParts As String() = SplitQuoted (sStr)
        Dim sPart As String
        For Each sPart In aParts
            S = S & "<" & sPart & "> "
        Next
        Console.WriteLine (S & vbCrLf)
    End Sub

    '==================================================================
    'This function splits a string in exactly the same way
    'as Split() except that delimiters (commas) enclosed by
    'the specified quoting characters (double-quote) are not
    'treated as delimiters.
    '
    'Like Split(), this function removes <only> characters
    'which occur as delimiters. All quoting characters and
    'leading and trailing spaces are retained.
    '
    'For normal strings, this function will behave entirely
    'as expected. For example (using ' as the quoting char)
    ' [Cat, 'Apple, Orange', 'Spade, Trowel', Dog]
    'will result in
    ' [Cat] ['Apple, Orange'] ['Spade, Trowel'] [Dog]
    '(The examples in this comment leave out the leading spaces
    'for readability; the function itself retains them.)
    '
    'It is not necessary for a quoting character to occur at
    'the start of a substring. If a quoting character is
    'embedded, it will still act to quote embedded commas.
    '
    'For example:
    ' [A, B', 'C, D] will result in [A] [B', 'C] and [D].
    'The [', '] is embedded within the B and C and the substring
    'thus formed runs from the B to the C inclusive.
    '
    'Note, therefore, that the following examples:
    ' [A', ''B,C'' ,'D] result [A', ''B,C'' ,'D]
    ' ['A, 'B', 'C', D'] result ['A, 'B', 'C', D']
    'give single strings as output. This is correct behaviour.
    'On the other hand,
    ' ['A, 'B,' 'C,' D'] will result in ['A, 'B] [' 'C] [' D']
    'because the comma after the B is not enclosed within quotes.
    '
    'If there is a closing quote missing, it is assumed that
    'it would have been at the end of the entire string.
    '
    'cComma and cQuote are named thus for convenience.
    'They can be any character. They can even be the
    'same character but then no splitting will occur.
    '
    'Parameters:
    ' sStr   - the string to split. Must not be Nothing.
    ' cComma - the delimiter character (default: comma).
    ' cQuote - the quoting character (default: double-quote).
    'Returns:
    ' The substrings in order of occurrence. There is always at
    ' least one element (an empty input gives one empty string).
    'Throws:
    ' ArgumentNullException if sStr is Nothing.
    '==================================================================
    Public Function SplitQuoted (sStr As String, _
    Optional cComma As Char = ","c, _
    Optional cQuote As Char = """"c _
    ) As String()
        If sStr Is Nothing Then
            Throw New ArgumentNullException ("sStr")
        End If

        'If there are no quotes, do it the easy way.
        If sStr.IndexOf (cQuote) < 0 Then
            Return sStr.Split (cComma)
        End If

        Dim alParts As New ArrayList
        Dim PartStart As Integer = 0 'Where the current substring begins.
        Dim ScanPos As Integer = 0   'Look for commas and quotes hereafter.

        Do
            Dim PosOfComma As Integer = sStr.IndexOf (cComma, ScanPos)
            If PosOfComma < 0 Then
                'No more delimiters: add the remainder of the string
                '(or an empty string if there's a comma at the end).
                alParts.Add (sStr.Substring (PartStart))
                Exit Do
            End If

            Dim PosOfQuote As Integer = sStr.IndexOf (cQuote, ScanPos)
            If PosOfQuote < 0 OrElse PosOfComma < PosOfQuote Then
                'The comma comes before any quote, so it is a real
                'delimiter. Extract the substring and step over the comma.
                alParts.Add (sStr.Substring (PartStart, PosOfComma - PartStart))
                PartStart = PosOfComma + 1
                ScanPos = PartStart
            Else
                'The comma comes after an opening quote (or the comma
                'and quote are the same character). Find the closing
                'quote and resume the search just after it, leaving
                'PartStart alone so the quoted text stays in this part.
                Dim PosOfClose As Integer = sStr.IndexOf (cQuote, PosOfQuote + 1)
                If PosOfClose < 0 Then
                    'Closing quote missing: treat it as being at the end
                    'of the string, so everything left is one substring.
                    alParts.Add (sStr.Substring (PartStart))
                    Exit Do
                End If
                ScanPos = PosOfClose + 1
            End If
        Loop

        'Turn the ArrayList back into an array of strings.
        Return DirectCast (alParts.ToArray (GetType (String)), String())
    End Function

End Module
</code>