G
Guest
Hello. I am trying to split a file with 334,386 lines into seperate files of
50,000 each.
This is the code i am running:
Dim intFragmentRawIndex As Integer
Dim swRawDataFile As StreamWriter
Dim intNewRawIndex As Integer
Dim strNewRawDataFileName As String
Dim intFragmentCallCount As Integer = 0
Dim strHeaderLine As String
Dim blnFileClosed As Boolean = False
strHeaderLine = colRawDataFile(1)
CreateRawDataFragment(intParentRawIndex, intNewRawIndex,
strNewRawDataFileName)
Dim myFileStream As New
System.IO.FileStream(strTempDirectoryPath & strNewRawDataFileName, _
FileMode.OpenOrCreate, FileAccess.Write, FileShare.None)
swRawDataFile = New StreamWriter(myFileStream)
swRawDataFile.WriteLine(strHeaderLine)
For i As Integer = 2 To colRawDataFile.Count
If intFragmentCallCount = 50000 Then
'Clear Stream Writer Buffer
swRawDataFile.Flush()
'Close file
swRawDataFile.Close()
'Set Call Count against raw data file
SetFragmentCallCount(intNewRawIndex, intFragmentCallCount)
'Reset call count
intFragmentCallCount = 0
'If not on final line of raw data file....
If i <> colRawDataFile.Count Then
CreateRawDataFragment(intParentRawIndex,
intNewRawIndex, strNewRawDataFileName)
myFileStream = New
System.IO.FileStream(strTempDirectoryPath & strNewRawDataFileName, _
FileMode.OpenOrCreate, FileAccess.Write, FileShare.None)
swRawDataFile = New StreamWriter(myFileStream)
swRawDataFile.WriteLine(strHeaderLine)
Else
blnFileClosed = True
End If
End If
swRawDataFile.WriteLine(colRawDataFile(i))
intFragmentCallCount += 1
Next
If Not blnFileClosed Then
'Close last fragment
swRawDataFile.Close()
'Set call count against last fragment
SetFragmentCallCount(intNewRawIndex, intFragmentCallCount)
End If
The first file creates in 3 mins.
The second file creates in 11 minutes.
The third file creates in 18 minutes.
I am still waiting for the forth file to create.
I am writing the same number of records to each file, so why would the time
it takes to write the file of the same size take longer each time?
I thought that calling the flush method of the stream would maintain
performance but this does not seem to be the case! What am i doing wrong?
50,000 each.
This is the code i am running:
Dim intFragmentRawIndex As Integer
Dim swRawDataFile As StreamWriter
Dim intNewRawIndex As Integer
Dim strNewRawDataFileName As String
Dim intFragmentCallCount As Integer = 0
Dim strHeaderLine As String
Dim blnFileClosed As Boolean = False
strHeaderLine = colRawDataFile(1)
CreateRawDataFragment(intParentRawIndex, intNewRawIndex,
strNewRawDataFileName)
Dim myFileStream As New
System.IO.FileStream(strTempDirectoryPath & strNewRawDataFileName, _
FileMode.OpenOrCreate, FileAccess.Write, FileShare.None)
swRawDataFile = New StreamWriter(myFileStream)
swRawDataFile.WriteLine(strHeaderLine)
For i As Integer = 2 To colRawDataFile.Count
If intFragmentCallCount = 50000 Then
'Clear Stream Writer Buffer
swRawDataFile.Flush()
'Close file
swRawDataFile.Close()
'Set Call Count against raw data file
SetFragmentCallCount(intNewRawIndex, intFragmentCallCount)
'Reset call count
intFragmentCallCount = 0
'If not on final line of raw data file....
If i <> colRawDataFile.Count Then
CreateRawDataFragment(intParentRawIndex,
intNewRawIndex, strNewRawDataFileName)
myFileStream = New
System.IO.FileStream(strTempDirectoryPath & strNewRawDataFileName, _
FileMode.OpenOrCreate, FileAccess.Write, FileShare.None)
swRawDataFile = New StreamWriter(myFileStream)
swRawDataFile.WriteLine(strHeaderLine)
Else
blnFileClosed = True
End If
End If
swRawDataFile.WriteLine(colRawDataFile(i))
intFragmentCallCount += 1
Next
If Not blnFileClosed Then
'Close last fragment
swRawDataFile.Close()
'Set call count against last fragment
SetFragmentCallCount(intNewRawIndex, intFragmentCallCount)
End If
The first file creates in 3 mins.
The second file creates in 11 minutes.
The third file creates in 18 minutes.
I am still waiting for the forth file to create.
I am writing the same number of records to each file, so why would the time
it takes to write the file of the same size take longer each time?
I thought that calling the flush method of the stream would maintain
performance but this does not seem to be the case! What am i doing wrong?