import text

  • Thread starter Thread starter wcoetzer
  • Start date Start date
W

wcoetzer

I was given this code, which extracts lines of text from a range of selected text files. At the time I thought I only wanted one line to be extracted per file (more precisely the name in one column and the rest of the file inanother column). Then I discovered that there are multiple lines per file.

I just need the code tweaked so that there is a loop, and every time a new > is encountered a new row is created in the spreadsheet, and the letters following that > are inserted next to the name (which appears before the >).

Thanks!

An example text file is:
Shrew_1043_16Sa_G07_19.ab1 GATTAGAGGCMTGCTKGCCAGTGACATAAGTTAAACGGCCGCGGTATCCTGACCGTGCAAAGGTAGCWTAATCATTTGTTCCTTAATTAGGGACTAGTATGAATGGCCACACGAGGGTTTAACTGTCTCTTATTTCTAATCAGTGAAATTGACCTTCCCGTGAAGAGGCGGGAATAATAAAATAAGACGAGAAGACCCTATGGAGCTTAAATTATATAACCCAATAATATATATAATCACGTACCTAACTAGGGATTAAAAACTATTTATCTGGGGTATAGATTTCGGTTGGGGTGACCTCGGAGTATAAAATAACCTCCGAGCGATTTAGACCAAGATTAACAGATCGAAGTAATAAATCAAATATTGATCCAATCCAATTGATCAACGAAACAAGTTACCCTAGGGATAACAGCGCAATCCTATTCAAGAGTCCCTATCGACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCAATGGTGCAGCAGCTATTAATGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGATYCCGAAAMCSGGA
Shrew_1043_16Sa_G07_20.ab1
GATTAGAGGCMTGCTKGCCAGTGACATAAGTTAAACGGCCGCGGTATCCTGACCGTGCAAAGGTAGCWTAATCATTTGTTCCTTAATTAGGGACTAGTATGAATGGCCACACGAGGGTTTAACTGTCTCTTATTTCTAATCAGTGAAATTGACCTTCCCGTGAAGAGGCGGGAATAATAAAATAAGACGAGAAGACCCTATGGAGCTTAAATTATATAACCCAATAATATATATAATCACGTACCTAACTAGGGATTAAAAACTATTTATCTGGGGTATAGATTTCGGTTGGGGTGACCTCGGAGTATAAAATAACCTCCGAGCGATTTAGACCAAGATTAACAGATCGAAGTAATAAATCAAATATTGATCCAATCCAATTGATCAACGAAACAAGTTACCCTAGGGATAACAGCGCAATCCTATTCAAGAGTCCCTATCGACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCAATGGTGCAGCAGCTATTAATGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGATYCCGAAAMCSGGA



Public RowCT As Long
Public Mysheet As Worksheet
Sub Control()
ActiveWorkbook.Sheets.Add
ActiveSheet.Name = "Fasta Files " & Format(Now(), "YYYY MM DD HH_NN_SS")
Set Mysheet = ActiveSheet
RowCT = 0
Call GetMyFiles("C:\WINDOWS\", "*.fasta")

End Sub

Private Sub GetMyFiles(sPath As String, sFileType As String)

' Requires reference to Microsoft Office 11.0 Object Library.
'Dim fDialog As Office.FileDialog
Dim varFile As Variant
' Clear listbox contents.
' Me.FileList.RowSource = ""
' Set up the File Dialog.
'Set fDialog = Application.FileDialog(msoFileDialogFilePicker)
With Application.FileDialog(msoFileDialogFilePicker)

.AllowMultiSelect = True
.InitialFileName = sPath
.InitialView = msoFileDialogViewDetails
' Set the title of the dialog box.
.Title = "Please select one or more fasta files"
' Clear out the current filters, and add our own.
.Filters.Clear
.Filters.Add "", sFileType
' Show the dialog box. If the .Show method returns True, the
' user picked at least one file. If the .Show method returns
' False, the user clicked Cancel.
If .Show = True Then
'Loop through each file selected and add it to our list box.
For Each varFile In .SelectedItems
Call Processfile(varFile)
Next
Else
MsgBox "You clicked Cancel in the file dialog box."
End If
End With
End Sub
Sub Processfile(sPathAndFileName As Variant)
Dim fno As Integer
Dim tstring As String
Dim sMainstr As String
Dim col As Long
fno = FreeFile
RowCT = RowCT + 1
col = 1
Cells(RowCT, col) = sPathAndFileName
col = col + 1
sMainstr = ""
Open sPathAndFileName For Input As #fno

Line Input #fno, tstring
If InStr(1, tstring, ">") > 0 Then
tstring = Left(tstring, InStr(1, tstring, ".") - 1)
Cells(RowCT, col) = tstring
Cells(RowCT, col).TextToColumns Destination:=Cells(RowCT, col), DataType:=xlDelimited, TextQualifier:=xlDoubleQuote, ConsecutiveDelimiter:=False, Tab:=False, Semicolon:=False, Comma:=False, Space:=False,Other:=True, OtherChar:="_"
col = Cells(RowCT, col).End(xlToRight).Column
End If

While Not EOF(fno)
Line Input #fno, tstring
sMainstr = sMainstr & tstring
Wend

col = col + 1

Cells(RowCT, col) = sMainstr
Close #fno

End Sub
 
Sorry, I meant that the name is on the line starting with > and the next line is the text that needs to be associated with that name.
 
Back
Top