Processing Office Open XML (xlsx) files by EPPlus.
There are some way to read Excel file - by Microsoft.Jet.OLEDB.4.0 & Microsoft.ACE.OLEDB - Cайтік з web-сервісом, спеціфічним membership-провайдером та даними Excel. and by native Excel - Шаблон кода для роботи з MS Excel.. But in this page I describe modern and best way - by EPPlus.
- Write XLSX
- Read XLSX as untyped values
- Read XLSX as typed data
- Read XLSX as SharedStringTablePart
- Calculate formula
- Use EPplus in Mutlithreading parsers
- Universal loader from EPlus
Write XLSX
There are two direction to use EPPlus. First direction is to process data in Datatable structure, and eventually save data to XLSX format. In this case you can be read firstly CSV by Processing CSV files by CsvHelper to Datatable. Below you can see this pattern.
7: Imports OfficeOpenXml
...
543: Sub SaveDtToExcel(DTF As DataTable, ExcelFullName As FileInfo)
544: Dim Excel As ExcelPackage = New ExcelPackage()
545: Dim WorkSheet As ExcelWorksheet = Excel.Workbook.Worksheets.Add("Sheet 1")
546: WorkSheet.Cells("A1").LoadFromDataTable(DTF, True)
547: Excel.SaveAs(ExcelFullName)
548: End Sub
...
669: Private Sub BGW5_DoWork(sender As Object, e As DoWorkEventArgs) Handles BGW5.DoWork
670: Dim DT1 As DataTable = ReadCSV(IO.Path.Combine(SelectedTempPath, WestlawSelectedListViewFileName))
671: Dim DV2 As DataView = DT1.DefaultView
672: DV2.Sort() = e.Argument & " asc"
673: Dim DT2 As DataTable = DV2.ToTable
674: '
675: Dim AttachmentFileNameInfo As New FileInfo(IO.Path.Combine(SelectedTempPath, WestlawSelectedListViewFileName & ".xlsx"))
676: SaveDtToExcel(DT2, AttachmentFileNameInfo)
677: WestLawAttachmentFileName = AttachmentFileNameInfo.FullName
678: End Sub
Read XLSX as untyped values.
In this form I read Excel file, show something fields and processing another fields.
This is template of this form.
1: Imports System.IO
2: Imports OfficeOpenXml
3: Imports Microsoft.VisualBasic
4:
5: Public Class SearchAddress
6: Public Property FileName As String
7: Dim FI As FileInfo
8: Dim WorkBook As ExcelWorkbook
9: Dim WorkSheet As ExcelWorksheet
10: Dim ColCount As Integer
11: Dim RowCount As Integer
12: Dim CurRow As Integer
13:
14:
15: Private Sub SearchAddress_Load(sender As Object, e As EventArgs) Handles Me.Load
...
22: FI = New FileInfo(FileName)
23: Dim Package = New ExcelPackage(FI)
24: WorkBook = Package.Workbook
25: WorkSheet = WorkBook.Worksheets.First()
26: 'For i As Integer = 1 To RowCount
27: ' Debug.Print(WorkSheet.Cells(i, 1).Value.ToString())
28: 'Next
29: ColCount = WorkSheet.Dimension.End.Column
30: RowCount = WorkSheet.Dimension.End.Row
31: CurRow = 2
32: ShowRowInfo(2)
...
38: End Sub
...
40: Sub ShowRowInfo(RowNumber As Integer)
41: CurRowsLabel.Text = GetCurRowLabel(CurRow)
...
48: AddressTextBox.SelectText(StreetTextBox.Text & "," & CityTextBox.Text & "," & StateComboBox.SelectedValue & "," & ZipTextBox.Text)
49: End Sub
...
51: Dim RepeatRows As List(Of String)
52: Function GetCurRowLabel(RowNumber As Integer) As String
53: RepeatRows = New List(Of String)
54: Dim Str1 As New Text.StringBuilder
55: For i As Integer = 1 To RowCount
56: If WorkSheet.Cells(i, 1).Value = WorkSheet.Cells(RowNumber, 1).Value Then
57: RepeatRows.Add(i)
58: Str1.Append(i & ",")
59: End If
60: Next
61: Return Microsoft.VisualBasic.Left(Str1.ToString, Str1.Length - 1) & " / " & RowCount
62: End Function
Read XLSX as typed data.
Pay attention how I can access to untyped data - only as object - WorkSheet.Cells(RowNumber, ColNumber).Value, if you have access to typed data, you need sophisticated converter object value to named typed fields.
In this case you need firstly define class ExcelData with definition of each fields of you Excel file. Then create extension below and call it:
82: Dim XSLData = WorkSheet.Tables.First().ConvertTableToObjects(of ExcelData)()
1: Imports OfficeOpenXml
2: Imports OfficeOpenXml.Table
3: Imports System.Runtime.CompilerServices
4:
5: Module ExcelExtensions
6:
7: <Extension()>
8: Public Function ConvertTableToObjects(Of T As New)(ByVal Table As ExcelTable) As IEnumerable(Of T)
9: Dim ConvertDateTime = New Func(Of Double, DateTime)(Function(ExcelDate)
10: If ExcelDate < 1 Then Throw New ArgumentException("Excel dates cannot be smaller than 0.")
11: Dim DateOfReference = New DateTime(1900, 1, 1)
12: If ExcelDate > 60.0R Then
13: ExcelDate = ExcelDate - 2
14: Else
15: ExcelDate = ExcelDate - 1
16: End If
17: Return DateOfReference.AddDays(ExcelDate)
18: End Function)
19: Dim Tprops = (New T()).GetType().GetProperties().ToList()
20: Dim Groups = Table.WorkSheet.Cells(Table.Address.Start.Row, Table.Address.Start.Column, Table.Address.End.Row, Table.Address.End.Column).GroupBy(Function(Cell) Cell.Start.Row).ToList()
21: Dim Types = Groups.Skip(1).First().Select(Function(Rcell) Rcell.Value.GetType()).ToList()
22: Dim Colnames = Groups.First().Select(Function(Hcell, idx) New With {.Name = Hcell.Value.ToString(), .index = idx}).Where(Function(O) Tprops.Select(Function(P) P.Name).Contains(O.Name)).ToList()
23: Dim Rowvalues = Groups.Skip(1).Select(Function(Cg) Cg.Select(Function(c) c.Value).ToList())
24: Dim Collection = Rowvalues.Select(Function(Row)
25: Dim Tnew = New T()
26: Colnames.ForEach(Function(ColName)
27: Dim Val = Row(ColName.index)
28: Dim Type = Types(ColName.index)
29: Dim Prop = Tprops.First(Function(p) p.Name = ColName.Name)
30: If Type = GetType(Double) Then
31: Dim UnboxedVal = CDbl(Val)
32: If Prop.PropertyType = GetType(Int32) Then
33: Prop.SetValue(Tnew, CInt(UnboxedVal))
34: ElseIf Prop.PropertyType = GetType(Double) Then
35: Prop.SetValue(Tnew, UnboxedVal)
36: ElseIf Prop.PropertyType = GetType(DateTime) Then
37: Prop.SetValue(Tnew, ConvertDateTime(UnboxedVal))
38: Else
39: Throw New NotImplementedException(String.Format("Type '{0}' not implemented yet!", Prop.PropertyType.Name))
40: End If
41: Else
42: Prop.SetValue(Tnew, Val)
43: End If
44: End Function)
45: Return Tnew
46: End Function)
47: Return Collection
48: End Function
49: End Module
Last function is not a my classic VB-style of programming, I publishing this style of VB.NET only for fun.
Read XLSX as SharedStringTablePart
100: Public Async Function SeedCities(Stream As Stream) As Task Implements IDataService.SeedCities
...
106: If Stream Is Nothing Then
107: Throw New WebFaultException(Of String)("MISSING_PARAMETERS", HttpStatusCode.BadRequest)
108: End If
109:
110: Dim Parser As MultipartFormDataParser = MultipartFormDataParser.Parse(Stream)
111:
112: Dim ExcelFile As FilePart = Parser.Files.Find(Function(X) X.Name.Equals("City"))
113:
114: If ExcelFile IsNot Nothing Then
115: Dim Root As String = HostingEnvironment.MapPath("~")
116: Dim Location As String = Guid.NewGuid().ToString().ToUpperInvariant()
117: LocalStorage.WriteFileToTempDataStore(Root, Location, ExcelFile.Data)
118:
119: Dim FilePath As String = Path.Combine(LocalStorage.GetTemporaryStorageDirectory(Root), Location)
...
134: Using Connection As New SqlConnection(Data.Constants.ConnectionString)
135: Connection.Open()
136:
137: Using Transaction As SqlTransaction = Connection.BeginTransaction()
138: Try
139: If String.IsNullOrWhiteSpace(FilePath) Then
140: Throw New WebFaultException(Of String)("MISSING_FILE_PATH", HttpStatusCode.BadRequest)
141: End If
142:
143: Using Document As SpreadsheetDocument = SpreadsheetDocument.Open(FilePath, False)
144: Dim WorkbookPart As WorkbookPart = Document.WorkbookPart
145: Dim Sheet As Sheet = WorkbookPart.Workbook.Sheets.ElementAt(0)
146: Dim SheetData As SheetData = CType(WorkbookPart.GetPartById(Sheet.Id), WorksheetPart).Worksheet.Elements(Of SheetData)().ElementAt(0)
147:
148: Dim Rows As IEnumerable(Of Row) = SheetData.Elements(Of Row)()
149: Dim RowsCount As Integer = Rows.Count()
150:
151: Dim StringTable As SharedStringTablePart = WorkbookPart.GetPartsOfType(Of SharedStringTablePart).FirstOrDefault()
152:
153: For Counter As Integer = 2 To RowsCount - 1
154: Dim Cells As IEnumerable(Of Cell) = Rows.ElementAt(Counter).Elements(Of Cell)()
155:
156: Dim City1HCell As Cell = Cells.ElementAt(0)
157: Dim City1HName As String = GetCellValue(City1HCell, StringTable)
158:
159: Dim City1ECell As Cell = Cells.ElementAt(2)
160: Dim City1EName As String = GetCellValue(City1ECell, StringTable)
161:
162: Dim CountryIDCell As Cell = Cells.ElementAt(3)
163: Dim CountryID As String = GetCellValue(CountryIDCell, StringTable)
164:
165: Dim ECity As City = City.GetByName(Connection, Transaction, City1EName)
166: Dim HCity As City = City.GetByName(Connection, Transaction, City1HName)
167: If ECity Is Nothing AndAlso HCity Is Nothing Then
168: Dim City As New City()
169: City.EName = City1EName
170: City.HName = City1HName
171: City.ToCountry = Guid.Parse(CountryID)
172: City.InsertedOn = DateTime.UtcNow
173: City.Add(Connection, Transaction)
174: End If
175:
176: Dim City2HCell As Cell = Cells.ElementAt(1)
177: Dim City2HName As String = GetCellValue(City2HCell, StringTable)
178:
179: Dim City2ECell As Cell = Cells.ElementAt(3)
180: Dim City2EName As String = GetCellValue(City2ECell, StringTable)
181:
182: ECity = City.GetByName(Connection, Transaction, City2EName)
183: HCity = City.GetByName(Connection, Transaction, City2HName)
184: If ECity Is Nothing AndAlso HCity Is Nothing Then
185: Dim City As New City()
186: City.EName = City2EName
187: City.HName = City2HName
188: City.InsertedOn = DateTime.UtcNow
189: City.Add(Connection, Transaction)
190: End If
191: Next
192: End Using
193:
194: Transaction.Commit()
...
Calculate formula
This is simplest idea to store formula in string.
279: Dim Formulas() As String = {
280: "J{0}",
281: "If(Or(TEXT(C{0};""ddd"") = ""sat"";TEXT(C{0};""ddd"")=""sun"");0;If((G{0}>H{0})*(G{0}>=0.75);0;If(G{0}>H{0};(0.75-G{0})*24;If(G{0}<0.25;MIN(8-(0.25-G{0})*24;Q{0});MIN(8;Q{0};(0.75-G{0})*24)))))",
282: "If(Or(TEXT(C{0};""ddd"")=""sat"";TEXT(C{0};""ddd"")=""sun"");If(Or(G{0}>=0.75;H{0}<=0.25);0;If(G{0}>=0.25;MIN(Q{0};(0.75-G{0})*24);MIN(12;(H{0}-0.25)*24)));MIN((Q{0}-V{0});If(V{0}<8;(0.75-G{0})*24-V{0};(0.75-G{0})*24-V{0}-I{0}/60)))",
283: "Q{0}-V{0}-W{0}",
284: "If(Q{0}>=9.5;1;0)",
285: "If((N{0}=""Union"")*(Q{0}>10);1;0)",
286: "If(T{0}<>""";1;0)",
287: "If(K{0}>0;Q{0};0)",
288: "If(L{0}=""";0;Q{0})",
289: "VALUE(If(RIGHT(M{0};2)=""AB"";LEFT(M{0};LEN(M{0})-2);If(RIGHT(M{0};3)=""VMS"";LEFT(M{0};LEN(M{0})-3);0)))"
290: }
291:
292: For I As Integer = 2 To RowCount
293: WorkSheet.Cells(I, 17).Formula = String.Format(Formulas(0), I)
294: WorkSheet.Cells(I, 17).Calculate()
295: WorkSheet.Cells(I, 22).Formula = String.Format(Formulas(1), I)
296: WorkSheet.Cells(I, 22).Calculate()
297: WorkSheet.Cells(I, 23).Formula = String.Format(Formulas(2), I)
298: WorkSheet.Cells(I, 23).Calculate()
299: WorkSheet.Cells(I, 24).Formula = String.Format(Formulas(3), I)
300: WorkSheet.Cells(I, 24).Calculate()
301: WorkSheet.Cells(I, 25).Formula = String.Format(Formulas(4), I)
302: WorkSheet.Cells(I, 25).Calculate()
303: WorkSheet.Cells(I, 26).Formula = String.Format(Formulas(5), I)
304: WorkSheet.Cells(I, 26).Calculate()
305: WorkSheet.Cells(I, 27).Formula = String.Format(Formulas(6), I)
306: WorkSheet.Cells(I, 27).Calculate()
307: WorkSheet.Cells(I, 28).Formula = String.Format(Formulas(7), I)
308: WorkSheet.Cells(I, 28).Calculate()
309: WorkSheet.Cells(I, 29).Formula = String.Format(Formulas(8), I)
310: WorkSheet.Cells(I, 29).Calculate()
311: WorkSheet.Cells(I, 30).Formula = String.Format(Formulas(9), I)
312: WorkSheet.Cells(I, 30).Calculate()
313: Next
Use EPplus in Mutlithreading parsers
More details examples to use EPplus (extension function and check if needed column is exist) please see in page Multithreading Parsers with Parallel, CsQuery, Newtonsoft.Json, OfficeOpenXml and IAsyncResult/AsyncCallback..
Universal loader from EPlus
More example to use EPlus Generic VB.NET function for loading ExcelFile to DataBase (EF6 CodeFirst) with Reflection and avoiding Linq-to-Entity Linq.Expressions (use Linq-to-Object IEnumerable)
|