I'm trying to use the Microsoft.Office.Interop.Excel namespace to open an Excel file (XSL or CSV, but sadly not XSLX) and import it into a DataSet. I don't have control over the worksheet or column names, so I need to allow for changes to them.
I've tried the OLEDB method of this in the past, and had a lot of problems with it (buggy, slow, and required prior knowledge of the Excel file's schema), so I want to avoid doing that again. What I'd like to do is use Microsoft.Office.Interop.Excel to import the workbook directly to a DataSet, or loop through the worksheets and load each one into a DataTable.
Believe it or not, I've had trouble finding resources for this. A few searches on StackOverflow have found mostly people trying to do the reverse (DataSet => Excel), or the OLEDB technique. Google hasn't been much more helpful.
public void Load(string filename, Excel.XlFileFormat format = Excel.XlFileFormat.xlCSV)
{
app = new Excel.Application();
book = app.Workbooks.Open(Filename: filename, Format: format);
DataSet ds = new DataSet();
foreach (Excel.Worksheet sheet in book.Sheets)
{
DataTable dt = new DataTable(sheet.Name);
ds.Tables.Add(dt);
//??? Fill dt from sheet
}
this.Data = ds;
}
I'm fine with either importing the entire book at once, or looping through one sheet at a time. Can I do this with Interop.Excel?
This question is related to
c#
excel
interop
dataset
excel-interop
Quiet Late though!.
This method is properly tested and it converts the excel to DataSet
.
public DataSet Dtl()
{
//Instance reference for Excel Application
Microsoft.Office.Interop.Excel.Application objXL = null;
//Workbook refrence
Microsoft.Office.Interop.Excel.Workbook objWB = null;
DataSet ds = new DataSet();
try
{
objXL = new Microsoft.Office.Interop.Excel.Application();
objWB = objXL.Workbooks.Open(@"Book1.xlsx");//Your path to excel file.
foreach (Microsoft.Office.Interop.Excel.Worksheet objSHT in objWB.Worksheets)
{
int rows = objSHT.UsedRange.Rows.Count;
int cols = objSHT.UsedRange.Columns.Count;
DataTable dt = new DataTable();
int noofrow = 1;
//If 1st Row Contains unique Headers for datatable include this part else remove it
//Start
for (int c = 1; c <= cols; c++)
{
string colname = objSHT.Cells[1, c].Text;
dt.Columns.Add(colname);
noofrow = 2;
}
//END
for (int r = noofrow; r <= rows; r++)
{
DataRow dr = dt.NewRow();
for (int c = 1; c <= cols; c++)
{
dr[c - 1] = objSHT.Cells[r, c].Text;
}
dt.Rows.Add(dr);
}
ds.Tables.Add(dt);
}
//Closing workbook
objWB.Close();
//Closing excel application
objXL.Quit();
return ds;
}
catch (Exception ex)
{
objWB.Saved = true;
//Closing work book
objWB.Close();
//Closing excel application
objXL.Quit();
//Response.Write("Illegal permission");
return ds;
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Reflection;
using Microsoft.Office.Interop.Excel;
namespace trg.satmap.portal.ParseAgentSkillMapping
{
class ConvertXLStoDT
{
private StringBuilder errorMessages;
public StringBuilder ErrorMessages
{
get { return errorMessages; }
set { errorMessages = value; }
}
public ConvertXLStoDT()
{
ErrorMessages = new StringBuilder();
}
public System.Data.DataTable XLStoDTusingInterOp(string FilePath)
{
#region Excel important Note.
/*
* Excel creates XLS and XLSX files. These files are hard to read in C# programs.
* They are handled with the Microsoft.Office.Interop.Excel assembly.
* This assembly sometimes creates performance issues. Step-by-step instructions are helpful.
*
* Add the Microsoft.Office.Interop.Excel assembly by going to Project -> Add Reference.
*/
#endregion
Microsoft.Office.Interop.Excel.Application excelApp = null;
Microsoft.Office.Interop.Excel.Workbook workbook = null;
System.Data.DataTable dt = new System.Data.DataTable(); //Creating datatable to read the content of the Sheet in File.
try
{
excelApp = new Microsoft.Office.Interop.Excel.Application(); // Initialize a new Excel reader. Must be integrated with an Excel interface object.
//Opening Excel file(myData.xlsx)
workbook = excelApp.Workbooks.Open(FilePath, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value);
Microsoft.Office.Interop.Excel.Worksheet ws = (Microsoft.Office.Interop.Excel.Worksheet)workbook.Sheets.get_Item(1);
Microsoft.Office.Interop.Excel.Range excelRange = ws.UsedRange; //gives the used cells in sheet
ws = null; // now No need of this so should expire.
//Reading Excel file.
object[,] valueArray = (object[,])excelRange.get_Value(Microsoft.Office.Interop.Excel.XlRangeValueDataType.xlRangeValueDefault);
excelRange = null; // you don't need to do any more Interop. Now No need of this so should expire.
dt = ProcessObjects(valueArray);
}
catch (Exception ex)
{
ErrorMessages.Append(ex.Message);
}
finally
{
#region Clean Up
if (workbook != null)
{
#region Clean Up Close the workbook and release all the memory.
workbook.Close(false, FilePath, Missing.Value);
System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
#endregion
}
workbook = null;
if (excelApp != null)
{
excelApp.Quit();
}
excelApp = null;
#endregion
}
return (dt);
}
/// <summary>
/// Scan the selected Excel workbook and store the information in the cells
/// for this workbook in an object[,] array. Then, call another method
/// to process the data.
/// </summary>
private void ExcelScanIntenal(Microsoft.Office.Interop.Excel.Workbook workBookIn)
{
//
// Get sheet Count and store the number of sheets.
//
int numSheets = workBookIn.Sheets.Count;
//
// Iterate through the sheets. They are indexed starting at 1.
//
for (int sheetNum = 1; sheetNum < numSheets + 1; sheetNum++)
{
Worksheet sheet = (Worksheet)workBookIn.Sheets[sheetNum];
//
// Take the used range of the sheet. Finally, get an object array of all
// of the cells in the sheet (their values). You can do things with those
// values. See notes about compatibility.
//
Range excelRange = sheet.UsedRange;
object[,] valueArray = (object[,])excelRange.get_Value(XlRangeValueDataType.xlRangeValueDefault);
//
// Do something with the data in the array with a custom method.
//
ProcessObjects(valueArray);
}
}
private System.Data.DataTable ProcessObjects(object[,] valueArray)
{
System.Data.DataTable dt = new System.Data.DataTable();
#region Get the COLUMN names
for (int k = 1; k <= valueArray.GetLength(1); k++)
{
dt.Columns.Add((string)valueArray[1, k]); //add columns to the data table.
}
#endregion
#region Load Excel SHEET DATA into data table
object[] singleDValue = new object[valueArray.GetLength(1)];
//value array first row contains column names. so loop starts from 2 instead of 1
for (int i = 2; i <= valueArray.GetLength(0); i++)
{
for (int j = 0; j < valueArray.GetLength(1); j++)
{
if (valueArray[i, j + 1] != null)
{
singleDValue[j] = valueArray[i, j + 1].ToString();
}
else
{
singleDValue[j] = valueArray[i, j + 1];
}
}
dt.LoadDataRow(singleDValue, System.Data.LoadOption.PreserveChanges);
}
#endregion
return (dt);
}
}
}
Years after everyone's answer, I too want to present how I did it for my project
/// <summary>
/// /Reads an excel file and converts it into dataset with each sheet as each table of the dataset
/// </summary>
/// <param name="filename"></param>
/// <param name="headers">If set to true the first row will be considered as headers</param>
/// <returns></returns>
public DataSet Import(string filename, bool headers = true)
{
var _xl = new Excel.Application();
var wb = _xl.Workbooks.Open(filename);
var sheets = wb.Sheets;
DataSet dataSet = null;
if (sheets != null && sheets.Count != 0)
{
dataSet = new DataSet();
foreach (var item in sheets)
{
var sheet = (Excel.Worksheet)item;
DataTable dt = null;
if (sheet != null)
{
dt = new DataTable();
var ColumnCount = ((Excel.Range)sheet.UsedRange.Rows[1, Type.Missing]).Columns.Count;
var rowCount = ((Excel.Range)sheet.UsedRange.Columns[1, Type.Missing]).Rows.Count;
for (int j = 0; j < ColumnCount; j++)
{
var cell = (Excel.Range)sheet.Cells[1, j + 1];
var column = new DataColumn(headers ? cell.Value : string.Empty);
dt.Columns.Add(column);
}
for (int i = 0; i < rowCount; i++)
{
var r = dt.NewRow();
for (int j = 0; j < ColumnCount; j++)
{
var cell = (Excel.Range)sheet.Cells[i + 1 + (headers ? 1 : 0), j + 1];
r[j] = cell.Value;
}
dt.Rows.Add(r);
}
}
dataSet.Tables.Add(dt);
}
}
_xl.Quit();
return dataSet;
}
object[,] valueArray = (object[,])excelRange.get_Value(XlRangeValueDataType.xlRangeValueDefault);
//Get the column names
for (int k = 0; k < valueArray.GetLength(1); )
{
//add columns to the data table.
dt.Columns.Add((string)valueArray[1,++k]);
}
//Load data into data table
object[] singleDValue = new object[valueArray.GetLength(1)];
//value array first row contains column names. so loop starts from 1 instead of 0
for (int i = 1; i < valueArray.GetLength(0); i++)
{
Console.WriteLine(valueArray.GetLength(0) + ":" + valueArray.GetLength(1));
for (int k = 0; k < valueArray.GetLength(1); )
{
singleDValue[k] = valueArray[i+1, ++k];
}
dt.LoadDataRow(singleDValue, System.Data.LoadOption.PreserveChanges);
}
Have you seen this one? From http://www.aspspider.com/resources/Resource510.aspx:
public DataTable Import(String path)
{
Microsoft.Office.Interop.Excel.Application app = new Microsoft.Office.Interop.Excel.Application();
Microsoft.Office.Interop.Excel.Workbook workBook = app.Workbooks.Open(path, 0, true, 5, "", "", true, Microsoft.Office.Interop.Excel.XlPlatform.xlWindows, "\t", false, false, 0, true, 1, 0);
Microsoft.Office.Interop.Excel.Worksheet workSheet = (Microsoft.Office.Interop.Excel.Worksheet)workBook.ActiveSheet;
int index = 0;
object rowIndex = 2;
DataTable dt = new DataTable();
dt.Columns.Add("FirstName");
dt.Columns.Add("LastName");
dt.Columns.Add("Mobile");
dt.Columns.Add("Landline");
dt.Columns.Add("Email");
dt.Columns.Add("ID");
DataRow row;
while (((Microsoft.Office.Interop.Excel.Range)workSheet.Cells[rowIndex, 1]).Value2 != null)
{
row = dt.NewRow();
row[0] = Convert.ToString(((Microsoft.Office.Interop.Excel.Range)workSheet.Cells[rowIndex, 1]).Value2);
row[1] = Convert.ToString(((Microsoft.Office.Interop.Excel.Range)workSheet.Cells[rowIndex, 2]).Value2);
row[2] = Convert.ToString(((Microsoft.Office.Interop.Excel.Range)workSheet.Cells[rowIndex, 3]).Value2);
row[3] = Convert.ToString(((Microsoft.Office.Interop.Excel.Range)workSheet.Cells[rowIndex, 4]).Value2);
row[4] = Convert.ToString(((Microsoft.Office.Interop.Excel.Range)workSheet.Cells[rowIndex, 5]).Value2);
index++;
rowIndex = 2 + index;
dt.Rows.Add(row);
}
app.Workbooks.Close();
return dt;
}
Source: Stackoverflow.com