所以我在.NET Core Web API中创建了一个Web API。我添加了逻辑到API创建的所有代码,在swaggerUI上我测试了API,它显示错误:无法访问已释放的对象。对象名称:'ReferenceReadStream'。这是代码(C#)
using Aspose.Pdf;
using Aspose.Pdf.Text;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using System;
using System.Data;
using System.IO;
using System.Linq;
namespace TryingOutAPI.Controllers
{
[Route("api/[controller]")]
[ApiController]
public class ValuesController : ControllerBase
{
[HttpPost]
public IActionResult ProcessPdfTables(IFormFile pdfFile)
{
try
{
if (pdfFile == null || pdfFile.Length == 0)
{
return BadRequest("No PDF file uploaded.");
}
// Load the PDF document from the uploaded file
Aspose.Pdf.Document pdfDocument;
using (var stream = pdfFile.OpenReadStream())
{
pdfDocument = new Aspose.Pdf.Document(stream);
}
// Extract the pages with the tables
DataTable[] tables = ExtractTablesFromPdf(pdfDocument, new int[] { 2, 3 });
// Access the first table from the list of extracted tables
DataTable table1 = tables[0];
// Access the second table from the list of extracted tables
DataTable table2 = tables[1];
// Specify the correct column names
string[] columnsToExtract = { "Peak Name", "RT", "Area", "% Area", "RT Ratio", "Height" };
// Select the desired columns from table 1
DataTable table1Subset = SelectColumnsFromTable(table1, columnsToExtract);
table1Subset = RemoveRowsWithNullValues(table1Subset);
// Select the desired columns from table 2
DataTable table2Subset = SelectColumnsFromTable(table2, columnsToExtract);
table2Subset = RemoveRowsWithNullValues(table2Subset);
// Return the subsets of tables without null rows as JSON
return Ok(new { Table1 = table1Subset, Table2 = table2Subset });
}
catch (Exception ex)
{
// Handle any exceptions and return an error response
return StatusCode(StatusCodes.Status500InternalServerError, ex.Message);
}
}
private DataTable[] ExtractTablesFromPdf(Aspose.Pdf.Document pdfDocument, int[] pages)
{
DataTable[] tables = new DataTable[pages.Length];
for (int i = 0; i < pages.Length; i++)
{
int pageNumber = pages[i];
Page pdfPage = pdfDocument.Pages[pageNumber];
// Extract text from the page
TextAbsorber textAbsorber = new TextAbsorber();
pdfPage.Accept(textAbsorber);
string pageContent = textAbsorber.Text;
tables[i] = ConvertTextToDataTable(pageContent);
}
return tables;
}
private DataTable SelectColumnsFromTable(DataTable table, string[] columnsToExtract)
{
DataTable subset = new DataTable();
foreach (string column in columnsToExtract)
{
DataColumn existingColumn = table.Columns.Cast<DataColumn>()
.FirstOrDefault(c => c.ColumnName == column);
if (existingColumn != null)
{
subset.Columns.Add(existingColumn.ColumnName);
}
}
foreach (DataRow row in table.Rows)
{
DataRow newRow = subset.NewRow();
foreach (DataColumn column in subset.Columns)
{
newRow[column.ColumnName] = row[column.ColumnName];
}
subset.Rows.Add(newRow);
}
return subset;
}
private DataTable RemoveRowsWithNullValues(DataTable table)
{
DataTable filteredTable = table.Clone();
foreach (DataRow row in table.Rows)
{
bool hasNullValues = row.ItemArray.Any(x => x is DBNull || string.IsNullOrWhiteSpace(x.ToString()));
if (!hasNullValues)
{
filteredTable.ImportRow(row);
}
}
return filteredTable;
}
private DataTable ConvertTextToDataTable(string text)
{
DataTable dataTable = new DataTable();
// Split the text into lines
string[] lines = text.Split('\n');
// Extract column names from the first line
string[] columnNames = lines[0].Split('\t');
// Add columns to the DataTable
foreach (string columnName in columnNames)
{
dataTable.Columns.Add(columnName.Trim());
}
// Extract data rows from subsequent lines
for (int i = 1; i < lines.Length; i++)
{
string[] rowValues = lines[i].Split('\t');
// Create a new DataRow
DataRow dataRow = dataTable.NewRow();
// Set values for each column in the row
for (int j = 0; j < columnNames.Length; j++)
{
dataRow[j] = rowValues[j].Trim();
}
// Add the row to the DataTable
dataTable.Rows.Add(dataRow);
}
return dataTable;
}
}
}
字符串
我尝试上传工作的文件,但它没有显示提取的数据
2条答案
按热度按时间2wnc66cl1#
ProcessPdfTables
中的using语句将在使用stream
之前处理它。如果你使用的是C#版本8或更高版本,你可以像这样声明stream
:字符串
使用此声明,
stream
将不会被释放,直到它超出范围。jk9hmnmh2#
它可能是你的using语句,你有一个对象引用到流,你是在你的using语句和结束处置。
尝试删除using语句,并在处理完pdf文档后释放流。