使用 CsvHelper 读取含有大量逗号的丑陋 CSV 文件

j91ykkif  于 2023-06-19  发布在  其他
关注(0)|答案(3)|浏览(138)

我有一个csv文件,看起来像这样:

,"CompanyNane",,,,,,,,,,,,,,,,,,,,,,,,,,,,Issue,,,,2021-02-27,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,Inspection Sheet  ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,<Sub>,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1/200,,,,page,,,,,
,Delivery No.,,,,,,,,SDK2302278101,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,W No.,,,,,,,,AFC1210-22SL,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,Manufacturer,,,,,,,,ManufacturerCompanyName,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,Diameter,,,,,,,,6in,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,Poly-type/Conductivity,,,,,,,,4H/n-type,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,face/Orientation,,,,,,,,Si/(0001) 4deg off,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,Surface Finish,,,,,,,,CMP,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,"Coordinate data (X, Y) (mm)",,,,,,,,,,,,,Thickness (um),,,,,,,,Carrier Conc. (cm-3),,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,(,,,", ",,,),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,(,0,,", ",70.0,,),,,,,14.67,,,,,,,,6829484552804830,,,,,,,,,,,,,,,,,,14.67
,,,,,,,(,0,,", ",60.0,,),,,,,15.18,,,,,,,,7269218633966170,,,,,,,,,,,,,,,,,,15.18
,,,,,,,(,0,,", ",45.0,,),,,,,15.35,,,,,,,,7101511576788490,,,,,,,,,,,,,,,,,,15.35
,,,,,,,(,0,,", ",30.0,,),,,,,15.32,,,,,,,,6874133261805120,,,,,,,,,,,,,,,,,,15.32
,,,,,,,(,0,,", ",15.0,,),,,,,15.2,,,,,,,,6701359170793300,,,,,,,,,,,,,,,,,,15.2
,,,,,,,(,0,,", ",0.0,,),,,,,15.13,,,,,,,,6638519222094540,,,,,,,,,,,,,,,,,,15.13
,,,,,,,(,0,,", ",-15.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,0,,", ",-30.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,0,,", ",-45.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,0,,", ",-60.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,0,,", ",-67.0,,),,,,,15.04,,,,,,,,7104368264503040,,,,,,,,,,,,,,,,,,15.04
,,,,,,,(,,,", ",,,),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,(,,,", ",,,),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,(,-70,,", ",0.0,,),,,,,14.54,,,,,,,,6828523559220620,,,,,,,,,,,,,,,,,,14.54
,,,,,,,(,-60,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,-45,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,-30,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,-15,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,15,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,30,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,45,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,60,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,70,,", ",0.0,,),,,,,14.65,,,,,,,,6866040706547180,,,,,,,,,,,,,,,,,,14.65
,,,,,,,(,,,", ",,,),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,Ave.,,,,,,,,,,,,,15.15,,,,,,,,6950000000000000,,,,,,,,,,,,,,,,,,
,,,,,Uniformity(sigma/Ave.),,,,,,,,,,,,,1.6,,,,,%,,,3.1,,,,,,%,,,,,,,,,,,,
,,,,,MAX,,,,,,,,,,,,,15.35,,,,,,,,7270000000000000,,,,,,,,,,,,,,,,,,
,,,,,MIN,,,,,,,,,,,,,14.54,,,,,,,,6640000000000000,,,,,,,,,,,,,,,,,,

老实说,我甚至不知道该从哪里入手。这个 CSV 文件不受我控制,因为它是从别处传入的文件。肯定有办法以某种方式替换掉这些逗号吧?我知道这些数据非常丑陋。在第一行中,我需要 Issue 以及它后面的日期;然后从下面这一行开始,我需要此后的所有数据。
起始行:

,<Sub>,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1/200,,,,page,,,,,
,Delivery No.,,,,,,,,SDK2302278101,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,W No.,,,,,,,,AFC1210-22SL,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,Manufacturer,,,,,,,,ManufacturerCompanyName,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,Diameter,,,,,,,,6in,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,Poly-type/Conductivity,,,,,,,,4H/n-type,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,face/Orientation,,,,,,,,Si/(0001) 4deg off,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,Surface Finish,,,,,,,,CMP,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

这些数据是水平排列的,通常我能处理这种情况,只是有了这么多逗号就不行了——逗号让各字段的索引全都不一样!
然后是这些行:

,,,,,"Coordinate data (X, Y) (mm)",,,,,,,,,,,,,Thickness (um),,,,,,,,Carrier Conc. (cm-3),,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,(,,,", ",,,),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,(,0,,", ",70.0,,),,,,,14.67,,,,,,,,6829484552804830,,,,,,,,,,,,,,,,,,14.67
,,,,,,,(,0,,", ",60.0,,),,,,,15.18,,,,,,,,7269218633966170,,,,,,,,,,,,,,,,,,15.18
,,,,,,,(,0,,", ",45.0,,),,,,,15.35,,,,,,,,7101511576788490,,,,,,,,,,,,,,,,,,15.35
,,,,,,,(,0,,", ",30.0,,),,,,,15.32,,,,,,,,6874133261805120,,,,,,,,,,,,,,,,,,15.32
,,,,,,,(,0,,", ",15.0,,),,,,,15.2,,,,,,,,6701359170793300,,,,,,,,,,,,,,,,,,15.2
,,,,,,,(,0,,", ",0.0,,),,,,,15.13,,,,,,,,6638519222094540,,,,,,,,,,,,,,,,,,15.13
,,,,,,,(,0,,", ",-15.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,0,,", ",-30.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,0,,", ",-45.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,0,,", ",-60.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,0,,", ",-67.0,,),,,,,15.04,,,,,,,,7104368264503040,,,,,,,,,,,,,,,,,,15.04
,,,,,,,(,,,", ",,,),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,(,,,", ",,,),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,(,-70,,", ",0.0,,),,,,,14.54,,,,,,,,6828523559220620,,,,,,,,,,,,,,,,,,14.54
,,,,,,,(,-60,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,-45,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,-30,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,-15,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,15,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,30,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,45,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,60,,", ",0.0,,),,,,,-,,,,,,,,-,,,,,,,,,,,,,,,,,,
,,,,,,,(,70,,", ",0.0,,),,,,,14.65,,,,,,,,6866040706547180,,,,,,,,,,,,,,,,,,14.65
,,,,,,,(,,,", ",,,),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,Ave.,,,,,,,,,,,,,15.15,,,,,,,,6950000000000000,,,,,,,,,,,,,,,,,,
,,,,,Uniformity(sigma/Ave.),,,,,,,,,,,,,1.6,,,,,%,,,3.1,,,,,,%,,,,,,,,,,,,
,,,,,MAX,,,,,,,,,,,,,15.35,,,,,,,,7270000000000000,,,,,,,,,,,,,,,,,,
,,,,,MIN,,,,,,,,,,,,,14.54,,,,,,,,6640000000000000,,,,,,,,,,,,,,,,,,

这些行就像传统的 CSV 文件:表头在上,数据在下。但同样,这些逗号让事情变得非常棘手。任何帮助都将不胜感激!我只是想把这些数据读入一个列表。
下面是我的当前代码:

// The asker's current attempt: opens the file with CsvHelper and tries to collect the
// name/value rows into GeneralData and the tabular rows into WData.
class CsvHelperTester
    {
        static void Main(string[] args)
        {

            // Reader configuration: no header record, "," as delimiter, ';' as the comment
            // character, and trimming enabled.
            var csvConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
            {
                HasHeaderRecord = false,
                HeaderValidated = null,
                IgnoreBlankLines = true,
                MissingFieldFound = null,
                AllowComments = true,
                Comment = ';',
                Delimiter = ",",
                TrimOptions = TrimOptions.Trim,
                
            };
            using (var streamReader = new StreamReader("C:filestuff"))
            {
                using (var csv = new CsvReader(streamReader, csvConfig))
                {

                    var records = new List<GeneralData>();

                    // Walk the file one row at a time.
                    while (csv.Read())
                    {
                        // When field 1 is empty, advance one more row before mapping.
                        // NOTE(review): the result of this inner Read() is not checked, and the
                        // row it lands on is mapped even if its field 1 is also empty.
                        if (csv.GetField(1) == "")
                        {
                            csv.Read();
                        }
                        records.Add(csv.GetRecord<GeneralData>());
                    }

                    // NOTE(review): the loop above only exits once Read() has returned false, so
                    // this call presumably has no rows left to return — confirm.
                    var WRecords = csv.GetRecords<WData>().ToList();
                    var columns = records;

                }
            }
        }
        // Name/value pair mapped purely by column position.
        public class GeneralData
        {
            // Mapped from field index 1.
            [Index(1)]
            public string Type { get; set; }
            // Mapped from field index 2.
            // NOTE(review): in the sample rows (e.g. "Delivery No.") the value sits at
            // index 9, not 2 — confirm the intended column.
            [Index(2)]
            public string Value { get; set; }
        }
        // Row type requested from GetRecords for the tabular section; its single property
        // carries no [Index] attribute.
        public class WData
        {
            public string Coordinate_Data { get; set; }
        }

    }

我试着做了2个列表,1个用于水平数据,1个用于传统的csv数据,但是没有成功

ctehm74n

ctehm74n1#

要用单个逗号替换所有重复的逗号,请使用Regex.Replace
以字符串形式读取 CSV 文件,然后使用:

// Replaces every run of one or more consecutive commas in csvString with a single comma.
// NOTE(review): the empty fields between values disappear as a result, so the original
// column positions are not preserved — confirm that is acceptable before indexing fields.
string result = Regex.Replace(csvString, ",+", ",");

最后,将结果字符串转换回CSV

zf2sa74q

zf2sa74q2#

这有点难看,但我认为你可以把两种做法结合起来:先查找已知的停止点,再配合使用按索引定位各列的数据类。

// Reads the inspection-sheet CSV section by section, using known marker cells as stop
// points and index-mapped classes (TopData, CoordinateData, SummaryData) for the columns:
//   1. first row            -> company / issue / issue date
//   2. rows after "<Sub>"   -> TopData (name/value pairs)
//   3. rows after the "Coordinate data (X, Y) (mm)" header -> CoordinateData
//   4. the "Ave." row and everything after it -> SummaryData
void Main()
{
    var config = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        HasHeaderRecord = false
    };

    using (var reader = new StreamReader("C:filestuff"))
    using (var csv = new CsvReader(reader, config))
    {
        // An empty file has no first row to take the header values from.
        if (!csv.Read())
        {
            return;
        }

        var company = csv.GetField(1);
        var issue = csv.GetField(29);
        var issueDate = csv.GetField(33);

        var topData = new List<TopData>();
        var coordinateData = new List<CoordinateData>();
        var summaryData = new List<SummaryData>();

        // Skip to <Sub>
        while (csv.Read() && csv.GetField(1) != "<Sub>") { }

        // Read horizontal data; rows whose name cell is empty are spacer rows.
        while (csv.Read() && csv.GetField(5) != "Coordinate data (X, Y) (mm)")
        {
            if (csv.GetField(1) != string.Empty)
            {
                topData.Add(csv.GetRecord<TopData>());
            }
        }

        // Read coordinate data until the "Ave." row; rows without an X value are skipped.
        var reachedSummary = false;
        while (csv.Read())
        {
            if (csv.GetField(5) == "Ave.")
            {
                reachedSummary = true;
                break;
            }

            if (csv.GetField(8) != string.Empty)
            {
                coordinateData.Add(csv.GetRecord<CoordinateData>());
            }
        }

        // Read summary data. The current row is the "Ave." row, so it is mapped first.
        // This is skipped when the file ended before "Ave." was found, because there is
        // then no current row to map.
        if (reachedSummary)
        {
            do
            {
                summaryData.Add(csv.GetRecord<SummaryData>());
            }
            while (csv.Read());
        }
    }
}

/// <summary>
/// One name/value pair from the horizontal section of the sheet, mapped by column position.
/// </summary>
public class TopData
{
    /// <summary>Label cell, read from field index 1.</summary>
    [Index(1)] public string Name { get; set; }

    /// <summary>Value cell, read from field index 9.</summary>
    [Index(9)] public string Value { get; set; }
}

/// <summary>
/// One summary row (for example "Ave.", "MAX" or "MIN"), mapped by column position.
/// </summary>
public class SummaryData
{
    /// <summary>Row label, read from field index 5.</summary>
    [Index(5)] public string Name { get; set; }

    /// <summary>First summary value, read from field index 18.</summary>
    [Index(18)] public string Value1 { get; set; }

    /// <summary>Second summary value, read from field index 26.</summary>
    [Index(26)] public string Value2 { get; set; }
}

/// <summary>
/// One measurement row of the coordinate table, mapped by column position.
/// </summary>
/// <remarks>
/// NOTE(review): in the sample header row, field index 18 sits under "Thickness (um)" and
/// index 26 under "Carrier Conc. (cm-3)", so the names <c>Mm</c> and <c>Thickness</c> look
/// shifted by one column. They are kept as-is so existing callers keep compiling — confirm
/// before renaming.
/// </remarks>
public class CoordinateData
{
    /// <summary>X coordinate, read from field index 8.</summary>
    [Index(8)] public string X { get; set; }

    /// <summary>Y coordinate, read from field index 11.</summary>
    [Index(11)] public string Y { get; set; }

    /// <summary>Value read from field index 18.</summary>
    [Index(18)] public string Mm { get; set; }

    /// <summary>Value read from field index 26.</summary>
    [Index(26)] public string Thickness { get; set; }
}
k97glaaz

k97glaaz3#

这是我的解决方案。你只有一个输入样本,所以我的代码可能需要一些调整才能在整个输入上工作。

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;


namespace ConsoleApplication2
{
    /// <summary>
    /// Reads the inspection-sheet CSV line by line with a small state machine and builds
    /// one <see cref="Delivery"/> per "&lt;Sub&gt;" row, each with its measured parts.
    /// </summary>
    class Program
    {

        const string FILENAME = @"c:\temp\test.csv";

        // A coordinate row with parentheses but no values, after surrounding commas and
        // spaces have been trimmed. It toggles between delivery rows and part rows.
        const string PART_BOUNDARY = "(,,,\", \",,,)";

        public enum State
        {
            NONE,
            GET_DELIVERY,
            GET_PART
        }

        static void Main(string[] args)
        {
            // The using block releases the file handle even if parsing throws.
            using (StreamReader reader = new StreamReader(FILENAME))
            {
                List<Delivery> deliveries = ReadDeliveries(reader);
            }
        }

        /// <summary>
        /// Parses every line available from <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">Source of CSV lines; it is read to the end but not disposed here.</param>
        /// <returns>The deliveries in the order their "&lt;Sub&gt;" rows were encountered.</returns>
        private static List<Delivery> ReadDeliveries(TextReader reader)
        {
            List<Delivery> deliveries = new List<Delivery>();
            Delivery delivery = null;
            Part part = null;
            State state = State.NONE;
            string line;

            while ((line = reader.ReadLine()) != null)
            {
                // Strip the padding commas/spaces so each row starts at its first real cell.
                line = line.Trim(new char[] { ' ', ',' });
                if (line.Length == 0)
                {
                    continue;
                }

                // Ordinal comparison: these are fixed markers, not linguistic text.
                if (line.StartsWith("<Sub>", StringComparison.Ordinal))
                {
                    state = State.GET_DELIVERY;
                    delivery = new Delivery();
                    deliveries.Add(delivery);
                    continue;
                }

                if (line.StartsWith("\"Coordinate data", StringComparison.Ordinal))
                {
                    // Column header of the coordinate table; carries no data.
                    continue;
                }

                switch (state)
                {
                    case State.GET_DELIVERY:
                        if (line == PART_BOUNDARY)
                        {
                            // Opening boundary: start a new part for the current delivery.
                            state = State.GET_PART;
                            part = new Part();
                            if (delivery.parts == null)
                            {
                                delivery.parts = new List<Part>();
                            }
                            delivery.parts.Add(part);
                        }
                        else
                        {
                            delivery.AddProperty(line);
                        }
                        break;

                    case State.GET_PART:
                        if (line == PART_BOUNDARY)
                        {
                            // Closing boundary: following rows belong to the delivery again.
                            state = State.GET_DELIVERY;
                        }
                        else
                        {
                            part.AddPoint(line);
                        }
                        break;
                }
            }

            return deliveries;
        }
    }
    /// <summary>
    /// Header and summary values of one delivery, filled one row at a time through
    /// <see cref="AddProperty"/>, plus the parts measured for it.
    /// </summary>
    public class Delivery
    {
        public string deliverNo { get; set; }
        public string wNo { get; set; }
        public string manufacturer { get; set; }
        public string diameter { get; set; }
        public string pTypeConductivity { get; set; }
        public string faceOrientation { get; set; }
        public string surfaceFinish { get; set; }
        public decimal average { get; set; }
        public decimal uniformitySigma { get; set; }
        public decimal uniformityAve { get; set; }
        public decimal max { get; set; }
        public decimal min { get; set; }
        public List<Part> parts { get; set; }

        /// <summary>
        /// Stores the value carried by one trimmed CSV row, chosen by the row's first cell.
        /// Rows with an unrecognised first cell are ignored.
        /// </summary>
        /// <param name="line">A row whose leading and trailing commas have already been trimmed.</param>
        /// <exception cref="FormatException">A numeric cell is not a valid decimal.</exception>
        /// <exception cref="IndexOutOfRangeException">A numeric row is shorter than expected.</exception>
        public void AddProperty(string line)
        {
            string[] splitArray = line.Split(new char[] { ',' });
            switch (splitArray[0])
            {
                case "Delivery No.":
                    deliverNo = TextAt(splitArray, 8);
                    break;
                case "W No.":
                    wNo = TextAt(splitArray, 8);
                    break;
                case "Manufacturer":
                    manufacturer = TextAt(splitArray, 8);
                    break;
                case "Diameter":
                    diameter = TextAt(splitArray, 8);
                    break;
                case "Poly-type/Conductivity":
                    pTypeConductivity = TextAt(splitArray, 8);
                    break;
                case "face/Orientation":
                    faceOrientation = TextAt(splitArray, 8);
                    break;
                case "Surface Finish":
                    surfaceFinish = TextAt(splitArray, 8);
                    break;
                case "Ave.":
                    average = ParseDecimal(splitArray[13]);
                    break;
                case "Uniformity(sigma/Ave.)":
                    // This row carries two values: index 13 and index 21.
                    uniformityAve = ParseDecimal(splitArray[13]);
                    uniformitySigma = ParseDecimal(splitArray[21]);
                    break;
                case "MAX":
                    max = ParseDecimal(splitArray[13]);
                    break;
                case "MIN":
                    min = ParseDecimal(splitArray[13]);
                    break;
            }
        }

        // Returns the cell at index, or an empty string when the row ends before it.
        // A blank value leaves nothing after the label once the caller has trimmed
        // trailing commas.
        private static string TextAt(string[] fields, int index)
        {
            return index < fields.Length ? fields[index] : string.Empty;
        }

        // The file always uses '.' as the decimal separator, so parsing must not depend
        // on the culture of the machine running the code.
        private static decimal ParseDecimal(string text)
        {
            return decimal.Parse(text, System.Globalization.CultureInfo.InvariantCulture);
        }
    }
    /// <summary>
    /// A group of measurement points, each stored as (x, y, thickness).
    /// </summary>
    public class Part
    {
        public string part { get; set; }
        public decimal thinkness { get; set; }
        public List<Tuple<decimal, decimal, decimal>> points { get; set; }

        // Thickness of the most recently added point; reused when a row reports "-".
        decimal lastThinkness = 0;

        /// <summary>
        /// Parses one trimmed coordinate row and appends it to <see cref="points"/>.
        /// X is read from cell 1, Y from cell 5 and thickness from cell 12. A thickness
        /// of "-" repeats the previous point's thickness (0 for the first point).
        /// </summary>
        /// <param name="line">A row starting at its "(" cell, with outer commas trimmed.</param>
        /// <exception cref="FormatException">A numeric cell is not a valid decimal.</exception>
        /// <exception cref="IndexOutOfRangeException">The row has fewer than 13 cells.</exception>
        public void AddPoint(string line)
        {
            if (points == null)
            {
                points = new List<Tuple<decimal, decimal, decimal>>();
            }

            string[] cells = line.Split(new char[] { ',' });

            // The file always uses '.' as the decimal separator, so parse with the
            // invariant culture rather than the culture of the current thread.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            decimal x = decimal.Parse(cells[1], invariant);
            decimal y = decimal.Parse(cells[5], invariant);

            decimal thickness;
            if (cells[12] == "-")
            {
                thickness = lastThinkness;
            }
            else
            {
                thickness = decimal.Parse(cells[12], invariant);
            }
            lastThinkness = thickness;

            points.Add(Tuple.Create(x, y, thickness));
        }
    }

}

相关问题