我有一个字节流输入(大约 100 MB)。我需要将该字节流解析成一个大数据对象,其中包含 200 万个数据项对象(每个约 50 字节)。
每个数据项都有 int、short 以及其他对象等成员。我尝试用 DataInputStream 解析这 200 万个数据项,
但需要好几秒钟。能在一秒钟内完成吗?以下是示例:
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
/**
 * One parsed record: four nested part objects followed by two plain {@code int} fields.
 *
 * <p>As written, the class is write-only: it exposes setters and no getters.
 */
class DataItem {
    private Part0 member0;
    private Part1 member1;
    private Part3 member3;
    private Part4 member4;
    private int member5;
    private int member6;

    /** Stores the {@link Part0} section of this record. */
    public void setMember0(Part0 part0) {
        member0 = part0;
    }

    /** Stores the {@link Part1} section of this record. */
    public void setMember1(Part1 part1) {
        member1 = part1;
    }

    /** Stores the {@link Part3} section of this record. */
    public void setMember3(Part3 part3) {
        member3 = part3;
    }

    /** Stores the {@link Part4} section of this record. */
    public void setMember4(Part4 part4) {
        member4 = part4;
    }

    /** Stores the first trailing {@code int} field. */
    public void setMember5(int value) {
        member5 = value;
    }

    /** Stores the second trailing {@code int} field. */
    public void setMember6(int value) {
        member6 = value;
    }
}
/** Record section that pairs a nested {@link Part2} with a text field. */
class Part0 {
    Part2 member1;
    String member2;

    /** Stores the nested {@link Part2}. */
    public void setMember1(Part2 part2) {
        member1 = part2;
    }

    /** Stores the text field. */
    public void setMember2(String text) {
        member2 = text;
    }
}
/** Record section made of one {@code short} followed by six single-byte fields. */
class Part1 {
    short member1;
    byte member2;
    byte member3;
    byte member4;
    byte member5;
    byte member6;
    byte member7;

    /** Stores the leading {@code short} field. */
    public void setMember1(short value) {
        member1 = value;
    }

    /** Stores the first byte field. */
    public void setMember2(byte value) {
        member2 = value;
    }

    /** Stores the second byte field. */
    public void setMember3(byte value) {
        member3 = value;
    }

    /** Stores the third byte field. */
    public void setMember4(byte value) {
        member4 = value;
    }

    /** Stores the fourth byte field. */
    public void setMember5(byte value) {
        member5 = value;
    }

    /** Stores the fifth byte field. */
    public void setMember6(byte value) {
        member6 = value;
    }

    /** Stores the sixth byte field. */
    public void setMember7(byte value) {
        member7 = value;
    }
}
/** Record section holding three {@code short}s, one {@code int} and two {@code byte}s. */
class Part2 {
    short member1;
    short member2;
    int member3;
    byte member4;
    byte member5;
    short member6;

    /** Stores {@code member1}. */
    public void setMember1(short value) {
        member1 = value;
    }

    /** Stores {@code member2}. */
    public void setMember2(short value) {
        member2 = value;
    }

    /** Stores {@code member3}. */
    public void setMember3(int value) {
        member3 = value;
    }

    /** Stores {@code member4}. */
    public void setMember4(byte value) {
        member4 = value;
    }

    /** Stores {@code member5}. */
    public void setMember5(byte value) {
        member5 = value;
    }

    /** Stores {@code member6}. */
    public void setMember6(short value) {
        member6 = value;
    }
}
/** Record section consisting of two {@code short} fields. */
class Part3 {
    short member1;
    short member2;

    /** Stores the first {@code short}. */
    public void setMember1(short value) {
        member1 = value;
    }

    /** Stores the second {@code short}. */
    public void setMember2(short value) {
        member2 = value;
    }
}
/** Record section consisting of one {@code int} followed by two {@code short} fields. */
class Part4 {
    int member1;
    short member2;
    short member3;

    /** Stores the {@code int} field. */
    public void setMember1(int member) {
        this.member1 = member;
    }

    /** Stores the first {@code short} field. */
    public void setMember2(short member) {
        this.member2 = member;
    }

    /**
     * Stores the first {@code short} field.
     *
     * @deprecated misspelled name kept so existing callers keep compiling;
     *     use {@link #setMember2(short)} instead.
     */
    @Deprecated
    public void setzMember2(short member) {
        setMember2(member);
    }

    /** Stores the second {@code short} field. */
    public void setMember3(short member) {
        this.member3 = member;
    }
}
/**
 * Micro-benchmark that parses fixed-layout records out of an in-memory byte array, once through
 * a {@link DataInputStream} and once through a {@link BufferedInputStream}.
 *
 * <p>Each record read by the two parse methods is 50 bytes long (8 + 12 + 10 + 4 + 8 + 8), and
 * multi-byte values are read big-endian. If the input holds fewer than {@code runtimes * 50}
 * bytes, both parse paths fail with an {@link EOFException}.
 *
 * <p>NOTE(review): {@code HageData} is not declared in this file; it is assumed to be a project
 * class with a no-arg constructor and an {@code addDataItems(DataItem)} method.
 */
public class testForHugeData {
    /** Scratch file that supplies the benchmark input. */
    private static final String TEST_FILE_NAME = "test.txt";

    /** Size in bytes of the generated scratch file; change it to benchmark other input sizes. */
    private static final long TEST_FILE_SIZE = 100000000L;

    /** Number of raw bytes decoded into the text field of {@link Part0}. */
    private static final int TEXT_FIELD_LENGTH = 10;

    /**
     * Creates the scratch file, loads it into memory and runs both parsing benchmarks.
     *
     * @param args ignored
     * @throws IOException if the scratch file cannot be created or read, or if the input is too
     *     short for the requested number of records
     */
    public static void main(String[] args) throws IOException {
        int runtimes = 2000000;
        createFile();
        byte[] arr = readTestFile();
        System.out.println("a input byteStream sized "+arr.length +" is created");
        // Each benchmark gets its own stream over the same bytes. Chaining the buffered stream
        // onto the DataInputStream would make the second benchmark start wherever the first one
        // stopped (at end-of-stream for a 100,000,000-byte input), so it would not parse data.
        try (DataInputStream ds = new DataInputStream(new ByteArrayInputStream(arr));
                BufferedInputStream fs = new BufferedInputStream(new ByteArrayInputStream(arr))) {
            runTaskForManyTimes(runtimes, ds, fs);
        }
    }

    /** Reads the whole scratch file into a byte array, closing the file even on failure. */
    private static byte[] readTestFile() throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (FileInputStream in = new FileInputStream(TEST_FILE_NAME)) {
            byte[] buffer = new byte[8192];
            int len;
            while ((len = in.read(buffer, 0, buffer.length)) != -1) {
                bos.write(buffer, 0, len);
            }
        }
        return bos.toByteArray();
    }

    /**
     * Parses {@code runtimes} records from each stream and prints the elapsed time of each pass.
     *
     * @param runtimes number of records to parse per pass
     * @param ds source for the {@link DataInputStream} pass
     * @param fs source for the {@link BufferedInputStream} pass
     * @throws IOException if either stream fails or ends before all records are read
     */
    private static void runTaskForManyTimes(int runtimes, DataInputStream ds, BufferedInputStream fs) throws IOException {
        HageData hugeData = new HageData();
        // nanoTime is monotonic, so the measurement is unaffected by wall-clock adjustments.
        long start = System.nanoTime();
        for (int i = 0; i < runtimes; i++) {
            hugeData.addDataItems(taskUseDataInputStream(runtimes, ds));
        }
        System.out.println("use dataIuputStream to analyze byte stream:");
        System.out.println(" it takes "+elapsedMillis(start)+"ms to loop 2 million times");

        HageData hugeData1 = new HageData();
        start = System.nanoTime();
        for (int i = 0; i < runtimes; i++) {
            hugeData1.addDataItems(taskUseBufferedInputStream(runtimes, fs));
        }
        System.out.println("use bufferedIuputStream to analyze byte stream:");
        System.out.println(" it takes "+elapsedMillis(start)+"ms to loop 2 million times");
    }

    /** Returns the whole milliseconds elapsed since {@code startNanos}, a {@link System#nanoTime()} value. */
    private static long elapsedMillis(long startNanos) {
        return (System.nanoTime() - startNanos) / 1000000L;
    }

    /**
     * Reads one 50-byte record from {@code ds} and assembles it into a {@link DataItem}.
     *
     * @param runtimes unused; kept so the signature stays unchanged
     * @param ds stream positioned at the start of a record
     * @return the populated item
     * @throws IOException if the stream fails or ends inside the record
     */
    private static DataItem taskUseDataInputStream(int runtimes, DataInputStream ds) throws IOException {
        DataItem item = new DataItem();

        Part1 part1 = new Part1();
        part1.setMember1(ds.readShort());
        part1.setMember2(ds.readByte());
        part1.setMember3(ds.readByte());
        part1.setMember4(ds.readByte());
        part1.setMember5(ds.readByte());
        part1.setMember6(ds.readByte());
        part1.setMember7(ds.readByte());
        item.setMember1(part1);

        // The setter order below mirrors the byte order of the record, not the field numbering.
        Part2 part2 = new Part2();
        part2.setMember1(ds.readShort());
        part2.setMember3(ds.readInt());
        part2.setMember5(ds.readByte());
        part2.setMember2(ds.readShort());
        part2.setMember6(ds.readShort());
        part2.setMember4(ds.readByte());

        byte[] tmp = new byte[TEXT_FIELD_LENGTH];
        ds.readFully(tmp);

        Part0 part0 = new Part0();
        part0.setMember1(part2);
        // NOTE(review): decoding uses the platform default charset; confirm the intended encoding.
        part0.setMember2(new String(tmp));
        item.setMember0(part0);

        Part3 part3 = new Part3();
        part3.setMember1(ds.readShort());
        part3.setMember2(ds.readShort());
        item.setMember3(part3);

        Part4 part4 = new Part4();
        part4.setMember1(ds.readInt());
        part4.setzMember2(ds.readShort());
        part4.setMember3(ds.readShort());
        item.setMember4(part4);

        item.setMember5(ds.readInt());
        item.setMember6(ds.readInt());
        return item;
    }

    /**
     * Reads one 50-byte record from {@code fs} with the same layout as
     * {@link #taskUseDataInputStream(int, DataInputStream)}.
     *
     * @param runtimes unused; kept so the signature stays unchanged
     * @param fs stream positioned at the start of a record
     * @return the populated item
     * @throws IOException if the stream fails or ends inside the record
     */
    private static DataItem taskUseBufferedInputStream(int runtimes, BufferedInputStream fs) throws IOException {
        DataItem item = new DataItem();

        Part1 part1 = new Part1();
        part1.setMember1(readShort(fs));
        part1.setMember2(readByte(fs));
        part1.setMember3(readByte(fs));
        part1.setMember4(readByte(fs));
        part1.setMember5(readByte(fs));
        part1.setMember6(readByte(fs));
        part1.setMember7(readByte(fs));
        item.setMember1(part1);

        // The setter order below mirrors the byte order of the record, not the field numbering.
        Part2 part2 = new Part2();
        part2.setMember1(readShort(fs));
        part2.setMember3(readInt(fs));
        part2.setMember5(readByte(fs));
        part2.setMember2(readShort(fs));
        part2.setMember6(readShort(fs));
        part2.setMember4(readByte(fs));

        byte[] tmp = new byte[TEXT_FIELD_LENGTH];
        readFully(fs, tmp);

        Part0 part0 = new Part0();
        part0.setMember1(part2);
        // NOTE(review): decoding uses the platform default charset; confirm the intended encoding.
        part0.setMember2(new String(tmp));
        item.setMember0(part0);

        Part3 part3 = new Part3();
        part3.setMember1(readShort(fs));
        part3.setMember2(readShort(fs));
        item.setMember3(part3);

        Part4 part4 = new Part4();
        part4.setMember1(readInt(fs));
        part4.setzMember2(readShort(fs));
        part4.setMember3(readShort(fs));
        item.setMember4(part4);

        item.setMember5(readInt(fs));
        item.setMember6(readInt(fs));
        return item;
    }

    /**
     * Reads a big-endian {@code short} from {@code fs}.
     *
     * @throws EOFException if the stream ends before two bytes are read
     */
    private static short readShort(BufferedInputStream fs) throws IOException {
        // Combine unsigned values: OR-ing a sign-extended low byte would set the high bits.
        int high = readUnsignedByte(fs);
        int low = readUnsignedByte(fs);
        return (short) ((high << 8) | low);
    }

    /**
     * Reads a big-endian {@code int} from {@code fs}.
     *
     * @throws EOFException if the stream ends before four bytes are read
     */
    private static int readInt(BufferedInputStream fs) throws IOException {
        int b0 = readUnsignedByte(fs);
        int b1 = readUnsignedByte(fs);
        int b2 = readUnsignedByte(fs);
        int b3 = readUnsignedByte(fs);
        return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
    }

    /** Reads one byte from {@code fs} as a signed value, failing on end-of-stream. */
    private static byte readByte(BufferedInputStream fs) throws IOException {
        return (byte) readUnsignedByte(fs);
    }

    /**
     * Reads one byte from {@code fs} as a value in {@code 0..255}.
     *
     * @throws EOFException if the stream is exhausted, instead of letting -1 be cast to 0xFF
     */
    private static int readUnsignedByte(BufferedInputStream fs) throws IOException {
        int value = fs.read();
        if (value < 0) {
            throw new EOFException("unexpected end of stream");
        }
        return value;
    }

    /**
     * Fills {@code target} completely from {@code fs}.
     *
     * @throws EOFException if the stream ends before {@code target} is full
     */
    private static void readFully(BufferedInputStream fs, byte[] target) throws IOException {
        int filled = 0;
        while (filled < target.length) {
            int count = fs.read(target, filled, target.length - filled);
            if (count < 0) {
                throw new EOFException("unexpected end of stream");
            }
            filled += count;
        }
    }

    /**
     * Creates (or resizes) the scratch file so it is exactly {@link #TEST_FILE_SIZE} bytes long.
     *
     * @throws IOException if the file cannot be opened or resized
     */
    private static void createFile() throws IOException {
        // Opening in "rw" mode creates the file when it is missing, so no separate
        // exists()/createNewFile() step is needed.
        try (RandomAccessFile raf = new RandomAccessFile(new File(TEST_FILE_NAME), "rw")) {
            raf.setLength(TEST_FILE_SIZE); // you can change the size via TEST_FILE_SIZE
        }
    }
}
结果如下:
a input byteStream sized 100000000 is created
use dataIuputStream to analyze byte stream:
it takes 4489ms to loop 2 million times
use bufferedIuputStream to analyze byte stream:
it takes 4686ms to loop 2 million times
所以看起来 BufferedInputStream 比较慢?但当我把输入字节流的大小改为 400 MB(通过将测试文件大小改为 400 MB)时,结果是:
a input byteStream sized 400000000 is created
use dataIuputStream to analyze byte stream:
it takes 4740ms to loop 2 million times
use bufferedIuputStream to analyze byte stream:
it takes 1384ms to loop 2 million times
所以 BufferedInputStream 的性能似乎取决于缓冲区大小(buffer size)。无论如何,耗时还是太长了。
暂无答案!
目前还没有任何答案,快来回答吧!