[英]All the domains for enum columns; the entry for each non-enum column is null.
代码示例来源:origin: h2oai/h2o-3
public NBTask(Key<Job> jobKey, DataInfo dinfo, int nres) {
_jobKey = jobKey;
_dinfo = dinfo;
_nrescat = nres;
_domains = dinfo._adaptedFrame.domains();
_npreds = dinfo._cats + dinfo._nums;
代码示例来源:origin: h2oai/h2o-2
/** Full constructor from frame: Strips out the Vecs to just the names needed
* to match columns later for future datasets. */
public Model( Key selfKey, Key dataKey, Frame fr, float[] priorClassDist ) {
this(selfKey,dataKey,fr.names(),fr.domains(), priorClassDist, null, 0, 0);
public Model( Key selfKey, Key dataKey, String names[], String domains[][], float[] priorClassDist, float[] modelClassDist) {
代码示例来源:origin: h2oai/h2o-2
public NBTask(Job job, DataInfo dinfo) {
_job = job;
_dinfo = dinfo;
_nobs = 0;
String[][] domains = dinfo._adaptedFrame.domains();
int ncol = dinfo._adaptedFrame.numCols();
assert ncol-1 == dinfo._nums + dinfo._cats; // ncol-1 because we drop response col
_nres = domains[ncol-1].length;
_rescnt = new double[_nres];
_jntcnt = new double[ncol-1][][];
for(int i = 0; i < _jntcnt.length; i++) {
int ncnt = domains[i] == null ? 2 : domains[i].length;
_jntcnt[i] = new double[_nres][ncnt];
代码示例来源:origin: h2oai/h2o-3
private void setDataInfoToOutput(DataInfo dinfo) {
if (dinfo == null) return;
// update the model's expected frame format - needed for train/test adaptation
_output._domains = dinfo._adaptedFrame.domains();
_output._nums = dinfo._nums;
_output._cats = dinfo._cats;
_output._catOffsets = dinfo._catOffsets;
_output._normMul = dinfo._normMul;
_output._normSub = dinfo._normSub;
_output._normRespMul = dinfo._normRespMul;
_output._normRespSub = dinfo._normRespSub;
_output._useAllFactorLevels = dinfo._useAllFactorLevels;
代码示例来源:origin: h2oai/h2o-2
public static Frame shuffleFramePerChunk(Key outputFrameKey, Frame fr, final long seed) {
Frame r = new MRTask2() {
public void map(Chunk[] cs, NewChunk[] ncs) {
long[] idx = new long[cs[0]._len];
for (int r=0; r<idx.length; ++r) idx[r] = r;
Utils.shuffleArray(idx, seed);
for (int r=0; r<idx.length; ++r) {
for (int i = 0; i < ncs.length; i++) {
}.doAll(fr.numCols(), fr).outputFrame(outputFrameKey, fr.names(), fr.domains());
return r;
代码示例来源:origin: h2oai/h2o-2
/** Single row scoring, on a compatible Frame. */
public final float[] score( Frame fr, boolean exact, int row ) {
double tmp[] = new double[fr.numCols()];
for( int i=0; i<tmp.length; i++ )
tmp[i] = fr.vecs()[i].at(row);
return score(fr.names(),fr.domains(),exact,tmp);
代码示例来源:origin: h2oai/h2o-2
@Override void apply(Env env, int argcnt, ASTApply apply) {
final String level = env.popStr();
String skey = env.key();
Frame fr = env.popAry();
if (fr.numCols() != 1) throw new IllegalArgumentException("setLevel works on a single column at a time.");
String[] doms = fr.anyVec().domain().clone();
if( doms == null )
throw new IllegalArgumentException("Cannot set the level on a non-factor column!");
final int idx = Arrays.asList(doms).indexOf(level);
if (idx == -1)
throw new IllegalArgumentException("Did not find level `" + level + "` in the column.");
Frame fr2 = new MRTask2() {
@Override public void map(Chunk c, NewChunk nc) {
for (int i=0;i<c._len;++i)
}.doAll(1, fr.anyVec()).outputFrame(null, fr.names(), fr.domains());
env.subRef(fr, skey);
env.poppush(1, fr2, null);
代码示例来源:origin: h2oai/h2o-3
@Override protected void checkMemoryFootPrint_impl() {
if (_parms._checkpoint != null) return;
long p = hex.util.LinearAlgebraUtils.numColsExp(_train,true) - (_parms._autoencoder ? 0 : _train.lastVec().cardinality());
String[][] dom = _train.domains();
代码示例来源:origin: h2oai/h2o-3
protected void checkMemoryFootPrint_impl() {
// compute memory usage for pcond matrix
long mem_usage = (_train.numCols() - 1) * _train.lastVec().cardinality();
String[][] domains = _train.domains();
long count = 0;
for (int i = 0; i < _train.numCols() - 1; i++) {
count += domains[i] == null ? 2 : domains[i].length;
mem_usage *= count;
mem_usage *= 8; //doubles
long max_mem = H2O.SELF._heartbeat.get_free_mem();
if (mem_usage > max_mem) {
String msg = "Conditional probabilities won't fit in the driver node's memory ("
+ PrettyPrint.bytes(mem_usage) + " > " + PrettyPrint.bytes(max_mem)
+ ") - try reducing the number of columns, the number of response classes or the number of categorical factors of the predictors.";
error("_train", msg);
代码示例来源:origin: h2oai/h2o-2
private Vec[][] makeTemplates(Frame dataset, float[] ratios) {
Vec anyVec = dataset.anyVec();
final long[][] espcPerSplit = computeEspcPerSplit(anyVec._espc, anyVec.length(), ratios);
final int num = dataset.numCols(); // number of columns in input frame
final int nsplits = espcPerSplit.length; // number of splits
final String[][] domains = dataset.domains(); // domains
final boolean[] uuids = dataset.uuids();
final byte [] times = dataset.times();
Vec[][] t = new Vec[nsplits][/*num*/]; // resulting vectors for all
for (int i=0; i<nsplits; i++) {
// vectors for i-th split
t[i] = new Vec(Vec.newKey(),espcPerSplit[i/*-th split*/]).makeZeros(num, domains, uuids, times);
return t;
代码示例来源:origin: h2oai/h2o-2
/** Create templates for the vectors composing the output frame. */
protected Vec[][] makeTemplates() {
Vec anyVec = dataset.anyVec();
final long[][] espcPerSplit = computeEspcPerSplit(anyVec._espc, anyVec.length());
final int num = dataset.numCols(); // number of columns in input frame
final int nsplits = espcPerSplit.length; // number of splits
final String[][] domains = dataset.domains(); // domains
final boolean[] uuids = dataset.uuids();
final byte[] times = dataset.times();
Vec[][] t = new Vec[nsplits][/*num*/]; // resulting vectors for all
for (int i=0; i<nsplits; i++) {
// vectors for i-th split
t[i] = new Vec(Vec.newKey(),espcPerSplit[i/*-th split*/]).makeZeros(num, domains, uuids, times);
return t;
代码示例来源:origin: h2oai/h2o-3
TotSS totss = new TotSS(means,mults,modes, train().domains(), train().cardinality()).doAll(vecs);
model._output._totss = totss._tss;
代码示例来源:origin: h2oai/h2o-2
@Override void apply(Env env, int argcnt, ASTApply apply) {
if(!env.isAry()) { env.poppush(Double.NaN); return; }
Frame fr = env.popAry();
String skey = env.key();
Frame fr2 = new Scale().doIt(fr.numCols(), fr).outputFrame(fr._names, fr.domains());
env.pop(); // Pop self
代码示例来源:origin: h2oai/h2o-2
public static Frame[] runifSplit(Frame f, float threshold, long seed) {
if (seed == -1) seed = new Random().nextLong();
Vec rv = new Vec(f.anyVec().group().addVecs(1)[0],f.anyVec()._espc);
Futures fs = new Futures();
DKV.put(rv._key,rv, fs);
for(int i = 0; i < rv._espc.length-1; ++i)
DKV.put(rv.chunkKey(i),new C0DChunk(0,(int)(rv._espc[i+1]-rv._espc[i])),fs);
final long zeed = seed;
new MRTask2() {
@Override public void map(Chunk c){
Random rng = new Random(zeed*c.cidx());
for(int i = 0; i < c._len; ++i)
c.set0(i, (float)rng.nextDouble());
Vec[] vecs = new Vec[f.numCols()+1];
System.arraycopy(f.vecs(), 0, vecs,0, f.numCols());
vecs[f.numCols()] = rv;
Frame doAllFr = new Frame(null, vecs);
// it would be great if there was a map call for NewChunk[][] multi frame output
Frame left = new DeepSelectThresh(threshold, true).doAll(f.numCols(),doAllFr).outputFrame(Key.make(), f.names(), f.domains());
Frame rite = new DeepSelectThresh(threshold, false).doAll(f.numCols(),doAllFr).outputFrame(Key.make(), f.names(), f.domains());
return new Frame[]{left,rite};
代码示例来源:origin: h2oai/h2o-3
private static Frame selectByPredicate(Frame fr, Frame predicateFrame) {
String[] names = fr.names().clone();
byte[] types = fr.types().clone();
String[][] domains = fr.domains().clone();
fr.add("predicate", predicateFrame.anyVec());
Frame filtered = new Frame.DeepSelect().doAll(types, fr).outputFrame(Key.<Frame>make(), names, domains);
return filtered;
代码示例来源:origin: h2oai/h2o-2
public void compute2() {
final Vec [] srcVecs = _in.vecs();
_out = new Frame(_okey,_in.names(), new Vec(_vg.addVec(),_espc).makeZeros(srcVecs.length,_in.domains(),_in.uuids(),_in.times()));
new RebalanceTask(this,srcVecs).asyncExec(_out);
代码示例来源:origin: h2oai/h2o-3
public Frame scoreExemplarMembers(Key<Frame> destination_key, final int exemplarIdx) {
Vec booleanCol = new MRTask() {
public void map(Chunk c, NewChunk nc) {
for (int i=0;i<c._len;++i)
nc.addNum(c.at8(i)==_exemplars[exemplarIdx].gid ? 1 : 0,0);
}.doAll(Vec.T_NUM, new Frame(new Vec[]{_exemplar_assignment_vec_key.get()})).outputFrame().anyVec();
Frame orig = _parms.train();
Vec[] vecs = Arrays.copyOf(orig.vecs(), orig.vecs().length+1);
vecs[vecs.length-1] = booleanCol;
Frame ff = new Frame(orig.names(), orig.vecs());
ff.add("predicate", booleanCol);
Frame res = new Frame.DeepSelect().doAll(orig.types(),ff).outputFrame(destination_key, orig.names(), orig.domains());
return res;
代码示例来源:origin: h2oai/h2o-3
Frame res = new Frame.DeepSelect().doAll(orig.types(),ff).outputFrame(destination_key, orig.names(), orig.domains());
代码示例来源:origin: h2oai/h2o-3
@Test public void testDomains() {
Frame frame = parse_test_file("smalldata/junit/weather.csv");
for (String s : new String[]{"MaxWindSpeed", "RelHumid9am", "Cloud9am"}) {
Vec v = frame.vec(s);
Vec newV = v.toCategoricalVec();
AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
parms._train = frame._key;
parms._target_num_exemplars = 17;
AggregatorModel agg = new Aggregator(parms).trainModel().get();
Frame output = agg._output._output_frame.get();
Assert.assertTrue(output.numRows() <= 17);
boolean same = true;
for (int i=0;i<frame.numCols();++i) {
if (frame.vec(i).isCategorical()) {
same = (frame.domains()[i].length == output.domains()[i].length);
if (!same) break;
代码示例来源:origin: h2oai/h2o-3
parms._response_column = "petal_wid";
Model.InteractionBuilder interactionBldr = interactionBuilder(dinfo);
Model.adaptTestForTrain(frSplits[1],null,null,dinfo._adaptedFrame.names(),dinfo._adaptedFrame.domains(),parms,true,false, interactionBldr,null,null, false);
scoreInfo = dinfo.scoringInfo(dinfo._adaptedFrame._names,frSplits[1]);