Usage of the org.apache.spark.sql.hive.HiveContext class, with code examples


This article collects Java code examples for the org.apache.spark.sql.hive.HiveContext class and shows how it is used in practice. The examples come from selected open-source projects on platforms such as GitHub, Stack Overflow, and Maven, and should be useful as references. Details of the HiveContext class:
Package: org.apache.spark.sql.hive
Class name: HiveContext

About HiveContext

HiveContext is the Spark 1.x entry point for Spark SQL with Hive support. It extends SQLContext and adds the ability to run HiveQL queries, call Hive UDFs, and read and write tables in the Hive metastore. In Spark 2.x it has been superseded by SparkSession with enableHiveSupport(), but it is still common in older code bases such as the examples below.
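
As a minimal sketch of how the class is typically wired up (assuming a Spark 1.x dependency and a Hive metastore configured through hive-site.xml; the app name, master URL, and the table name src are placeholders, not taken from any project quoted below):

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.hive.HiveContext;

public class HiveContextSketch
{
    public static void main(String[] args)
    {
        // App name and master are placeholders; point them at your own cluster.
        SparkConf conf = new SparkConf().setAppName("hive-context-sketch").setMaster("local[2]");
        JavaSparkContext jsc = new JavaSparkContext(conf);

        // HiveContext wraps the underlying SparkContext and reads hive-site.xml from the classpath.
        HiveContext hiveContext = new HiveContext(jsc.sc());

        // HiveQL runs against the configured metastore; "src" is a hypothetical table.
        hiveContext.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)");
        DataFrame result = hiveContext.sql("SELECT key, value FROM src LIMIT 10");
        result.show();

        jsc.stop();
    }
}

In Spark 2.x the same setup collapses into SparkSession.builder().enableHiveSupport().getOrCreate(), which is why HiveContext mostly appears in legacy projects.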

Code examples

Code example source: Impetus/Kundera

@Override
public void initialize(Map<String, Object> puProperties)
{
  reader = new SparkEntityReader(kunderaMetadata);
  setExternalProperties(puProperties);
  initializePropertyReader();
  PersistenceUnitMetadata pum = kunderaMetadata.getApplicationMetadata().getPersistenceUnitMetadata(
      getPersistenceUnit());
  sparkconf = new SparkConf(true);
  configureClientProperties(pum);
  sparkContext = new SparkContext(sparkconf);
  sqlContext = new HiveContext(sparkContext);
}

Code example source: Impetus/Kundera

@Override
public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
  sparkClient.sqlContext.sql("use " + m.getSchema());
}

Code example source: Impetus/Kundera

@Override
public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient)
{
  try
  {
    Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
    ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
    JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
    DataFrame df = sparkClient.sqlContext.createDataFrame(personRDD, m.getEntityClazz());
    sparkClient.sqlContext.sql("use " + m.getSchema());
    if (logger.isDebugEnabled())
    {
      logger.info("Below are the registered table with hive context: ");
      sparkClient.sqlContext.sql("show tables").show();
    }
    df.write().insertInto(m.getTableName());
    return true;
  }
  catch (Exception e)
  {
    throw new KunderaException("Cannot persist object(s)", e);
  }
}

Code example source: Quetzal-RDF/quetzal

public static void main(String[] args)
{
  // SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("local[2]");
  // SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://Kavithas-MBP.home:7077");
  SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://kavithas-mbp.watson.ibm.com:7077");

  JavaSparkContext sc = new JavaSparkContext(conf);
  HiveContext sqlContext = new HiveContext(sc.sc());

  DataFrame urls = sqlContext.read().json("/tmp/urls.json");
  urls.registerTempTable("urls");
  DataFrame temp = sqlContext.sql("select * from urls");
  temp.show();

  sqlContext.sql("add jar /tmp/quetzal.jar");
  sqlContext.sql("create temporary function webservice as 'com.ibm.research.rdf.store.utilities.WebServiceGetUDTF'");
  DataFrame drugs = sqlContext.sql("select webservice(\"drug,id,action\", \"url\", \"\", \"GET\", \"xs=http://www.w3.org/2001/XMLSchema\", \"//row\",\"drug\",\"./drug\","
      + " \"<string>\", \"id\", \"./id\",\"<string>\", \"action\", \"./action\", \"<string>\", url) as (drug, drug_typ, id, id_typ, action, action_typ) from urls");
  drugs.show();
  System.out.println("Num rows:" + drugs.count());
}

Code example source: ddf-project/DDF

private void initialize(SparkContext sparkContext, Map<String, String> params) throws DDFException {
 this.setSparkContext(sparkContext == null ? this.createSparkContext(params) : sparkContext);
 this.mHiveContext = new HiveContext(this.mSparkContext);
 String compression = System.getProperty("spark.sql.inMemoryColumnarStorage.compressed", "true");
 String batchSize = System.getProperty("spark.sql.inMemoryColumnarStorage.batchSize", "1000");
 mLog.info(">>>> spark.sql.inMemoryColumnarStorage.compressed= " + compression);
 mLog.info(">>>> spark.sql.inMemoryColumnarStorage.batchSize= " + batchSize);
 this.mHiveContext.setConf("spark.sql.inMemoryColumnarStorage.compressed", compression);
 this.mHiveContext.setConf("spark.sql.inMemoryColumnarStorage.batchSize", batchSize);
 // register SparkSQL UDFs
 this.registerUDFs();
 this.mDataSourceManager = new SparkDataSourceManager(this);
}
// TODO: Dynamically load UDFs

Code example source: Impetus/Kundera

@Override
public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient)
{
  Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
  ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
  JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
  DataFrame df = sparkClient.sqlContext.createDataFrame(personRDD, m.getEntityClazz());
  String outputFilePath = getOutputFilePath(sparkClient.properties);
  String ext = (String) sparkClient.properties.get("format");
  FileType fileType = FileFormatConstants.extension.get(ext);
  switch (fileType)
  {
  case CSV:
    return writeDataInCsvFile(df, outputFilePath);
  case JSON:
    return writeDataInJsonFile(df, outputFilePath);
  default:
    throw new UnsupportedOperationException("Files of type " + ext + " are not yet supported.");
  }
}

Code example source: Impetus/Kundera

/**
 * Register table for csv.
 * 
 * @param tableName
 *            the table name
 * @param dataSourcePath
 *            the data source path
 * @param sqlContext
 *            the sql context
 */
private void registerTableForCsv(String tableName, String dataSourcePath, HiveContext sqlContext)
{
  HashMap<String, String> options = new HashMap<String, String>();
  options.put("header", "true");
  options.put("path", dataSourcePath);
  sqlContext.load(SparkPropertiesConstants.SOURCE_CSV, options).registerTempTable(tableName);
}

Code example source: ddf-project/DDF

@Override
public DDF loadSpecialFormat(DataFormat format, URI fileURI, Boolean flatten) throws DDFException {
  SparkDDFManager sparkDDFManager = (SparkDDFManager)mDDFManager;
  HiveContext sqlContext = sparkDDFManager.getHiveContext();
  DataFrame jdf = null;
  switch (format) {
    case JSON:
      jdf = sqlContext.jsonFile(fileURI.toString());
      break;
    case PQT:
      jdf = sqlContext.parquetFile(fileURI.toString());
      break;
    default:
      throw new DDFException(String.format("Unsupported data format: %s", format.toString()));
  }
  DataFrame df = SparkUtils.getDataFrameWithValidColnames(jdf);
  DDF ddf = sparkDDFManager.newDDF(sparkDDFManager, df, new Class<?>[]{DataFrame.class},
    null, SparkUtils.schemaFromDataFrame(df));
  if(flatten == true)
    return ddf.getFlattenedDDF();
  else
    return ddf;
}

Code example source: Impetus/Kundera

/**
 * Register table for json.
 * 
 * @param tableName
 *            the table name
 * @param dataSourcePath
 *            the data source path
 * @param sqlContext
 *            the sql context
 */
private void registerTableForJson(String tableName, String dataSourcePath, HiveContext sqlContext)
{
  sqlContext.jsonFile(dataSourcePath).registerTempTable(tableName);
}

Code example source: Impetus/Kundera

@Override
public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
  SparkContextJavaFunctions functions = CassandraJavaUtil.javaFunctions(sparkClient.sparkContext);
  Class clazz = m.getEntityClazz();
  JavaRDD cassandraRowsRDD = functions.cassandraTable(m.getSchema(), m.getTableName(),
      CassandraJavaUtil.mapRowTo(clazz));
  sparkClient.sqlContext.createDataFrame(cassandraRowsRDD, clazz).registerTempTable(m.getTableName());
}

Code example source: Impetus/Kundera

@Override
public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
  String conn = getConnectionString(m);
  Map<String, String> options = new HashMap<String, String>();
  options.put("url", conn);
  options.put("dbtable", m.getTableName());
  sparkClient.sqlContext.load("jdbc", options).registerTempTable(m.getTableName());
}

Code example source: Impetus/Kundera

/**
 * Gets the data frame.
 * 
 * @param query
 *            the query
 * @param m
 *            the m
 * @param kunderaQuery
 *            the kundera query
 * @return the data frame
 */
public DataFrame getDataFrame(String query, EntityMetadata m, KunderaQuery kunderaQuery)
{
  PersistenceUnitMetadata puMetadata = kunderaMetadata.getApplicationMetadata().getPersistenceUnitMetadata(
      persistenceUnit);
  String clientName = puMetadata.getProperty(DATA_CLIENT).toLowerCase();
  SparkDataClient dataClient = SparkDataClientFactory.getDataClient(clientName);
  if (registeredTables.get(m.getTableName()) == null || !registeredTables.get(m.getTableName()))
  {
    dataClient.registerTable(m, this);
    registeredTables.put(m.getTableName(), true);
  }
  // at this level temp table or table should be ready
  DataFrame dataFrame = sqlContext.sql(query);
  return dataFrame;
}

Code example source: com.cloudera.livy/livy-rsc

@Override
public HiveContext hivectx() {
 if (hivectx == null) {
  synchronized (this) {
   if (hivectx == null) {
    hivectx = new HiveContext(sc.sc());
   }
  }
 }
 return hivectx;
}

Code example source: Impetus/Kundera

public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
  final Class clazz = m.getEntityClazz();
  SparkContext sc = sparkClient.sparkContext;
  Configuration config = new Configuration();
  config.set(
      "mongo.input.uri",
      buildMongoURIPath(sc.getConf().get("hostname"), sc.getConf().get("portname"), m.getSchema(),
          m.getTableName()));
  JavaRDD<Tuple2<Object, BSONObject>> mongoJavaRDD = sc.newAPIHadoopRDD(config, MongoInputFormat.class,
      Object.class, BSONObject.class).toJavaRDD();
  JavaRDD<Object> mongoRDD = mongoJavaRDD.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, Object>()
  {
    @Override
    public Iterable<Object> call(Tuple2<Object, BSONObject> arg)
    {
      BSONObject obj = arg._2();
      Object javaObject = generateJavaObjectFromBSON(obj, clazz);
      return Arrays.asList(javaObject);
    }
  });
  sparkClient.sqlContext.createDataFrame(mongoRDD, m.getEntityClazz()).registerTempTable(m.getTableName());
}

Code example source: ddf-project/DDF

@Override
public DDF loadFromJDBC(JDBCDataSourceDescriptor dataSource) throws DDFException {
  SparkDDFManager sparkDDFManager = (SparkDDFManager)mDDFManager;
  HiveContext sqlContext = sparkDDFManager.getHiveContext();
  JDBCDataSourceCredentials cred = (JDBCDataSourceCredentials)dataSource.getDataSourceCredentials();
  String fullURL = dataSource.getDataSourceUri().getUri().toString();
  if (cred.getUsername() != null &&  !cred.getUsername().equals("")) {
    fullURL += String.format("?user=%s&password=%s", cred.getUsername(), cred.getPassword());
  }
  Map<String, String> options = new HashMap<String, String>();
  options.put("url", fullURL);
  options.put("dbtable", dataSource.getDbTable());
  DataFrame df = sqlContext.load("jdbc", options);
  DDF ddf = sparkDDFManager.newDDF(sparkDDFManager, df, new Class<?>[]{DataFrame.class},
    null, SparkUtils.schemaFromDataFrame(df));
  // TODO?
  ddf.getRepresentationHandler().get(RDD.class, Row.class);
  ddf.getMetaDataHandler().setDataSourceDescriptor(dataSource);
  return ddf;
}

Code example source: org.apache.camel/camel-spark

@Override
public void process(Exchange exchange) throws Exception {
  HiveContext hiveContext = resolveHiveContext();
  String sql = exchange.getIn().getBody(String.class);
  Dataset<Row> resultFrame = hiveContext.sql(sql);
  exchange.getIn().setBody(getEndpoint().isCollect() ? resultFrame.collectAsList() : resultFrame.count());
}

Code example source: oeljeklaus-you/UserActionAnalyzePlatform

/**
 * Decides whether this is a production environment: returns a plain SQLContext
 * for local runs and a HiveContext otherwise.
 * @param sc the SparkContext
 * @return the SQLContext implementation to use
 */
public static SQLContext getSQLContext(SparkContext sc)
{
  boolean local= ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
  if(local)
  {
    return new SQLContext(sc);
  }
  return new HiveContext(sc);
}

Code example source: ddf-project/DDF

@Override
public SqlResult sql(String command, Integer maxRows, DataSourceDescriptor dataSource) throws DDFException {
 // TODO: handle other dataSources and dataFormats
 DataFrame  rdd = this.getHiveContext().sql(command);
 Schema schema = SparkUtils.schemaFromDataFrame(rdd);
 String[] strResult = SparkUtils.df2txt(rdd, "\t");
 return new SqlResult(schema,Arrays.asList(strResult));
}

Code example source: Erik-ly/SprakProject

// Fragment: the enclosing local-vs-cluster check mirrors getSQLContext(SparkContext) above.
if (ConfigurationManager.getBoolean(Constants.SPARK_LOCAL)) {
  return new SQLContext(sc);
} else {
  return new HiveContext(sc);
}

Code example source: ddf-project/DDF

@Override
public SqlTypedResult sqlTyped(String command, Integer maxRows, DataSourceDescriptor dataSource) throws  DDFException {
 DataFrame rdd = ((SparkDDFManager) this.getManager()).getHiveContext().sql(command);
 Schema schema = SparkUtils.schemaFromDataFrame(rdd);
 int columnSize = schema.getNumColumns();
 Row[] rddRows = rdd.collect();
 List<List<SqlTypedCell>> sqlTypedResult = new ArrayList<List<SqlTypedCell>>();
 // Scan every cell and add the type information.
 for (int rowIdx = 0; rowIdx < rddRows.length; ++rowIdx) {
  List<SqlTypedCell> row = new ArrayList<SqlTypedCell>();
  for (int colIdx = 0; colIdx < columnSize; ++ colIdx) {
   // TODO: Optimize by reducing getType().
   row.add(new SqlTypedCell(schema.getColumn(colIdx).getType(), rddRows[rowIdx].get(colIdx).toString()));
  }
  sqlTypedResult.add(row);
 }
 return new SqlTypedResult(schema, sqlTypedResult);
}
