下面的代码可以很好地将流作业提交到集群。
// Folder passed to the job definition as its StatusFolder.
string statusFolder = @"/tutorials/wordcountstreaming/status";

// Basic-auth credential for the cluster endpoint (placeholder values).
var clusterCredential = new BasicAuthCredential
{
    UserName = "username",
    Password = "pass",
    Server = new Uri("https://something.azurehdinsight.net")
};

// Describe the Hadoop streaming MapReduce job.
var myJobDefinition = new StreamingMapReduceJobCreateParameters
{
    JobName = "my word counting job",
    StatusFolder = statusFolder,
    Input = "/example/data/gutenberg/davinci.txt",
    Output = "/tutorials/wordcountstreaming/output",
    Reducer = "wc.exe",
    Mapper = "cat.exe"
};

// Add the two executables named by Reducer and Mapper above to the job's file list.
foreach (string executablePath in new[] { "/example/apps/wc.exe", "/example/apps/cat.exe" })
{
    myJobDefinition.Files.Add(executablePath);
}

// Connect with the credential, then submit the streaming job.
var jobClient = JobSubmissionClientFactory.Connect(clusterCredential);
JobCreationResults mrJobResults = jobClient.CreateStreamingJob(myJobDefinition);
3条答案
按热度按时间acruukt91#
希望这有帮助。
koaltpgm2#
如何将文本文件的名称作为键?我希望输出显示键值对:键是文件名,值是该文件中的字数。我有多个文件。
slhcrj9b3#
--映射器(Mapper)