持久日志存储-hadoop集群,8042/logs/userlogs/执行中可见,终止时消失

0ve6wy6x  于 2021-06-02  发布在  Hadoop
关注(0)|答案(1)|浏览(411)

发布map reduce作业后,我导航到目录 /usr/local/hadoop/logs/userlogs ,在我的两个从属节点上。
由于工作很小,结果只出现在其中一个——有趣的是发行 ll 命令连续两次首先演示正在运行的作业,但随后-作业终止后-包含其日志的目录消失,请参见此处:

我对我的电脑做了以下修改 yarn-site.xml 以及 mapred-site.xml 文件,正如这里所建议的,但最终并没有解决我的问题。
如果是重要的,我会用 log4j ,其配置如下:


# Licensed to the Apache Software Foundation (ASF) under one

# or more contributor license agreements.  See the NOTICE file

# distributed with this work for additional information

# regarding copyright ownership.  The ASF licenses this file

# to you under the Apache License, Version 2.0 (the

# "License"); you may not use this file except in compliance

# with the License.  You may obtain a copy of the License at

# 

# http://www.apache.org/licenses/LICENSE-2.0

# 

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

# Define some default values that can be overridden by system properties

# hadoop.root.logger=INFO,console

hadoop.root.logger=INFO
hadoop.log.dir=.
hadoop.log.file=hadoop.log

# Define the root logger to the system property "hadoop.root.logger".

log4j.rootLogger=DEBUG, DebugAppender

# 

# log4j.rootLogger=${hadoop.root.logger}, EventCounter

# 

# Logging Threshold

log4j.threshold=ALL

# Null Appender

log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender

# 

# Rolling File Appender - cap space usage at 5gb.

# 

hadoop.log.maxfilesize=256MB
hadoop.log.maxbackupindex=20
log4j.appender.RFA=org.apache.log4j.RollingFileAppender
log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}

log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize}
log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex}

log4j.appender.RFA.layout=org.apache.log4j.PatternLayout

# Pattern format: Date LogLevel LoggerName LogMessage

log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n

# Debugging Pattern format

# log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n

# 

# Daily Rolling File Appender

# 

log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}

# Rollover at midnight

log4j.appender.DRFA.DatePattern=.yyyy-MM-dd

log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout

# Pattern format: Date LogLevel LoggerName LogMessage

log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n

# Debugging Pattern format

# log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n

# 

# console

# Add "console" to rootlogger above if you want to use this

# 

log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n

# 

# TaskLog Appender

# 

# Default values

hadoop.tasklog.taskid=null
hadoop.tasklog.iscleanup=false
hadoop.tasklog.noKeepSplits=4
hadoop.tasklog.totalLogFileSize=100
hadoop.tasklog.purgeLogSplits=true
hadoop.tasklog.logsRetainHours=12

log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}

log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n

# 

# HDFS block state change log from block manager

# 

# Uncomment the following to log normal block state change

# messages from BlockManager in NameNode.

log4j.logger.BlockStateChange=DEBUG

# 

# Security appender

# 

hadoop.security.logger=INFO,NullAppender
hadoop.security.log.maxfilesize=256MB
hadoop.security.log.maxbackupindex=20
log4j.category.SecurityLogger=${hadoop.security.logger}
hadoop.security.log.file=SecurityAuth-${user.name}.audit
log4j.appender.RFAS=org.apache.log4j.RollingFileAppender
log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout
log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize}
log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex}

# 

# Daily Rolling Security appender

# 

log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd

# 

# hadoop configuration logging

# 

# Uncomment the following line to turn off configuration deprecation warnings.

log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN

# 

# hdfs audit logging

# 

hdfs.audit.logger=INFO,NullAppender
hdfs.audit.log.maxfilesize=256MB
hdfs.audit.log.maxbackupindex=20
log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger}
log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false
log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender
log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log
log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout
log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize}
log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex}

# 

# NameNode metrics logging.

# The default is to retain two namenode-metrics.log files up to 64MB each.

# 

namenode.metrics.logger=INFO,NullAppender
log4j.logger.NameNodeMetricsLog=${namenode.metrics.logger}
log4j.additivity.NameNodeMetricsLog=false
log4j.appender.NNMETRICSRFA=org.apache.log4j.RollingFileAppender
log4j.appender.NNMETRICSRFA.File=${hadoop.log.dir}/namenode-metrics.log
log4j.appender.NNMETRICSRFA.layout=org.apache.log4j.PatternLayout
log4j.appender.NNMETRICSRFA.layout.ConversionPattern=%d{ISO8601} %m%n
log4j.appender.NNMETRICSRFA.MaxBackupIndex=1
log4j.appender.NNMETRICSRFA.MaxFileSize=64MB

# 

# DataNode metrics logging.

# The default is to retain two datanode-metrics.log files up to 64MB each.

# 

datanode.metrics.logger=INFO,NullAppender
log4j.logger.DataNodeMetricsLog=${datanode.metrics.logger}
log4j.additivity.DataNodeMetricsLog=false
log4j.appender.DNMETRICSRFA=org.apache.log4j.RollingFileAppender
log4j.appender.DNMETRICSRFA.File=${hadoop.log.dir}/datanode-metrics.log
log4j.appender.DNMETRICSRFA.layout=org.apache.log4j.PatternLayout
log4j.appender.DNMETRICSRFA.layout.ConversionPattern=%d{ISO8601} %m%n
log4j.appender.DNMETRICSRFA.MaxBackupIndex=1
log4j.appender.DNMETRICSRFA.MaxFileSize=64MB

# 

# mapred audit logging

# 

mapred.audit.logger=INFO,NullAppender
mapred.audit.log.maxfilesize=256MB
mapred.audit.log.maxbackupindex=20
log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger}
log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false
log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender
log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log
log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout
log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize}
log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex}

# Custom Logging levels

log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG

# Jets3t library

log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR

# AWS SDK & S3A FileSystem

log4j.logger.com.amazonaws=ERROR
log4j.logger.com.amazonaws.http.AmazonHttpClient=ERROR
log4j.logger.org.apache.hadoop.fs.s3a.S3AFileSystem=WARN

# 

# Event Counter Appender

# Sends counts of logging messages at different severity levels to Hadoop Metrics.

# 

log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter

# 

# Job Summary Appender

# 

# Use following logger to send summary to separate file defined by

# hadoop.mapreduce.jobsummary.log.file :

# hadoop.mapreduce.jobsummary.logger=INFO,JSA

# 

hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
hadoop.mapreduce.jobsummary.log.maxfilesize=256MB
hadoop.mapreduce.jobsummary.log.maxbackupindex=20
log4j.appender.JSA=org.apache.log4j.RollingFileAppender
log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize}
log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex}
log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false

# 

# shuffle connection log from shuffleHandler

# Uncomment the following line to enable logging of shuffle connections

# log4j.logger.org.apache.hadoop.mapred.ShuffleHandler.audit=DEBUG

# 

# Yarn ResourceManager Application Summary Log

# 

# Set the ResourceManager summary log filename

yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log

# Set the ResourceManager summary log level and appender

yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger}

# yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY

# To enable AppSummaryLogging for the RM,

# set yarn.server.resourcemanager.appsummary.logger to

# <LEVEL>,RMSUMMARY in hadoop-env.sh

# Appender for ResourceManager Application Summary Log

# Requires the following properties to be set

# - hadoop.log.dir (Hadoop Log directory)

# - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename)

# - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender)

log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger}
log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false
log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender
log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file}
log4j.appender.RMSUMMARY.MaxFileSize=256MB
log4j.appender.RMSUMMARY.MaxBackupIndex=20
log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout
log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n

# HS audit log configs

# mapreduce.hs.audit.logger=INFO,HSAUDIT

# log4j.logger.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=${mapreduce.hs.audit.logger}

# log4j.additivity.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=false

# log4j.appender.HSAUDIT=org.apache.log4j.DailyRollingFileAppender

# log4j.appender.HSAUDIT.File=${hadoop.log.dir}/hs-audit.log

# log4j.appender.HSAUDIT.layout=org.apache.log4j.PatternLayout

# log4j.appender.HSAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n

# log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd

# Http Server Request Logs

# log4j.logger.http.requests.namenode=INFO,namenoderequestlog

# log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender

# log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log

# log4j.appender.namenoderequestlog.RetainDays=3

# log4j.logger.http.requests.datanode=INFO,datanoderequestlog

# log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender

# log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log

# log4j.appender.datanoderequestlog.RetainDays=3

# log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog

# log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender

# log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log

# log4j.appender.resourcemanagerrequestlog.RetainDays=3

# log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog

# log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender

# log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log

# log4j.appender.jobhistoryrequestlog.RetainDays=3

# log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog

# log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender

# log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log

# log4j.appender.nodemanagerrequestlog.RetainDays=3

# WebHdfs request log on datanodes

# Specify -Ddatanode.webhdfs.logger=INFO,HTTPDRFA on datanode startup to

# direct the log to a separate file.

# datanode.webhdfs.logger=INFO,console

# log4j.logger.datanode.webhdfs=${datanode.webhdfs.logger}

# log4j.appender.HTTPDRFA=org.apache.log4j.DailyRollingFileAppender

# log4j.appender.HTTPDRFA.File=${hadoop.log.dir}/hadoop-datanode-webhdfs.log

# log4j.appender.HTTPDRFA.layout=org.apache.log4j.PatternLayout

# log4j.appender.HTTPDRFA.layout.ConversionPattern=%d{ISO8601} %m%n

# log4j.appender.HTTPDRFA.DatePattern=.yyyy-MM-dd

# Appender for viewing information for errors and warnings

yarn.ewma.cleanupInterval=300
yarn.ewma.messageAgeLimitSeconds=86400
yarn.ewma.maxUniqueMessages=250
log4j.appender.EWMA=org.apache.hadoop.yarn.util.Log4jWarningErrorMetricsAppender
log4j.appender.EWMA.cleanupInterval=${yarn.ewma.cleanupInterval}
log4j.appender.EWMA.messageAgeLimitSeconds=${yarn.ewma.messageAgeLimitSeconds}
log4j.appender.EWMA.maxUniqueMessages=${yarn.ewma.maxUniqueMessages}

这是我要执行的程序:

import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class OpcodeCount {

  public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    // deek logger
    private final Logger LOG = org.apache.log4j.Logger.getLogger(this.getClass());

    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {

      // debugging output
      LOG.warn("anything :D");

      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }

  public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
    private IntWritable result = new IntWritable();

    // deek logger
    private final Logger LOG = org.apache.log4j.Logger.getLogger(this.getClass());

      public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

      // debugging output
      LOG.warn("anything :D");

      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "opcode count: 03");
    job.setJarByClass(OpcodeCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

非常奇怪的是,以前我的日志是持久的,但在重新格式化namenode之后,情况就不一样了。

prdp8dxp

prdp8dxp1#

因此,显然,除了从执行作业的从属节点中挖掘它们之外,您还可以直接将它们取出来 hdfs ,例如:

hadoop fs -cat /tmp/logs/ubuntu/logs/application_1507243913606_0002/slave2_46063

对那些在cloudera为我们指路的家伙来说真是太棒了。

  • 直到作业完成并删除自身

相关问题