Skip to content

Commit 37a4ec3

Browse files
Merge pull request #18654 from nak3/diagnostics
Automatic merge from submit-queue. [Diagnostics] Fix AnalyzeLogs to provide a clearer debug message. When we run `oc adm diagnostics AnalyzeLogs`, the diagnostics command often misses error messages in the journal logs. As admins, we then try to find out the reason, but the debug message is unclear: ``` $ oc adm diagnostics AnalyzeLogs -l=0 ... debug: Stopped reading docker log: timestamp 1518835107917828 too old ... ``` So, this patch changes AnalyzeLogs as follows: - Output the fields of the struct for each discovered systemd unit - Produce a correct error message if a journal log entry has an invalid timestamp - Output why diagnostics stopped reading logs, with a readable timestamp. Here is the message after applying this patch: ``` debug: Stopped reading docker log: timestamp 2018-02-17 11:33:58 +0900 JST more than 1 hour ago ```
2 parents 2f9dbe3 + 8adfb26 commit 37a4ec3

File tree

3 files changed

+8
-12
lines changed

3 files changed

+8
-12
lines changed

pkg/oc/admin/diagnostics/diagnostics/cluster/master_node.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ func resolveServerIP(serverUrl string, fn dnsResolver) ([]string, error) {
147147

148148
func searchNodesForIP(nodes []kapi.Node, ips []string) types.DiagnosticResult {
149149
r := types.NewDiagnosticResult(MasterNodeName)
150-
r.Debug("DClu3005", fmt.Sprintf("Seaching for a node with master IP: %s", ips))
150+
r.Debug("DClu3005", fmt.Sprintf("Searching for a node with master IP: %s", ips))
151151

152152
// Loops = # of nodes * number of IPs per node (2 commonly) * # of IPs the
153153
// server hostname resolves to. (should usually be 1)

pkg/oc/admin/diagnostics/diagnostics/systemd/analyze_logs.go

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,13 @@ func (d AnalyzeLogs) Check() types.DiagnosticResult {
9999
if err := json.Unmarshal(bytes, &entry); err != nil {
100100
r.Debug("DS0003", fmt.Sprintf("Couldn't read the JSON for this log message:\n%s\nGot error %s", string(bytes), errStr(err)))
101101
} else {
102-
if lineCount > 500 && stampTooOld(entry.TimeStamp, timeLimit) {
103-
r.Debug("DS0004", fmt.Sprintf("Stopped reading %s log: timestamp %s too old", unitName, entry.TimeStamp))
104-
break // if we've analyzed at least 500 entries, stop when age limit reached (don't scan days of logs)
102+
epochns, err := strconv.ParseInt(entry.TimeStamp, 10, 64)
103+
if err == nil && time.Unix(epochns/1000000, 0).Before(timeLimit) && lineCount > 500 {
104+
r.Debug("DS0005", fmt.Sprintf("Stopped reading %s log: timestamp %s more than 1 hour ago", unitName, time.Unix(epochns/1000000, 0)))
105+
break
106+
} else if err != nil {
107+
r.Warn("DS0004", err, fmt.Sprintf("Find invalid timestamp %s in %s log", entry.TimeStamp, unitName))
108+
continue
105109
}
106110
if unit.StartMatch.MatchString(entry.Message) {
107111
break // saw log message for unit startup; don't analyze previous logs
@@ -142,10 +146,3 @@ func (d AnalyzeLogs) Check() types.DiagnosticResult {
142146

143147
return r
144148
}
145-
146-
func stampTooOld(stamp string, timeLimit time.Time) bool {
147-
if epochns, err := strconv.ParseInt(stamp, 10, 64); err == nil {
148-
return time.Unix(epochns/1000000, 0).Before(timeLimit)
149-
}
150-
return true // something went wrong, stop looking...
151-
}

pkg/oc/admin/diagnostics/diagnostics/systemd/locate_units.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ func GetSystemdUnits(logger *log.Logger) map[string]types.SystemdUnit {
3838
}
3939
}
4040

41-
logger.Debug("DS1003", fmt.Sprintf("%v", systemdUnits))
4241
return systemdUnits
4342
}
4443

0 commit comments

Comments
 (0)