OMS – Random IT Utensils https://blog.adamfurmanek.pl IT, operating systems, maths, and more. Mon, 23 Oct 2017 04:58:03 +0000 en-US hourly 1 https://wordpress.org/?v=6.6.2 Logging in distributed system Part 5 — Parsing logs https://blog.adamfurmanek.pl/2018/01/13/logging-in-distributed-system-part-5/ https://blog.adamfurmanek.pl/2018/01/13/logging-in-distributed-system-part-5/#comments Sat, 13 Jan 2018 09:00:27 +0000 https://blog.adamfurmanek.pl/?p=2307 Continue reading Logging in distributed system Part 5 — Parsing logs]]>

This is the fifth part of the Logging series. For your convenience you can find other parts in the table of contents in Part 1 – Correlations

We start with classes representing logs:

using System;
using System.Globalization;

namespace LogHandler
{
	/// <summary>
	/// A single parsed log entry: the header fields extracted from the log line,
	/// the entry's content, and date/time parts derived from <see cref="Timestamp"/>.
	/// </summary>
	/// <remarks>
	/// Every date/time property re-parses <see cref="Timestamp"/> on access, so the
	/// derived values always reflect its current value. When <see cref="Timestamp"/>
	/// is null or matches none of the supported formats, the derived values are those
	/// of <see cref="DateTime.MinValue"/>.
	/// </remarks>
	public class LogEntry
	{
		// Accepted timestamp layouts; they differ only in the time separator (':' vs '.').
		// Kept in a static field so the array is not re-allocated on every property access.
		private static readonly string[] TimestampFormats =
		{
			"yyyy-MM-dd HH:mm:ss.fff",
			"yyyy-MM-dd HH.mm.ss.fff"
		};

		/// <summary>Path associated with the log this entry came from.</summary>
		public string Path { get; set; }

		/// <summary>Text content of the entry.</summary>
		public string Content { get; set; }

		public string ApplicationName { get; set; }
		public string ServerId { get; set; }
		public string ThreadId { get; set; }
		public string CorrelationId { get; set; }

		/// <summary>Year component of the parsed <see cref="Timestamp"/>.</summary>
		public int Year => ParseTime().Year;

		/// <summary>Month component of the parsed <see cref="Timestamp"/>.</summary>
		public int Month => ParseTime().Month;

		/// <summary>Day-of-month component of the parsed <see cref="Timestamp"/>.</summary>
		public int Day => ParseTime().Day;

		/// <summary>
		/// Date part of the parsed <see cref="Timestamp"/> as <c>yyyy-MM-dd</c>.
		/// Formatted with the invariant culture so the value does not depend on the
		/// calendar of the machine's current culture.
		/// </summary>
		public string Date => ParseTime().ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);

		/// <summary>Hour component (0-23) of the parsed <see cref="Timestamp"/>.</summary>
		public int Hour => ParseTime().Hour;

		/// <summary>Minute component of the parsed <see cref="Timestamp"/>.</summary>
		public int Minute => ParseTime().Minute;

		/// <summary>Time-of-day part of the parsed <see cref="Timestamp"/> in the default <see cref="TimeSpan"/> string form.</summary>
		public string Time => ParseTime().TimeOfDay.ToString();

		/// <summary>Raw timestamp text; the source for all derived date/time properties.</summary>
		public string Timestamp { get; set; }

		public string LogLevel { get; set; }
		public string Activity { get; set; }
		public string LogicalTime { get; set; }
		public string LoggerId { get; set; }

		/// <summary>
		/// The parsed <see cref="Timestamp"/>, or <see cref="DateTime.MinValue"/> when it cannot be parsed.
		/// </summary>
		public DateTime GenerationTime => ParseTime();

		/// <summary>
		/// Parses <see cref="Timestamp"/> against the supported formats using the invariant culture.
		/// </summary>
		/// <returns>The parsed value, or <see cref="DateTime.MinValue"/> if parsing fails.</returns>
		private DateTime ParseTime()
		{
			DateTime result;
			// The boolean outcome is intentionally ignored: on failure TryParseExact
			// leaves DateTime.MinValue in 'result', which is what callers receive.
			DateTime.TryParseExact(Timestamp, TimestampFormats, CultureInfo.InvariantCulture, DateTimeStyles.None, out result);

			return result;
		}
	}
}

namespace LogHandler
{
	/// <summary>
	/// One unparsed line of a log file, kept so that no data is lost when a line
	/// cannot be represented by the structured log entry.
	/// </summary>
	public class RawLogEntry
	{
		/// <summary>Path associated with the log the line came from.</summary>
		public string Path { get; set; }

		/// <summary>The text of the line.</summary>
		public string Content { get; set; }

		/// <summary>Position of the line, stored as text.</summary>
		public string LineNumber { get; set; }
	}
}

Since our logs might contain additional data, we do not want to lose it. So we push two types of logs to the OMS: the parsed log and the raw log. The raw log is only split into individual lines (you could just as well push the whole file), while the parsed log contains the extracted fields, so it is easier to filter it and create alerts.

Now the parsers:

using System;
using System.Collections.Generic;
using System.Linq;

namespace LogHandler
{
	/// <summary>
	/// Splits raw log text into one <see cref="RawLogEntry"/> per non-empty line.
	/// </summary>
	public static class RawLogParser
	{
		// Both Windows (CRLF) and Unix (LF) line endings are recognised regardless of the
		// platform the parser runs on. "\r\n" is listed first so a CRLF pair is consumed
		// as a single separator and no stray '\r' is left at the end of a line.
		private static readonly string[] LineSeparators = { "\r\n", "\n" };

		/// <summary>
		/// Creates a raw entry for every non-empty line of <paramref name="content"/>.
		/// </summary>
		/// <param name="content">Full text of the log.</param>
		/// <param name="path">Path stored on every produced entry.</param>
		/// <returns>
		/// The entries in their original order, fully materialised. <c>LineNumber</c> is
		/// the 1-based position among the non-empty lines (empty lines are dropped before numbering).
		/// </returns>
		public static IEnumerable<RawLogEntry> ParseLog(string content, string path)
		{
			return content
				.Split(LineSeparators, StringSplitOptions.RemoveEmptyEntries)
				.Select((line, index) => new RawLogEntry
				{
					Content = line,
					Path = path,
					LineNumber = (index + 1).ToString()
				})
				.ToArray();
		}
	}
}

using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace LogHandler
{
	/// <summary>
	/// Parses log text into <see cref="LogEntry"/> objects. An entry starts at a line
	/// containing a header of nine consecutive bracketed fields
	/// (<c>[timestamp][applicationName][serverId][threadId][correlationId][logLevel][activity][logicalTime][loggerId]</c>);
	/// the lines that follow, up to the next header, become the entry's content.
	/// </summary>
	public class LogParser
	{
		// Names of the regex capture groups, in the order the fields appear in the header.
		private const string Timestamp = "timestamp";
		private const string ApplicationName = "applicationName";
		private const string ServerId = "serverId";
		private const string ThreadId = "threadId";
		private const string CorrelationId = "correlationId";
		private const string LogLevel = "logLevel";
		private const string Activity = "activity";
		private const string LogicalTime = "logicalTime";
		private const string LoggerId = "loggerId";

		// Nine back-to-back "[...]" fields, each captured lazily into its named group.
		private static readonly string LogHeaderSpecificPattern = string.Join("", new[]
		{
			Timestamp,
			ApplicationName,
			ServerId,
			ThreadId,
			CorrelationId,
			LogLevel,
			Activity,
			LogicalTime,
			LoggerId
		}.Select(group => $"[[](?<{group}>.*?)[]]"));

		// Built once and reused for every line. A line is a header exactly when this
		// pattern matches, so a single match both detects the header and extracts its fields.
		private static readonly Regex LogHeaderRegex = new Regex(LogHeaderSpecificPattern, RegexOptions.Compiled);

		/// <summary>
		/// Lazily parses <paramref name="log"/> into entries.
		/// </summary>
		/// <param name="log">Full text of the log; LF and CRLF line endings are both accepted.</param>
		/// <param name="path">Path stored on every produced entry.</param>
		/// <returns>
		/// One entry per header line, in order. <c>Content</c> holds the non-header lines that
		/// follow the header, each terminated by <see cref="System.Environment.NewLine"/>.
		/// The header line itself is not part of <c>Content</c>, and lines that appear
		/// before the first header are discarded.
		/// </returns>
		public static IEnumerable<LogEntry> ParseLog(string log, string path)
		{
			// Split on '\n' rather than Environment.NewLine so that logs written with
			// either LF or CRLF endings are broken into lines on any platform.
			var lines = log.Split('\n');

			var buffer = new StringBuilder();
			LogEntry currentEntry = null;

			foreach (var rawLine in lines)
			{
				// Splitting CRLF text on '\n' leaves a trailing '\r' on every piece. Remove it,
				// otherwise AppendLine below would produce mixed "\r\r\n"/"\r\n" endings in Content.
				var line = rawLine.TrimEnd('\r');

				var match = LogHeaderRegex.Match(line);
				if (match.Success)
				{
					// A new header closes the entry that was being accumulated.
					if (currentEntry != null)
					{
						currentEntry.Content = buffer.ToString();
						yield return currentEntry;
					}

					buffer.Clear();
					currentEntry = new LogEntry
					{
						Timestamp = match.Groups[Timestamp].Value,
						ApplicationName = match.Groups[ApplicationName].Value,
						ServerId = match.Groups[ServerId].Value,
						ThreadId = match.Groups[ThreadId].Value,
						CorrelationId = match.Groups[CorrelationId].Value,
						LogLevel = match.Groups[LogLevel].Value,
						Activity = match.Groups[Activity].Value,
						LogicalTime = match.Groups[LogicalTime].Value,
						LoggerId = match.Groups[LoggerId].Value,
						Path = path,
					};
				}
				else
				{
					buffer.AppendLine(line);
				}
			}

			// Emit the last entry, which has no following header to close it.
			if (currentEntry != null)
			{
				currentEntry.Content = buffer.ToString();
				yield return currentEntry;
			}
		}
	}
}

Now, when pushing data to OMS, do not forget to set the time-generated-field header (which names the timestamp field) in order to avoid duplicates for parsed logs:

// Send the name of the LogEntry property that carries the entry's time ("GenerationTime")
// as the value of the "time-generated-field" request header.
client.DefaultRequestHeaders.Add("time-generated-field", nameof(LogEntry.GenerationTime));

Summary

This short series shows how to implement a logging infrastructure for a distributed system. Please be advised that this is only an initial implementation, which you should adapt to your needs.

]]>
https://blog.adamfurmanek.pl/2018/01/13/logging-in-distributed-system-part-5/feed/ 1