Skip to content

Commit

Permalink
Fix ContentEncoding in IApiResponse (#5748)
Browse files Browse the repository at this point in the history
## Summary of changes

- The `IApiResponse.ContentEncoding` property was confusing and
incorrectly implemented
- Refactored as `GetCharsetEncoding()`
- Added `GetContentEncodingType`

## Reason for change

The `ContentEncoding` property was meant to return [the `charset`
associated with a
`Content-type`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type#charset)
i.e. the `charset=utf-8` part of `text/plain;charset=utf-8`, converted
into a .NET `Encoding` object. However, there's also [a
`ContentEncoding`
_header_](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding)
which defines whether the content is encoded with a compression
algorithm, e.g. gzip/brotli etc.

This PR aims to fix the bug, clear up the confusion by renaming, add
some unit tests, and make the behaviour consistent across our various
`IApiResponse` implementations.

## Implementation details

- Expose `ContentTypeHeader` and `ContentEncodingHeader` as values on
the `IApiResponse`.
- This is necessary because you _can't_ necessarily get these values
directly from `GetHeaders` in some cases (e.g. `HttpClient`)
- Rename `ContentEncoding` to `GetCharsetEncoding()`
- Made it a method so we don't bother processing the header until we
need it. In most cases we never use it (and use it at most once)
- Re-used the optimized implementation we were using in `HttpMessage`
where possible. Tweaked it slightly to always return UTF-8 by default
(as our calling code wasn't resilient to a `null` return anyway and would have
thrown)
- Made it fall back to "any" charset, as was previously _always_
happening for `HttpClientResponse`
- Add `GetContentEncodingType()` which returns an enum of compression
values
- We don't currently use this, but I could do with it in
#5747

## Test coverage

Added unit tests for the parsing logic used for decoding the `charset`
and the content-encoding

## Other details
I initially based this on
#5658 but ultimately
changed a lot (and we still don't need that mime-type handling yet)

<!-- ⚠️ Note: where possible, please obtain 2 approvals prior to
merging. Unless CODEOWNERS specifies otherwise, for external teams it is
typically best to have one review from a team member, and one review
from apm-dotnet. Trivial changes do not require 2 reviews. -->
  • Loading branch information
andrewlock committed Jul 3, 2024
1 parent fab003e commit dae1d15
Show file tree
Hide file tree
Showing 13 changed files with 405 additions and 69 deletions.
50 changes: 50 additions & 0 deletions tracer/src/Datadog.Trace/Agent/ContentEncodingType.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// <copyright file="ContentEncodingType.cs" company="Datadog">
// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License.
// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc.
// </copyright>

#nullable enable

namespace Datadog.Trace.Agent;

/// <summary>
/// The encoding (typically a compression algorithm) applied to the content body,
/// as declared by the Content-Encoding header.
/// See <see href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding">Mozilla</see> for details of the content-encoding header
/// </summary>
internal enum ContentEncodingType
{
/// <summary>
/// The Content-Encoding header is not present or was empty
/// </summary>
None,

/// <summary>
/// The Content-Encoding header specified 'gzip'
/// </summary>
GZip,

/// <summary>
/// The Content-Encoding header specified 'deflate'
/// </summary>
Deflate,

/// <summary>
/// The Content-Encoding header specified 'compress'
/// </summary>
Compress,

/// <summary>
/// The Content-Encoding header specified 'br'
/// </summary>
Brotli,

/// <summary>
/// The Content-Encoding header specified a single value that was not recognized
/// </summary>
Other,

/// <summary>
/// The Content-Encoding header specified multiple encodings
/// </summary>
Multiple,
}
97 changes: 95 additions & 2 deletions tracer/src/Datadog.Trace/Agent/IApiResponse.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
// </copyright>

using System;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Text;
using System.Threading.Tasks;
using Datadog.Trace.Util;
using Datadog.Trace.Util.Streams;
using Datadog.Trace.Vendors.Newtonsoft.Json;

Expand All @@ -18,10 +20,22 @@ internal interface IApiResponse : IDisposable

long ContentLength { get; }

Encoding ContentEncoding { get; }
/// <summary>
/// Gets the "raw" content-type header, which may contain additional information like charset or boundary.
/// </summary>
string ContentTypeHeader { get; }

/// <summary>
/// Gets the "raw" content-encoding header, which may contain multiple values
/// </summary>
string ContentEncodingHeader { get; }

string GetHeader(string headerName);

Encoding GetCharsetEncoding();

ContentEncodingType GetContentEncodingType();

Task<Stream> GetStreamAsync();
}

Expand Down Expand Up @@ -63,7 +77,7 @@ private static StreamReader GetStreamReader(IApiResponse apiResponse, Stream str
// Server may not send the content length, in that case we use a default value.
// https://source.dot.net/#System.Private.CoreLib/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs,25
var length = apiResponse.ContentLength is > 0 and < DefaultBufferSize ? (int)apiResponse.ContentLength : DefaultBufferSize;
return new StreamReader(stream, apiResponse.ContentEncoding, detectEncodingFromByteOrderMarks: false, length, leaveOpen: true);
return new StreamReader(stream, apiResponse.GetCharsetEncoding(), detectEncodingFromByteOrderMarks: false, length, leaveOpen: true);
}

public static bool ShouldRetry(this IApiResponse response)
Expand All @@ -82,5 +96,84 @@ public static bool ShouldRetry(this IApiResponse response)

return shouldRetry;
}

/// <summary>
/// Resolves the <see cref="Encoding"/> named by the <c>charset</c> parameter of a content-type header.
/// </summary>
/// <param name="contentTypeHeader">The raw content-type header value, for example <c>"text/plain; charset=utf-8"</c></param>
/// <returns>The encoding for the declared charset. Falls back to <see cref="EncodingHelpers.Utf8NoBom"/> when the header
/// is null or empty, when it carries no charset parameter, or when the charset cannot be resolved</returns>
public static Encoding GetCharsetEncoding(string contentTypeHeader)
{
    // Fast path: no header at all, or the very common bare "application/json"
    if (string.IsNullOrEmpty(contentTypeHeader)
     || string.Equals("application/json", contentTypeHeader, StringComparison.OrdinalIgnoreCase))
    {
        return EncodingHelpers.Utf8NoBom;
    }

    // The header is a ';' separated list, e.g. "text/plain; charset=utf-8; boundary=foo"
    foreach (var segment in contentTypeHeader.SplitIntoSpans(';'))
    {
        var parameter = segment.AsSpan();
        var separator = parameter.IndexOf('=');

        // Segments without '=' (such as the media type itself) are not parameters
        if (separator == -1)
        {
            continue;
        }

        var name = parameter.Slice(0, separator).Trim();
        if (!name.Equals("charset".AsSpan(), StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // Only the first charset parameter is considered; every path below returns
        var value = parameter.Slice(separator + 1).Trim();
        if (EncodingHelpers.TryGetWellKnownCharset(value, out var wellKnown))
        {
            return wellKnown;
        }

        if (EncodingHelpers.TryGetFromCharset(value.ToString(), out var resolved))
        {
            return resolved;
        }

        return EncodingHelpers.Utf8NoBom;
    }

    // No charset parameter was present
    return EncodingHelpers.Utf8NoBom;
}

/// <summary>
/// Maps a raw content-encoding header value to the corresponding <see cref="ContentEncodingType"/>.
/// </summary>
/// <param name="contentEncodingHeader">The raw content-encoding header value, for example <c>"gzip"</c></param>
/// <returns><see cref="ContentEncodingType.None"/> when the header is null or empty,
/// <see cref="ContentEncodingType.Multiple"/> when it contains a comma,
/// the matching value for <c>gzip</c>, <c>deflate</c>, <c>compress</c> or <c>br</c> (compared case-insensitively
/// after trimming), and <see cref="ContentEncodingType.Other"/> for anything else</returns>
public static ContentEncodingType GetContentEncodingType(string contentEncodingHeader)
{
    if (string.IsNullOrEmpty(contentEncodingHeader))
    {
        return ContentEncodingType.None;
    }

    // A comma separates list entries, so its presence means more than one encoding was given
    if (contentEncodingHeader.IndexOf(',') >= 0)
    {
        return ContentEncodingType.Multiple;
    }

    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    var token = contentEncodingHeader.AsSpan().Trim();

    if (token.Equals("gzip".AsSpan(), ignoreCase))
    {
        return ContentEncodingType.GZip;
    }

    if (token.Equals("deflate".AsSpan(), ignoreCase))
    {
        return ContentEncodingType.Deflate;
    }

    if (token.Equals("compress".AsSpan(), ignoreCase))
    {
        return ContentEncodingType.Compress;
    }

    if (token.Equals("br".AsSpan(), ignoreCase))
    {
        return ContentEncodingType.Brotli;
    }

    return ContentEncodingType.Other;
}
}
}
9 changes: 7 additions & 2 deletions tracer/src/Datadog.Trace/Agent/Transports/ApiWebResponse.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,22 @@ internal class ApiWebResponse : IApiResponse, IDisposable
public ApiWebResponse(HttpWebResponse response)
{
_response = response;
ContentEncoding = !string.IsNullOrEmpty(response.ContentEncoding) ? Encoding.GetEncoding(response.ContentEncoding) : Encoding.UTF8;
}

public int StatusCode => (int)_response.StatusCode;

public long ContentLength => _response.ContentLength;

public Encoding ContentEncoding { get; }
public string ContentTypeHeader => _response.ContentType;

public string ContentEncodingHeader => _response.ContentEncoding;

public string GetHeader(string headerName) => _response.Headers[headerName];

public Encoding GetCharsetEncoding() => ApiResponseExtensions.GetCharsetEncoding(ContentTypeHeader);

public ContentEncodingType GetContentEncodingType() => ApiResponseExtensions.GetContentEncodingType(ContentEncodingHeader);

public Task<Stream> GetStreamAsync()
{
return Task.FromResult(_response.GetResponseStream());
Expand Down
34 changes: 30 additions & 4 deletions tracer/src/Datadog.Trace/Agent/Transports/HttpClientResponse.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using Datadog.Trace.Util;

namespace Datadog.Trace.Agent.Transports
{
Expand All @@ -19,16 +20,41 @@ internal class HttpClientResponse : IApiResponse
public HttpClientResponse(HttpResponseMessage response)
{
_response = response;

var encoding = _response.Content?.Headers?.ContentEncoding?.FirstOrDefault();
ContentEncoding = !string.IsNullOrEmpty(encoding) ? Encoding.GetEncoding(encoding) : Encoding.UTF8;
}

public int StatusCode => (int)_response.StatusCode;

public long ContentLength => _response.Content.Headers.ContentLength ?? -1;

public Encoding ContentEncoding { get; }
public string ContentEncodingHeader => string.Join(',', _response.Content.Headers.ContentEncoding);

public string ContentTypeHeader => _response.Content.Headers.ContentType?.ToString();

/// <summary>
/// Gets the <see cref="ContentEncodingType"/> described by the response's Content-Encoding header values.
/// </summary>
/// <returns><see cref="ContentEncodingType.None"/> when no encoding values are present,
/// <see cref="ContentEncodingType.Multiple"/> when more than one is present,
/// otherwise the result of parsing the single value</returns>
public ContentEncodingType GetContentEncodingType()
{
    var encodings = _response.Content.Headers.ContentEncoding;

    if (encodings.Count == 0)
    {
        return ContentEncodingType.None;
    }

    if (encodings.Count == 1)
    {
        return ApiResponseExtensions.GetContentEncodingType(encodings.First());
    }

    return ContentEncodingType.Multiple;
}

/// <summary>
/// Gets the <see cref="Encoding"/> for the charset declared in the response's Content-Type header.
/// </summary>
/// <returns>The encoding for the declared charset, or <see cref="EncodingHelpers.Utf8NoBom"/> when the
/// Content-Type header is absent, declares no charset, or declares a charset that cannot be resolved</returns>
public Encoding GetCharsetEncoding()
{
    // ContentType is null when the response carries no Content-Type header, so it must be
    // accessed null-conditionally; a missing header falls through to the UTF-8 default below
    var charset = _response.Content.Headers.ContentType?.CharSet;
    if (string.IsNullOrEmpty(charset))
    {
        return EncodingHelpers.Utf8NoBom;
    }

    if (EncodingHelpers.TryGetWellKnownCharset(charset, out var wellKnown))
    {
        return wellKnown;
    }

    return EncodingHelpers.TryGetFromCharset(charset, out var parsed)
               ? parsed
               : EncodingHelpers.Utf8NoBom;
}

public void Dispose()
{
Expand Down
11 changes: 9 additions & 2 deletions tracer/src/Datadog.Trace/Agent/Transports/HttpStreamResponse.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,25 @@ namespace Datadog.Trace.Agent.Transports
{
internal class HttpStreamResponse : IApiResponse
{
private readonly Encoding _encoding;
private readonly HttpHeaders _headers;

public HttpStreamResponse(int statusCode, long contentLength, Encoding encoding, Stream responseStream, HttpHeaders headers)
{
StatusCode = statusCode;
ContentLength = contentLength;
ContentEncoding = encoding;
ResponseStream = responseStream;
_encoding = encoding;
_headers = headers;
}

public int StatusCode { get; }

public long ContentLength { get; }

public Encoding ContentEncoding { get; }
public string ContentTypeHeader => _headers.GetValue("Content-Type");

public string ContentEncodingHeader => _headers.GetValue("Content-Encoding");

public Stream ResponseStream { get; }

Expand All @@ -37,6 +40,10 @@ public void Dispose()

public string GetHeader(string headerName) => _headers.GetValue(headerName);

public Encoding GetCharsetEncoding() => _encoding;

public ContentEncodingType GetContentEncodingType() => ApiResponseExtensions.GetContentEncodingType(ContentEncodingHeader);

public Task<Stream> GetStreamAsync()
{
return Task.FromResult(ResponseStream);
Expand Down
52 changes: 2 additions & 50 deletions tracer/src/Datadog.Trace/HttpOverStreams/HttpMessage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@

using System;
using System.Text;
using Datadog.Trace.Agent;
using Datadog.Trace.Logging;
using Datadog.Trace.Util;

namespace Datadog.Trace.HttpOverStreams
{
internal abstract class HttpMessage
{
private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor<HttpMessage>();

public HttpMessage(HttpHeaders headers, IHttpContent content)
{
Headers = headers;
Expand All @@ -28,53 +27,6 @@ public HttpMessage(HttpHeaders headers, IHttpContent content)

public string ContentType => Headers.GetValue("Content-Type");

public Encoding GetContentEncoding()
{
// reduce getter calls
var contentType = ContentType;

if (contentType == null)
{
return null;
}

if (string.Equals("application/json", contentType, StringComparison.OrdinalIgnoreCase))
{
// Default
return EncodingHelpers.Utf8NoBom;
}

// text/plain; charset=utf-8
foreach (var pair in contentType.SplitIntoSpans(';'))
{
var parts = pair.AsSpan();
var index = parts.IndexOf('=');

if (index != -1)
{
var firstPart = parts.Slice(0, index).Trim();

if (!firstPart.Equals("charset".AsSpan(), StringComparison.OrdinalIgnoreCase))
{
continue;
}

var secondPart = parts.Slice(index + 1).Trim();

if (secondPart.Equals("utf-8".AsSpan(), StringComparison.OrdinalIgnoreCase))
{
return EncodingHelpers.Utf8NoBom;
}

if (secondPart.Equals("us-ascii".AsSpan(), StringComparison.OrdinalIgnoreCase))
{
return Encoding.ASCII;
}
}
}

Log.Warning("Assuming default UTF-8, Could not find an encoding for: {ContentType}", contentType);
return EncodingHelpers.Utf8NoBom;
}
public Encoding GetContentEncoding() => ApiResponseExtensions.GetCharsetEncoding(ContentType);
}
}
Loading

0 comments on commit dae1d15

Please sign in to comment.