
With the database set up as follows:

CREATE TYPE dbo.TableType AS TABLE (
prop1 int, 
prop2 datetime2, 
prop3 varchar(1000)
);

GO

CREATE OR ALTER PROC dbo.TestTableTypePerf
    @Data dbo.TableType READONLY
AS
SELECT COUNT(*) FROM @Data;

The following code passes the TVP values in a streaming fashion. The enumerable isn't evaluated until after the ExecuteScalarAsync call, and there is no need for the whole 5,000,000 elements to be materialized into a collection on the client.

It is becoming increasingly popular to eschew TVPs in favour of JSON strings. Can anything similar be done for JSON?

using System.Data;
using Microsoft.Data.SqlClient;
using Microsoft.Data.SqlClient.Server;

const string connectionString = @"...";

await TvpTest();

return;

static async Task TvpTest()
{
    await using var conn = new SqlConnection(connectionString);
    await conn.OpenAsync();

    await using var cmd = new SqlCommand("dbo.TestTableTypePerf", conn);
    cmd.CommandType = CommandType.StoredProcedure;

    cmd.Parameters.Add(new SqlParameter
    {
        ParameterName = "@Data",
        SqlDbType = SqlDbType.Structured,
        TypeName = "dbo.TableType",
        Value = GetEnumerableOfRandomSqlDataRecords(5_000_000)
    });

    Console.WriteLine($"calling ExecuteScalarAsync at {DateTime.Now:O}");
    var result = await cmd.ExecuteScalarAsync();

    Console.WriteLine($"writing result at {DateTime.Now:O}");
    Console.WriteLine(result);
}

static IEnumerable<SqlDataRecord> GetEnumerableOfRandomSqlDataRecords(uint length)
{
    SqlMetaData[] metaData =
    [
        new SqlMetaData("prop1", SqlDbType.Int),
        new SqlMetaData("prop2", SqlDbType.DateTime2),
        new SqlMetaData("prop3", SqlDbType.VarChar, 1000)
    ];

    foreach (var dto in GetEnumerableOfRandomDto(length))
    {
        var record = new SqlDataRecord(metaData);
        record.SetInt32(0, dto.Prop1);
        record.SetDateTime(1, dto.Prop2);
        record.SetString(2, dto.Prop3);

        yield return record;
    }
}

static IEnumerable<Dto> GetEnumerableOfRandomDto(uint length)
{
    var rnd = new Random();

    for (var i = 0; i < length; i++)
    {
        yield return new Dto(rnd.Next(1, int.MaxValue),
                             DateTime.Now.AddMinutes(rnd.Next(1, 10000)),
                             Guid.NewGuid().ToString());

        if ((i + 1) % 100_000 == 0)
            Console.WriteLine($"Generated enumerable {i + 1} at {DateTime.Now:O}");
    }
}

internal record Dto(int Prop1, DateTime Prop2, string Prop3);

Martin Smith

2 Answers


I have made a more efficient and simpler version of your JSON test, and also compared it with the TVP test, using BenchmarkDotNet (the gold standard in .NET benchmarking).
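For context, here is a minimal sketch of the harness these fragments plug into. This wiring is an assumption on my part — the answer only shows the benchmark members; the class name, usings and runner are mine, with [MemoryDiagnoser] inferred from the allocation columns in the results below:

using System.Data;
using System.IO.Pipelines;
using System.Text.Json;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using Microsoft.Data.SqlClient;
using Microsoft.Data.SqlClient.Server;

[MemoryDiagnoser] // inferred: the results report Gen0/Gen1/Allocated columns
public class JsonVsTvpBenchmarks // hypothetical name
{
    private const string connectionString = @"..."; // as in the question

    // The [Params] property, JsonTest, TvpTest and the helper methods
    // shown below all live in this class.
}

public static class Program
{
    public static void Main() => BenchmarkRunner.Run<JsonVsTvpBenchmarks>();
}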

Improvements:

  • We use the sync API for SqlClient, as the async API currently has a serious performance disadvantage.
  • To ensure we're actually reading all three columns, I've added a derived subquery that does a SELECT DISTINCT over all the columns.
[Params(1, 100, 1000, 50000)]
public uint Length { get; set; }

[Benchmark]
public int JsonTest()
{
    const string query = """
        SELECT COUNT(*)
        FROM (
            SELECT DISTINCT Prop1, Prop2, Prop3
            FROM OPENJSON(@JsonData)
                 WITH (
                     Prop1 int,
                     Prop2 datetime2(7),
                     Prop3 nvarchar(1000)
                 ) j
        ) t;
        """;

    using var conn = new SqlConnection(connectionString);
    conn.Open();
    using var cmd = new SqlCommand(query, conn);
    cmd.CommandTimeout = 0;

    var data = GetEnumerableOfRandomDto(Length);
    using var reader = new StreamReader(GetJsonStream(data));
    cmd.Parameters.Add("@JsonData", SqlDbType.NVarChar, -1).Value = reader;

    return (int)cmd.ExecuteScalar();
}

  • The DTOs are created with identical values for prop2 and prop3 on every row, to avoid the per-row overhead of Random and string generation; Prop1 simply becomes the loop counter.
static IEnumerable<Dto> GetEnumerableOfRandomDto(uint length)
{
    var rnd = new Random();
    var s = Guid.NewGuid().ToString();
    var dt = DateTime.Now.AddMinutes(rnd.Next(1, 10000));

    for (var i = 0; i < length; i++)
    {
        yield return new Dto(i, dt, s);
    }
}

  • We get the TextReader using the new System.IO.Pipelines API as follows:
    • Create a Pipe.
    • On a separate thread, push the DTOs to the pipe writer as fast as it will take them.
    • Get the pipe reader as a Stream and pass it into StreamReader.
    • This avoids having to use that whole fake pass-through Stream complication, as the Pipe does this all for us.
public static Stream GetJsonStream<T>(T obj, JsonSerializerOptions? options = null)
{
    var pipe = new Pipe();
    _ = Task.Run(() =>
    {
        try
        {
            var utf8writer = new Utf8JsonWriter(pipe.Writer);
            JsonSerializer.Serialize(utf8writer, obj, options);
            utf8writer.Flush(); // make sure any buffered bytes reach the pipe before completing
            pipe.Writer.Complete();
        }
        catch (Exception ex)
        {
            // Propagate serialization failures to the reading side
            pipe.Writer.Complete(ex);
        }
    });
    return pipe.Reader.AsStream();
}
  • For TvpTest, you are not supposed to recreate the SqlDataRecord for each row. Contrary to just about every other usage of IEnumerable on the planet, reusing a single SqlDataRecord is the recommended pattern. Note that the datetime2 scale is explicitly specified.
static IEnumerable<SqlDataRecord> GetRecordsOfRandomDto(uint length)
{
    var list = GetEnumerableOfRandomDto(length);
    var record = new SqlDataRecord(
        new("prop1", SqlDbType.Int),
        new("prop2", SqlDbType.DateTime2, 0, 7),
        new("prop3", SqlDbType.NVarChar, 1000));

    foreach (var dto in list)
    {
        record.SetInt32(0, dto.Prop1);
        record.SetDateTime(1, dto.Prop2);
        record.SetString(2, dto.Prop3);
        yield return record;
    }
}
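The TvpTest benchmark body itself isn't shown above; here is a minimal sketch of the shape it presumably takes, assuming the sync API and the same SELECT DISTINCT subquery as JsonTest (the exact query text and the reuse of dbo.TableType from the question are my assumptions):

[Benchmark]
public int TvpTest()
{
    // Assumed shape: same DISTINCT subquery as JsonTest, inlined rather than
    // calling the stored procedure, with the TVP passed as a Structured parameter.
    const string query = """
        SELECT COUNT(*) FROM (
            SELECT DISTINCT prop1, prop2, prop3 FROM @Data
        ) t;
        """;

    using var conn = new SqlConnection(connectionString);
    conn.Open();
    using var cmd = new SqlCommand(query, conn);
    cmd.CommandTimeout = 0;

    cmd.Parameters.Add(new SqlParameter
    {
        ParameterName = "@Data",
        SqlDbType = SqlDbType.Structured,
        TypeName = "dbo.TableType", // assumed; the nvarchar record metadata above suggests a matching type variant
        Value = GetRecordsOfRandomDto(Length)
    });

    return (int)cmd.ExecuteScalar();
}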

The BenchmarkDotNet results are below. You can see that using JSON is quite a lot faster for very small datasets, but TVPs pull away massively at the upper end. So it seems best to use TVPs only when the dataset is likely to reach around 1,000 rows, as they carry a clearly larger startup cost.

| Method   | Length | Mean         | Error       | StdDev       | Median       | Gen0      | Gen1     | Allocated  |
|----------|--------|--------------|-------------|--------------|--------------|-----------|----------|------------|
| JsonTest | 1      | 235.5 μs     | 9.78 μs     | 27.41 μs     | 229.0 μs     | 2.9297    | -        | 10.07 KB   |
| TvpTest  | 1      | 1,014.8 μs   | 36.34 μs    | 102.49 μs    | 991.1 μs     | 3.9063    | -        | 12.26 KB   |
| JsonTest | 100    | 929.4 μs     | 21.54 μs    | 60.03 μs     | 921.2 μs     | 3.9063    | -        | 16.19 KB   |
| TvpTest  | 100    | 1,283.3 μs   | 58.77 μs    | 168.62 μs    | 1,230.0 μs   | 5.8594    | -        | 21.54 KB   |
| JsonTest | 1000   | 5,925.9 μs   | 117.72 μs   | 243.12 μs    | 5,898.3 μs   | 15.6250   | -        | 54.16 KB   |
| TvpTest  | 1000   | 2,449.4 μs   | 43.35 μs    | 96.07 μs     | 2,424.2 μs   | 31.2500   | -        | 106.3 KB   |
| JsonTest | 50000  | 398,435.2 μs | 7,684.96 μs | 11,021.55 μs | 397,393.5 μs | 1000.0000 | -        | 6877.39 KB |
| TvpTest  | 50000  | 47,100.7 μs  | 831.21 μs   | 736.85 μs    | 47,121.9 μs  | 909.0909  | 545.4545 | 5045.5 KB  |

I also tested with a single Prop1 int column in both, with the query SELECT MAX(Prop1) FROM … for each. The results are interesting: the TVP version now uses much less memory even for 50000 rows, whereas JSON used the same as before, and the speed difference at high row counts was also larger.
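Those single-column queries presumably looked something like this (a reconstruction on my part; the FROM sources are assumed from context):

// Hypothetical reconstruction of the single-column variants:
const string jsonQuery = "SELECT MAX(Prop1) FROM OPENJSON(@JsonData) WITH (Prop1 int) j;";
const string tvpQuery  = "SELECT MAX(prop1) FROM @Data;"; // with a one-column table type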

| Method   | Length | Mean         | Error       | StdDev       | Median       | Gen0      | Gen1     | Allocated  |
|----------|--------|--------------|-------------|--------------|--------------|-----------|----------|------------|
| JsonTest | 1      | 249.2 μs     | 13.82 μs    | 38.06 μs     | 246.8 μs     | 2.9297    | -        | 10.07 KB   |
| TvpTest  | 1      | 1,003.7 μs   | 39.79 μs    | 110.26 μs    | 984.1 μs     | 0.9766    | -        | 5.33 KB    |
| JsonTest | 100    | 743.7 μs     | 19.87 μs    | 54.05 μs     | 738.7 μs     | 4.8828    | -        | 16.21 KB   |
| TvpTest  | 100    | 966.3 μs     | 28.30 μs    | 76.98 μs     | 937.2 μs     | 0.9766    | -        | 5.33 KB    |
| JsonTest | 1000   | 4,167.8 μs   | 83.25 μs    | 197.85 μs    | 4,179.7 μs   | 15.6250   | -        | 54.16 KB   |
| TvpTest  | 1000   | 1,413.6 μs   | 21.73 μs    | 22.32 μs     | 1,415.0 μs   | -         | -        | 5.33 KB    |
| JsonTest | 50000  | 282,662.2 μs | 5,414.21 μs | 13,780.90 μs | 279,611.1 μs | 1000.0000 | 500.0000 | 6874.92 KB |
| TvpTest  | 50000  | 19,903.0 μs  | 393.06 μs   | 482.71 μs    | 19,872.4 μs  | -         | -        | 5.47 KB    |

Further notes:

  • Using a StreamReader does have the disadvantage of converting from UTF-8 to UTF-16. I did see a small improvement in some cases by using a varbinary parameter and writing the UTF-8 Stream directly to the server, then CASTing server-side to varchar and then to nvarchar (see the sketch after this list), but it wasn't consistent. Unfortunately, SqlClient does not accept a Stream of UTF-8 bytes for a varchar value.
  • Even more unfortunately, there is no way to use System.Text.Json to write UTF-16, and OPENJSON only accepts nvarchar, so there is almost certainly some efficiency lost in conversion.
  • I also tried XML, using .nodes() and .value() in various ways. All were less efficient than the other two methods in all cases.
    • XmlReader needs string values to be passed in, meaning that a lot of strings are generated for the converted int and DateTime values. This may still be an option for a use case which only involves strings.
    • XmlSerializer will only use an XmlReader output.
    • XDocument can generate an XmlReader, but you still need to generate all those strings.
    • I tried generating the XML manually in both UTF-8 and UTF-16 using the Pipe and reading using a StreamReader, as well as passing it as a Stream directly and CASTing it on the server.
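As promised above, a sketch of the varbinary variant from the first note. The CAST/COLLATE chain and the specific UTF-8 collation are my assumptions (UTF-8 collations require SQL Server 2019+):

// Sketch only: pass the raw UTF-8 bytes as varbinary(max), then reinterpret
// them server-side. The collation name below is an assumption.
const string query = """
    DECLARE @Json nvarchar(max) = CAST(
        CAST(@JsonData AS varchar(max)) COLLATE Latin1_General_100_CI_AS_SC_UTF8
        AS nvarchar(max));
    SELECT COUNT(*) FROM OPENJSON(@Json)
        WITH (Prop1 int, Prop2 datetime2(7), Prop3 nvarchar(1000)) j;
    """;
using var cmd = new SqlCommand(query, conn);
cmd.Parameters.Add("@JsonData", SqlDbType.VarBinary, -1).Value = GetJsonStream(data);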

This was tested using SqlClient v6.0.1 and LocalDB 2019 RTM-CU29-GDR. I did not test using TDS v8.0, but some improvements have been made there to streaming large unknown-length parameters.

Charlieface

The topic Sample: Streaming to SQL Server does show how parameters can be set from a stream.
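For reference, the streaming parameter shapes that sample demonstrates (the variable names here are illustrative only):

// SqlClient lets a max-length parameter value be supplied as a pull-based source:
cmd.Parameters.Add("@bin", SqlDbType.VarBinary, -1).Value = stream;      // Stream     -> varbinary(max)
cmd.Parameters.Add("@txt", SqlDbType.NVarChar, -1).Value = textReader;   // TextReader -> nvarchar(max)
cmd.Parameters.Add("@xml", SqlDbType.Xml, -1).Value = xmlReader;         // XmlReader  -> xml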

Whilst System.Text.Json can serialize to a stream, it wasn't apparent to me if, or how, it could be made to serialize an enumerable with evaluation deferred until the value is consumed as a parameter of the equivalent stored proc.

CREATE OR ALTER PROC dbo.TestJsonPerf
@JsonData NVARCHAR(MAX)
AS

SELECT COUNT(*) FROM OPENJSON(@JsonData) WITH (Prop1 int, Prop2 datetime2, Prop3 varchar(1000))

I did find one possible solution in this StackOverflow answer, though. The code below is (now very loosely) based on it and defers evaluating the enumerable until after ExecuteScalarAsync is called. Client process memory stays very low throughout.

⚠️ Performance of this method is still worse than using TVPs, though. With the TVP code in the question, a typical run with 5 million rows took 8 to 9 seconds on my local machine; the JSON streaming version took around 45 seconds, of which around 25 seconds appeared to be spent sending the parameter value. So both the parameter passing and the query execution times were much worse!

The parameter-passing code likely still has some performance improvements that could be made, but that won't help the query performance.

The TVP query ran at DOP 4 in batch mode on my machine and averaged 80 ms per execution in the durations shown in Query Store, whereas the JSON one got a serial plan and averaged over 20 seconds. In practice, though, it is unlikely that anyone will be sending 5 million JSON elements to SQL Server just to count them, and the difference could well be less stark in more realistic use cases.

using System.Data;
using System.Text;
using System.Text.Json;
using Microsoft.Data.SqlClient;

const string connectionString = @"...";

await JsonTest();

return;

static async Task JsonTest()
{
    await using var conn = new SqlConnection(connectionString);
    await conn.OpenAsync();

    await using var cmd = new SqlCommand("dbo.TestJsonPerf", conn);
    cmd.CommandType = CommandType.StoredProcedure;
    cmd.CommandTimeout = 300;

    var data = GetEnumerableOfRandomDto(5_000_000);
    var stream = new JsonArrayStream<Dto>(data);

    cmd.Parameters.Add("@JsonData", SqlDbType.NVarChar, -1).Value =
        new StreamReader(stream, Encoding.UTF8, bufferSize: 1_048_576);

    Console.WriteLine($"calling ExecuteScalarAsync at {DateTime.Now:O}");
    var result = await cmd.ExecuteScalarAsync();

    Console.WriteLine($"writing result at {DateTime.Now:O}. Result was {result}");
}

static IEnumerable<Dto> GetEnumerableOfRandomDto(uint length)
{
    var rnd = new Random();

    for (var i = 0; i < length; i++)
    {
        yield return new Dto(i,
            DateTime.UtcNow.AddMinutes(rnd.Next(1, 10000)),
            Guid.NewGuid().ToString());

        if ((i + 1) % 100_000 == 0)
            Console.WriteLine($"Generated enumerable {i + 1} at {DateTime.Now:O}");
    }
}

internal record Dto(int Prop1, DateTime Prop2, string Prop3);

public class JsonArrayStream<T> : Stream
{
    private readonly IEnumerator<T> _input;
    private bool _arrayClosed;

    private byte[] _bytesToAppendNextRead = [(byte)'['];
    private bool _firstElement = true;
    private bool _disposed;

    public JsonArrayStream(IEnumerable<T> input)
    {
        _input = input.GetEnumerator();
    }

    public override bool CanRead => true;
    public override bool CanSeek => false;
    public override bool CanWrite => false;
    public override long Length => 0;
    public override long Position { get; set; }

    // Stream already implements IDisposable, so overriding Dispose(bool) (rather
    // than re-implementing IDisposable) ensures the enumerator is released
    // however the stream ends up being disposed.
    protected override void Dispose(bool disposing)
    {
        if (!_disposed)
        {
            _disposed = true;
            if (disposing)
                _input.Dispose();
        }

        base.Dispose(disposing);
    }

    public override int Read(byte[] buffer, int offset, int count)
    {
        var bytesWrittenThisRead = 0;

        // First flush any bytes left over from the previous Read call
        if (!TryAppendToBuffer(_bytesToAppendNextRead, buffer, ref offset, count, ref bytesWrittenThisRead))
            return bytesWrittenThisRead;

        while (_input.MoveNext())
        {
            if (!_firstElement)
            {
                // Regardless of the result of this call, proceed to the next step to
                // ensure the bytes for _input.Current are saved before MoveNext is called again
                TryAppendToBuffer([(byte)','], buffer, ref offset, count, ref bytesWrittenThisRead);
            }

            _firstElement = false;

            if (!TryAppendToBuffer(JsonSerializer.SerializeToUtf8Bytes(_input.Current), buffer, ref offset, count, ref bytesWrittenThisRead))
                return bytesWrittenThisRead;
        }

        if (!_arrayClosed)
        {
            TryAppendToBuffer([(byte)']'], buffer, ref offset, count, ref bytesWrittenThisRead);
            _arrayClosed = true;
        }

        return bytesWrittenThisRead;
    }

    // Appends to the read buffer if there is capacity. If not, saves any
    // excess in _bytesToAppendNextRead and returns false.
    private bool TryAppendToBuffer(byte[] bytesToAppend, byte[] buffer, ref int offset, int count,
        ref int bytesWrittenThisRead)
    {
        var bytesToAppendLength = bytesToAppend.Length;

        if (bytesToAppendLength > 0)
        {
            var lengthToWrite = Math.Min(bytesToAppendLength, count - bytesWrittenThisRead);
            Buffer.BlockCopy(bytesToAppend, 0, buffer, offset, lengthToWrite);
            offset += lengthToWrite;
            bytesWrittenThisRead += lengthToWrite;

            if (bytesToAppendLength > lengthToWrite)
            {
                _bytesToAppendNextRead = _bytesToAppendNextRead.Length == 0 ?
                    bytesToAppend[lengthToWrite..] :
                    _bytesToAppendNextRead.Concat(bytesToAppend[lengthToWrite..]).ToArray();

                return false;
            }

            _bytesToAppendNextRead = [];
        }

        return true;
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotSupportedException();
    }

    public override void SetLength(long value)
    {
        throw new NotSupportedException();
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotSupportedException();
    }

    public override void Flush()
    {
        throw new NotSupportedException();
    }
}

Martin Smith