using System;
using Unity.PerformanceTesting;
using Unity.PerformanceTesting.Benchmark;
using Unity.Burst;
using Unity.Collections.LowLevel.Unsafe;
using Unity.Jobs;
namespace Unity.Collections.PerformanceTests
{
    /// <summary>
    /// Specifies a class containing performance test methods which should be included in allocator benchmarking,
    /// and selects the code path each allocator performance test executes on.
    /// </summary>
    /// <remarks>
    /// The individual values are unlikely to be needed in user code, but user code names the enum type in two places.
    /// On the class holding the performance test methods:
    /// <code>
    /// [Benchmark(typeof(BenchmarkAllocatorType))]  // &lt;---- HERE
    /// class FooAllocatorPerformanceTestMethods
    /// </code>
    /// and as a parameter of each performance test method:
    /// <code>
    /// [Test, Performance]
    /// public unsafe void AllocatorPerfTestExample(
    ///     [Values(1, 2, 4, 8)] int workerThreads,
    ///     [Values(1024, 1024 * 1024)] int allocSize,
    ///     [Values] BenchmarkAllocatorType type)  // &lt;---- HERE
    /// {
    /// </code>
    /// Explicit values may be listed in the test method parameter, but it is recommended to leave the argument
    /// implicitly covering all enum values, as in the example above.
    /// </remarks>
    [BenchmarkComparison(BenchmarkAllocatorConfig.Persistent, "Persistent (E)")]
    [BenchmarkComparisonExternal(BenchmarkAllocatorConfig.TempJob, "TempJob (E)")]
    [BenchmarkComparisonExternal(BenchmarkAllocatorConfig.Temp, "Temp (E)")]
    [BenchmarkComparisonDisplay(SampleUnit.Microsecond, 1, BenchmarkAllocatorConfig.kRankingStat)]
    public enum BenchmarkAllocatorType : int
    {
        /// <summary>The allocator performance test executes on a managed (not Burst compiled) code path.</summary>
        [BenchmarkName("{0} (S)")]
        Managed = 0,

        /// <summary>The allocator performance test executes on a Burst compiled code path with safety checks enabled.</summary>
        [BenchmarkName("{0} (S+B)")]
        BurstSafety = 1,

        /// <summary>The allocator performance test executes on a Burst compiled code path with safety checks disabled.</summary>
        [BenchmarkName("{0} (B)")]
        BurstNoSafety = 2,
    }
    /// <summary>
    /// Shared configuration for the allocator benchmarks: identifiers for the engine-provided allocator
    /// comparisons, warmup/measurement/allocation counts, and the editor menu entry that generates the
    /// markdown report.
    /// </summary>
    internal static class BenchmarkAllocatorConfig
    {
        // Identifiers for the engine-provided allocators used as comparisons. They are negative so they
        // cannot collide with the implicitly numbered (0, 1, 2) members of BenchmarkAllocatorType, to which
        // they are cast when a comparison is run.
        internal const int Temp = -1;
        internal const int TempJob = -2;
        internal const int Persistent = -3;

        // Statistic of the measured sample sets used when comparing against the baseline.
        internal const BenchmarkRankingStatistic kRankingStat = BenchmarkRankingStatistic.Min;

        // Number of warmup sample sets and measured sample sets per benchmark.
        internal const int kCountWarmup = 5;
        internal const int kCountMeasure = 50;

#if UNITY_STANDALONE || UNITY_EDITOR
        // Number of consecutive allocations made per sample set (per worker thread when multithreaded).
        internal const int kCountAllocations = 150;
#else
        // Still allows allocator tests on non-desktop platforms, but with a much lower memory requirement
        internal const int kCountAllocations = 25;
#endif

        /// <summary>
        /// Generates the allocator benchmark markdown report. Exposed as an editor menu item when compiled
        /// for the Unity editor.
        /// </summary>
#if UNITY_EDITOR
        [UnityEditor.MenuItem("DOTS/Unity.Collections/Generate Allocator Benchmarks")]
#endif
        static void RunBenchmarks()
        {
            // A regular (non-verbatim) string literal cannot contain a raw line break, so the separators
            // between the description sentences are written as \n escapes.
            BenchmarkGenerator.GenerateMarkdown(
                "Allocators",
                typeof(BenchmarkAllocatorType),
                "../../Packages/com.unity.collections/Documentation~/performance-comparison-allocators.md",
                $"The following benchmarks make **{kCountAllocations} consecutive allocations** per sample set."
                    + $"\nMultithreaded benchmarks make the full **{kCountAllocations} consecutive allocations *per worker thread*** per sample set."
                    + $"\nThe **{kRankingStat} of {kCountMeasure} sample sets** is compared against the baseline on the far right side of the table."
                    + $"\n{kCountWarmup} extra sample sets are run as warmup.",
                "Legend",
                new[]
                {
                    "`(S)` = Safety Enabled",
                    "`(B)` = Burst Compiled *with Safety Disabled*",
                    "`(S+B)` = Burst Compiled *with Safety Enabled*",
                    "`(E)` = Engine Provided",
                    "",
                    "*`italic`* results are for benchmarking comparison only; these are not included in standard Performance Framework tests",
                });
        }
    }
    /// <summary>
    /// Contract for an allocator performance test that is driven by <see cref="BenchmarkAllocatorRunner{T}"/>.
    /// Implementing this interface lets both the Performance Test Framework and the Benchmark Framework generate
    /// and run the test for each context described by <see cref="BenchmarkAllocatorType"/>.
    /// </summary>
    public interface IBenchmarkAllocator
    {
        /// <summary>
        /// Receives the optional extra integer arguments given to <see cref="BenchmarkAllocatorRunner{T}.Run"/>.
        /// Override it to store those values as fields of the implementing type; the default implementation
        /// ignores them.
        /// </summary>
        /// <param name="args">A variable number of extra arguments passed through to the test implementation.</param>
        void SetParams(params int[] args) { }

        /// <summary>
        /// Creates the allocator used in performance testing.
        /// </summary>
        /// <param name="builtinOverride">
        /// When this is <see cref="Allocator.None"/>, create the custom allocator type. Otherwise use the provided
        /// <see cref="Allocator"/> value for allocations in performance testing.
        /// </param>
        void CreateAllocator(Allocator builtinOverride);

        /// <summary>
        /// Frees memory and destroys the custom allocator if it was not created from an <see cref="Allocator"/> value.
        /// </summary>
        void DestroyAllocator();

        /// <summary>
        /// Runs before each measurement of a sample set. Typically used to establish the initial state so that
        /// every measured sample executes in the same way.
        /// </summary>
        /// <param name="workers">Number of job workers for this allocation test. Work is duplicated across job workers rather than split across them.</param>
        /// <param name="size">The base size of each allocation in a single measurement.</param>
        /// <param name="allocations">The number of allocations in a single measurement.</param>
        void Setup(int workers, int size, int allocations);

        /// <summary>
        /// Runs after each measurement of a sample set. Typically used to dispose or invalidate the state created
        /// in <see cref="Setup"/>.
        /// </summary>
        void Teardown();

        /// <summary>
        /// The code executed during performance measurement. It should usually be general enough to work with any
        /// allocator, so when allocating or freeing the recommendation is to go through <see cref="AllocatorManager"/>.
        /// </summary>
        /// <param name="workerI">
        /// Index supplied by the executing job: 0 for the single-threaded jobs, and the parallel-for index for the
        /// multithreaded jobs.
        /// </param>
        void Measure(int workerI);
    }
    /// <summary>
    /// Provides the API for running allocator based Performance Framework tests and Benchmark Framework measurements.
    /// This will typically be the sole call from a performance test. See <see cref="Run"/> for more information.
    /// </summary>
    /// <typeparam name="T">An implementation conforming to the <see cref="IBenchmarkAllocator"/> interface for running allocator performance tests and benchmarks.</typeparam>
    [BurstCompile(CompileSynchronously = true)]
    public static class BenchmarkAllocatorRunner<T> where T : unmanaged, IBenchmarkAllocator
    {
        // Single-threaded jobs. Each forwards to Measure with worker index 0; the three variants differ only
        // in their Burst attributes (none, safety checks disabled, safety checks enabled).
        internal unsafe struct JobST : IJob
        {
            [NativeDisableUnsafePtrRestriction] public T* methods;
            public void Execute() => methods->Measure(0);
        }

        [BurstCompile(CompileSynchronously = true, DisableSafetyChecks = true)]
        internal unsafe struct JobBurstST : IJob
        {
            [NativeDisableUnsafePtrRestriction] public T* methods;
            public void Execute() => methods->Measure(0);
        }

        [BurstCompile(CompileSynchronously = true, DisableSafetyChecks = false)]
        internal unsafe struct JobSafetyBurstST : IJob
        {
            [NativeDisableUnsafePtrRestriction] public T* methods;
            public void Execute() => methods->Measure(0);
        }

        // Parallel-for jobs. Each forwards the parallel-for index to Measure; the three variants differ only
        // in their Burst attributes (none, safety checks disabled, safety checks enabled).
        internal unsafe struct JobMT : IJobParallelFor
        {
            [NativeDisableUnsafePtrRestriction] public T* methods;
            public void Execute(int index) => methods->Measure(index);
        }

        [BurstCompile(CompileSynchronously = true, DisableSafetyChecks = true)]
        internal unsafe struct JobBurstMT : IJobParallelFor
        {
            [NativeDisableUnsafePtrRestriction] public T* methods;
            public void Execute(int index) => methods->Measure(index);
        }

        [BurstCompile(CompileSynchronously = true, DisableSafetyChecks = false)]
        internal unsafe struct JobSafetyBurstMT : IJobParallelFor
        {
            [NativeDisableUnsafePtrRestriction] public T* methods;
            public void Execute(int index) => methods->Measure(index);
        }

        /// <summary>
        /// Maps the engine-provided comparison identifiers to the built-in allocator they measure. Every other
        /// value yields <see cref="Allocator.None"/>.
        /// </summary>
        static Allocator BuiltinOverrideFor(BenchmarkAllocatorType type)
        {
            switch (type)
            {
                case (BenchmarkAllocatorType)(BenchmarkAllocatorConfig.Temp):
                    return Allocator.Temp;
                case (BenchmarkAllocatorType)(BenchmarkAllocatorConfig.TempJob):
                    return Allocator.TempJob;
                case (BenchmarkAllocatorType)(BenchmarkAllocatorConfig.Persistent):
                    return Allocator.Persistent;
                default:
                    return Allocator.None;
            }
        }

        /// <summary>
        /// Runs the single-worker measurement for <paramref name="type"/>. A <paramref name="type"/> value that
        /// matches no case measures nothing; <c>DestroyAllocator</c> is still called.
        /// </summary>
        static unsafe void RunST(BenchmarkAllocatorType type, int baseSize, int allocations, params int[] args)
        {
            var methods = new T();
            methods.SetParams(args);
            switch (type)
            {
                // The engine-provided comparisons and the managed path all measure the job without a Burst
                // attribute; they differ only in the allocator handed to CreateAllocator.
                case (BenchmarkAllocatorType)(BenchmarkAllocatorConfig.Temp):
                case (BenchmarkAllocatorType)(BenchmarkAllocatorConfig.TempJob):
                case (BenchmarkAllocatorType)(BenchmarkAllocatorConfig.Persistent):
                case BenchmarkAllocatorType.Managed:
                    methods.CreateAllocator(BuiltinOverrideFor(type));
                    BenchmarkMeasure.Measure(typeof(T),
                        BenchmarkAllocatorConfig.kCountWarmup, BenchmarkAllocatorConfig.kCountMeasure,
                        () => new JobST { methods = (T*)UnsafeUtility.AddressOf(ref methods) }.Schedule().Complete(),
                        () => methods.Setup(1, baseSize, allocations), () => methods.Teardown());
                    break;
                case BenchmarkAllocatorType.BurstSafety:
                    methods.CreateAllocator(Allocator.None);
                    BenchmarkMeasure.Measure(typeof(T),
                        BenchmarkAllocatorConfig.kCountWarmup, BenchmarkAllocatorConfig.kCountMeasure,
                        () => new JobSafetyBurstST { methods = (T*)UnsafeUtility.AddressOf(ref methods) }.Run(),
                        () => methods.Setup(1, baseSize, allocations), () => methods.Teardown());
                    break;
                case BenchmarkAllocatorType.BurstNoSafety:
                    methods.CreateAllocator(Allocator.None);
                    BenchmarkMeasure.Measure(typeof(T),
                        BenchmarkAllocatorConfig.kCountWarmup, BenchmarkAllocatorConfig.kCountMeasure,
                        () => new JobBurstST { methods = (T*)UnsafeUtility.AddressOf(ref methods) }.Run(),
                        () => methods.Setup(1, baseSize, allocations), () => methods.Teardown());
                    break;
            }
            methods.DestroyAllocator();
        }

        /// <summary>
        /// Runs the multi-worker measurement for <paramref name="type"/>, scheduling a parallel-for job over
        /// <paramref name="workers"/> indices. A <paramref name="type"/> value that matches no case measures
        /// nothing; <c>DestroyAllocator</c> is still called.
        /// </summary>
        static unsafe void RunMT(BenchmarkAllocatorType type, int baseSize, int allocations, int workers, params int[] args)
        {
            var methods = new T();
            methods.SetParams(args);
            switch (type)
            {
                // The engine-provided comparisons and the managed path all measure the job without a Burst
                // attribute; they differ only in the allocator handed to CreateAllocator.
                case (BenchmarkAllocatorType)(BenchmarkAllocatorConfig.Temp):
                case (BenchmarkAllocatorType)(BenchmarkAllocatorConfig.TempJob):
                case (BenchmarkAllocatorType)(BenchmarkAllocatorConfig.Persistent):
                case BenchmarkAllocatorType.Managed:
                    methods.CreateAllocator(BuiltinOverrideFor(type));
                    BenchmarkMeasure.MeasureParallel(typeof(T),
                        BenchmarkAllocatorConfig.kCountWarmup, BenchmarkAllocatorConfig.kCountMeasure,
                        () => new JobMT { methods = (T*)UnsafeUtility.AddressOf(ref methods) }.Schedule(workers, 1).Complete(),
                        () => methods.Setup(workers, baseSize, allocations), () => methods.Teardown());
                    break;
                case BenchmarkAllocatorType.BurstSafety:
                    methods.CreateAllocator(Allocator.None);
                    BenchmarkMeasure.MeasureParallel(typeof(T),
                        BenchmarkAllocatorConfig.kCountWarmup, BenchmarkAllocatorConfig.kCountMeasure,
                        () => new JobSafetyBurstMT { methods = (T*)UnsafeUtility.AddressOf(ref methods) }.Schedule(workers, 1).Complete(),
                        () => methods.Setup(workers, baseSize, allocations), () => methods.Teardown());
                    break;
                case BenchmarkAllocatorType.BurstNoSafety:
                    methods.CreateAllocator(Allocator.None);
                    BenchmarkMeasure.MeasureParallel(typeof(T),
                        BenchmarkAllocatorConfig.kCountWarmup, BenchmarkAllocatorConfig.kCountMeasure,
                        () => new JobBurstMT { methods = (T*)UnsafeUtility.AddressOf(ref methods) }.Schedule(workers, 1).Complete(),
                        () => methods.Setup(workers, baseSize, allocations), () => methods.Teardown());
                    break;
            }
            methods.DestroyAllocator();
        }

        /// <summary>
        /// Called from a typical performance test method to provide both Performance Framework measurements as well as
        /// Benchmark Framework measurements. A typical usage is similar to:
        /// <code>
        /// [Test, Performance]
        /// [Category("Performance")]
        /// [BenchmarkTestFootnote]
        /// public unsafe void FixedSize(
        ///     [Values(1, 2, 4, 8)] int workerThreads,
        ///     [Values(1024, 1024 * 1024)] int allocSize,
        ///     [Values] BenchmarkAllocatorType type)
        /// {
        ///     BenchmarkAllocatorRunner&lt;Rewindable_FixedSize&gt;.Run(type, allocSize, workerThreads);
        /// }
        /// </code>
        /// </summary>
        /// <param name="type">The benchmark or performance measurement type to run for allocators, i.e. <see cref="BenchmarkAllocatorType.Managed"/> etc.</param>
        /// <param name="baseSize">The size to base allocations off of, whether fixed for all allocations, increasing in size, or anything else.</param>
        /// <param name="workers">The number of job workers to run performance tests on. These are duplicated across workers rather than split across workers.</param>
        /// <param name="args">Optional arguments that can be stored in a test implementation class.</param>
        public static unsafe void Run(BenchmarkAllocatorType type, int baseSize, int workers, params int[] args)
        {
            // Exactly one worker uses the single-threaded jobs; any other count uses the parallel-for jobs.
            if (workers == 1)
                RunST(type, baseSize, BenchmarkAllocatorConfig.kCountAllocations, args);
            else
                RunMT(type, baseSize, BenchmarkAllocatorConfig.kCountAllocations, workers, args);
        }
    }
    /// <summary>
    /// A useful set of functionality commonly found in allocator performance and benchmark tests for most allocator types. Typically
    /// wrapped in a separate utility class for a set of tests to a specific allocator type.
    /// </summary>
    public struct BenchmarkAllocatorUtil
    {
        /// <summary>
        /// [worker][sequential allocation]
        /// Used to store the pointer from allocations so it may be freed later.
        /// </summary>
        public NativeArray<NativeArray<IntPtr>> AllocPtr { get; private set; }

        /// <summary>
        /// [sequential allocation]
        /// Used to store the size of allocations so it may be freed later, as some allocators require the size to be explicitly given when freed.
        /// Separate arrays for each worker are not provided because workers duplicate the same work rather than splitting it in some manner.
        /// </summary>
        public NativeArray<int> AllocSize { get; private set; }

        /// <summary>
        /// To be called prior to each measurement. Sets up the allocation and size storage used for freeing allocations, whether this happens
        /// during teardown following each measurement, or freeing is the functionality being measured itself.
        /// </summary>
        /// <param name="workers">The number of job workers to run performance tests on. These are duplicated across workers rather than split across workers.</param>
        /// <param name="baseSize">The size to base allocations off of, whether fixed for all allocations, increasing in size, or anything else.</param>
        /// <param name="growthRate">
        /// - If &lt; 0, a performance measurement's allocations start at the largest size and decrease linearly to the `baseSize`.
        /// - If &gt; 0, a performance measurement's allocations start at the `baseSize` and increase linearly
        /// - If 0, the allocation size is equivalent to the `baseSize` for all of a performance measurement's allocations
        /// </param>
        /// <param name="allocations">The number of allocations in a single measurement.</param>
        public void Setup(int workers, int baseSize, int growthRate, int allocations)
        {
            // One pointer slot per allocation, per worker.
            var allocStorage = new NativeArray<NativeArray<IntPtr>>(workers, Allocator.Persistent);
            for (int i = 0; i < workers; i++)
                allocStorage[i] = new NativeArray<IntPtr>(allocations, Allocator.Persistent);
            AllocPtr = allocStorage;

            var sizeStorage = new NativeArray<int>(allocations, Allocator.Persistent);
            for (int i = 0; i < allocations; i++)
            {
                if (growthRate >= 0)
                    sizeStorage[i] = baseSize + growthRate * i;
                else
                    // Negative growth: begin at the largest size and step down so the final entry equals baseSize.
                    sizeStorage[i] = baseSize + (-growthRate * (allocations - 1)) + growthRate * i;
            }
            AllocSize = sizeStorage;
        }

        /// <summary>
        /// To be called following each measurement. Frees the memory allocated in the <see cref="Setup"/> method.
        /// This also frees the memory allocated by the given allocator using the stored information in this class.
        /// </summary>
        /// <param name="allocator">A handle to the allocator being measured.</param>
        public unsafe void Teardown(AllocatorManager.AllocatorHandle allocator)
        {
            if (AllocPtr.IsCreated)
            {
                for (int i = 0; i < AllocPtr.Length; i++)
                {
                    var inner = AllocPtr[i];
                    for (int j = 0; j < inner.Length; j++)
                    {
                        // Every worker made the same sequence of allocations, so the size is looked up by
                        // allocation index only.
                        AllocatorManager.Free(allocator, (void*)inner[j], AllocSize[j], 0);
                        inner[j] = IntPtr.Zero;
                    }
                }
            }
            Teardown();
        }

        /// <summary>
        /// To be called following each measurement. Frees the memory allocated in the <see cref="Setup"/> method.
        /// This does not free the memory allocated by a given allocator type used in measurement tests.
        /// Calling it again without an intervening <see cref="Setup"/> does nothing.
        /// </summary>
        public void Teardown()
        {
            var workerStorage = AllocPtr;
            if (workerStorage.IsCreated)
            {
                for (int i = 0; i < workerStorage.Length; i++)
                {
                    var inner = workerStorage[i];
                    if (inner.IsCreated)
                        inner.Dispose();
                }
                workerStorage.Dispose();
            }

            var sizeStorage = AllocSize;
            if (sizeStorage.IsCreated)
                sizeStorage.Dispose();

            // The property getters hand back copies, so disposing them leaves the stored values still reporting
            // IsCreated. Clear the stored values so a later Teardown does not dispose the same buffers again.
            AllocPtr = default;
            AllocSize = default;
        }
    }
}