425 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			HLSL
		
	
	
	
	
	
			
		
		
	
	
			425 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			HLSL
		
	
	
	
	
	
| /*
 | |
| MIT License
 | |
| 
 | |
| Copyright (c) 2022 Kleber Garcia
 | |
| 
 | |
| Permission is hereby granted, free of charge, to any person obtaining a copy
 | |
| of this software and associated documentation files (the "Software"), to deal
 | |
| in the Software without restriction, including without limitation the rights
 | |
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 | |
| copies of the Software, and to permit persons to whom the Software is
 | |
| furnished to do so, subject to the following conditions:
 | |
| 
 | |
| The above copyright notice and this permission notice shall be included in all
 | |
| copies or substantial portions of the Software.
 | |
| 
 | |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | |
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | |
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | |
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | |
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 | |
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 | |
| SOFTWARE.
 | |
| */
 | |
| 
 | |
| #ifndef __COVERAGE__
 | |
| #define __COVERAGE__
 | |
| 
 | |
| //Utilities for coverage bit mask on an 8x8 grid.
 | |
| namespace Coverage
 | |
| {
 | |
| 
 | |
| //**************************************************************************************************************/
 | |
| //                                           How to use
 | |
| //**************************************************************************************************************/
 | |
| /*
 | |
To use this library, first call Coverage::GenLUT at the beginning of your compute shader.
The call must be followed by a group sync. Example:

...
Coverage::GenLUT(groupThreadIndex);
GroupMemoryBarrierWithGroupSync();
...

Alternatively, you can dump the contents of the LUT into a buffer and preload it. The LUT lives in gs_quadMask, which has 64 entries.

After this, use the coverage functions.
 | |
| 
 | |
| */
 | |
| 
 | |
| //**************************************************************************************************************/
 | |
| //                                        Coordinate System
 | |
| //**************************************************************************************************************/
 | |
| /*
 | |
The functions in this library follow the same convention: the input is a shape described by its vertices,
and the output is a 64 bit mask with that shape's coverage.
 | |
| 
 | |
| The coordinate system is (0,0) for the top left of an 8x8 grid, and (1,1) for the bottom right.
 | |
| The LSB represents coordinate (0,0), and sample points are centered on the pixel.
 | |
| 
 | |
(0.0,0.0)                       (1.0,0.0)
    |                               |
    |_______________________________|
    |   |   |   |   |   |   |   |   |
    | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
    |___|___|___|___|___|___|___|___|
    |   |   |   |   |   |   |   |   |
    | 8 | 9 | 10| 11| 12| 13| 14| 15|
    |___|___|___|___|___|___|___|___|___(1.0, 2.0/8.0)

 The center of bit 0 is (0.5, 0.5) in pixel units, i.e. (0.5/8.0, 0.5/8.0) in normalized coordinates, and so on.

Any point outside of the range [0,1] is outside the grid.
 | |
| */
 | |
| 
 | |
| //**************************************************************************************************************/
 | |
| //                                           Masks
 | |
| //**************************************************************************************************************/
 | |
| /*
 | |
| Masks are stored in a packed 64 bit represented by uint2.
 | |
| x component represents the first 32 bits, y component the next 32 bits.
 | |
| */
 | |
| 
 | |
| //**************************************************************************************************************/
 | |
| //                                           coverage API
 | |
| //**************************************************************************************************************/
 | |
| 
 | |
| /*
 | |
LUT of 4x4 quad masks. See the BuildQuadMask function.
It holds 4 tables of 16 entries each: neutral, flipped in X, transposed, and transposed + flipped in X (stored complemented).
You can dump this LUT to a buffer and preload it manually,
or just regenerate it in your thread group.
 | |
| */
 | |
| groupshared uint gs_quadMask[16 * 4];
 | |
| 
 | |
| /*
 | |
| Call this function to generate the coverage 4x4 luts
 | |
| groupThreadIndex - the thread index.
 | |
| NOTE: must sync group threads after calling this.
 | |
| */
 | |
| void GenLUT(uint groupThreadIndex);
 | |
| 
 | |
| /*
 | |
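Call this function to get a 64 bit coverage mask for a triangle.
v0, v1, v2 - the triangle coordinates in right hand rule order
showFrontFace - when true, the result includes the bits covered by all three edge masks
showBackface - when true, the result also includes the bits covered by none of the three edge masks
               (only when at least one edge mask is non-empty)
return - the coverage mask for this triangle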
 | |
| */
 | |
| uint2 TriangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface);
 | |
| 
 | |
| 
 | |
| /*
 | |
| Call this function to get a 64 bit coverage mask for a line.
 | |
v0, v1 - the line end points.
thickness - distance, in normalized space, that the line extends to each side of the v0-v1 segment. 1.0 corresponds to the entire 8 pixels of a tile
caps - extra length added past each end point, in normalized space. 1.0 corresponds to 8 pixels of a tile
 | |
| return - the coverage mask of this line
 | |
| */
 | |
| uint2 LineCoverageMask(float2 v0, float2 v1, float thickness, float caps);
 | |
| 
 | |
| 
 | |
| //**************************************************************************************************************/
 | |
| //                                       coverage implementation
 | |
| //**************************************************************************************************************/
 | |
| 
 | |
/*
Builds a compact 4x4 bit quad describing the area under a line.
Each row of the quad occupies the low nibble of one byte (row r lives at bit r * 8).
The line is assumed to have a positive slope <= 1.0, so it climbs at most one row per column.

"incrementMask" has one bit per column; a set bit means the line climbs one row at that column.
Set bits are consumed from the lowest upwards, one per row, and at most three of them can
influence the result. Bits above the low four only ever contribute empty rows.

For example, this increment mask (LSB on the left):
1 0 1 0
generates this 4x4 coverage quad (row 0 at the bottom, LSB on the left):

0 0 0 0
0 0 0 1 <- 3rd bit raises the line here
0 1 1 1 <- 1st bit raises the line here
1 1 1 1 <- the lowest row is always covered
*/
uint BuildQuadMask(uint incrementMask)
{
    // Row 0 is always fully covered.
    uint rowBits = 0xF;
    uint quad = rowBits;

    // Each remaining increment bit opens one more row, covered only to the right of that bit.
    for (int row = 1; row < 4 && incrementMask != 0; ++row)
    {
        const int bitIndex = firstbitlow(incrementMask);
        rowBits = (0xFu << (bitIndex + 1)) & 0xFu;
        incrementMask &= ~(1u << bitIndex);
        quad |= rowBits << (row * 8);
    }

    return quad;
}
 | |
| 
 | |
// Reverses the bit order of the 4 bit nibble found at bit "offset" of "mask".
// Returns the reversed nibble at the same bit offset; every other bit of the result is zero.
uint FlipNibble(uint mask, int offset)
{
    const uint nibble = (mask >> offset) & 0xF;

    uint flipped = 0;
    flipped |= (nibble & 0x1) << 3; // bit 0 -> bit 3
    flipped |= (nibble & 0x2) << 1; // bit 1 -> bit 2
    flipped |= (nibble & 0x4) >> 1; // bit 2 -> bit 1
    flipped |= (nibble & 0x8) >> 3; // bit 3 -> bit 0

    return flipped << offset;
}
 | |
| 
 | |
// Mirrors a 4x4 bit quad horizontally.
// The quad stores one row per byte (low nibble); each row nibble is bit-reversed in place.
// Bits outside the four row nibbles are dropped.
uint FlipQuadInX(uint mask)
{
    uint mirrored = 0;
    [unroll]
    for (int row = 0; row < 4; ++row)
    {
        mirrored |= FlipNibble(mask, row * 8);
    }
    return mirrored;
}
 | |
| 
 | |
// Transposes a 4x4 bit quad: the bit at (row, col) moves to (col, row).
// Rows are stored one per byte (low nibble); bits outside the 4x4 quad are dropped.
uint TransposeQuad(uint mask)
{
    uint transposed = 0;
    [unroll]
    for (int row = 0; row < 4; ++row)
    {
        for (int col = 0; col < 4; ++col)
        {
            const uint bit = (mask >> (row * 8 + col)) & 1u;
            transposed |= bit << (col * 8 + row);
        }
    }
    return transposed;
}
 | |
| 
 | |
/*
Builds all the LUTs necessary for fast bit based coverage, into gs_quadMask.

Layout (16 entries per table, indexed by the 4 bit increment mask):
  [ 0..15] neutral quad                        (BuildQuadMask)
  [16..31] quad flipped in X
  [32..47] transposed quad
  [48..63] transposed + flipped in X, complemented and limited to the 4x4 quad bits

groupThreadIndex - the thread index inside the group. Only threads 0..15 write entries,
                   so the group needs at least 16 threads for the table to be complete.

Each thread derives all four of its entries from a value it computed itself, so no
synchronization is needed inside this function.
NOTE: the caller must still sync group threads (GroupMemoryBarrierWithGroupSync)
after calling this and before any thread reads gs_quadMask.
*/
void GenLUT(uint groupThreadIndex)
{
    if (groupThreadIndex < 16)
    {
        const uint neutral = BuildQuadMask(groupThreadIndex);
        const uint mirrored = FlipQuadInX(neutral);

        gs_quadMask[groupThreadIndex] = neutral;
        gs_quadMask[groupThreadIndex + 16] = mirrored;
        gs_quadMask[groupThreadIndex + 32] = TransposeQuad(neutral);
        gs_quadMask[groupThreadIndex + 48] = (~TransposeQuad(mirrored)) & 0x0F0F0F0F;
    }
}
 | |
| 
 | |
// A 2D line in slope / intercept form: f(x) = a * x + b.
struct AnalyticalLine
{
    float a; // slope
    float b; // offset, i.e. f(0)

    // Builds the line passing through v0 and v1.
    // No guard for v1.x == v0.x: the slope is then the result of a division by zero.
    void Build(float2 v0, float2 v1)
    {
        const float2 delta = v1 - v0;
        a = delta.y / delta.x;
        b = v1.y - a * v1.x;
    }

    // Builds a "flipped" version of the line through v0 and v1.
    // The axes are swapped when the segment is steeper than 45 degrees, and x is mirrored
    // (x -> 1 - x) when the two deltas have different signs, so the stored slope is
    // non-negative with a magnitude of at most 1.
    // outFlipX       - true when x was mirrored.
    // outFlipAxis    - true when x and y were swapped.
    // outIsRightHand - true when the segment runs towards +x (dx >= 0) without ascending
    //                  (v0.y >= v1.y), or runs towards -x while strictly descending (v0.y > v1.y).
    //                  Evaluated on the original, unflipped points.
    // outValid       - false when both (flipped) end points compare equal.
    void BuildFlipped(float2 v0, float2 v1, out bool outFlipX, out bool outFlipAxis, out bool outIsRightHand, out bool outValid)
    {
        float2 delta = v1 - v0;

        // All flags are derived from the original, unflipped segment.
        outFlipAxis = abs(delta.y) > abs(delta.x);
        outFlipX = sign(delta.y) != sign(delta.x);
        if (delta.x >= 0)
            outIsRightHand = v0.y >= v1.y;
        else
            outIsRightHand = v0.y > v1.y;

        // Swap axes so that the major axis becomes x.
        if (outFlipAxis)
        {
            delta = delta.yx;
            v0 = v0.yx;
            v1 = v1.yx;
        }

        const float slope = delta.y / delta.x;

        // Mirror in x, which also negates the slope.
        if (outFlipX)
        {
            v0.x = 1.0 - v0.x;
            v1.x = 1.0 - v1.x;
        }

        a = outFlipX ? -slope : slope;
        b = v1.y - a * v1.x;
        outValid = any(v1 != v0);
    }

    // Evaluates f(x) = a * x + b.
    float Eval(float xval)
    {
        return a * xval + b;
    }

    // Evaluates f(x) = a * x + b for 4 inputs at once.
    float4 Eval4(float4 xvals)
    {
        return a * xvals + b;
    }

    // Returns the 2D point of the line at the given x.
    float2 PointAt(float xv)
    {
        const float yv = Eval(xv);
        return float2(xv, yv);
    }
};
 | |
| 
 | |
/*
Represents the set of bits of an 8x8 grid that is divided by a line.
The grid is described as 2 halves of 4 columns each (along the flipped x axis):
offsets[h] is the row of the line at the first column of half h,
masks[h]   is the 4 bit increment mask of half h, used to look up the quad coverage LUT.
flipX / flipAxis / isRightHand / isValid are the flags produced by AnalyticalLine::BuildFlipped.
*/
struct LineArea
{
    int offsets[2];
    uint masks[2];
    bool isValid;
    bool flipX;
    bool flipAxis;
    bool isRightHand;
    AnalyticalLine debugLine;

    // Recovers one of the boundary points of the line, in normalized grid coordinates.
    // i - column index; there are 8 possible points (0..7).
    float2 GetBoundaryPoint(uint i)
    {
        const int side = i >> 2;     // which 4 column half the point belongs to
        const int column = i & 0x3;  // column inside that half

        // Row = start row of the half + number of increments before this column.
        const uint stepsBefore = masks[side] & ((1u << column) - 1);
        const int row = offsets[side] + (int)countbits(stepsBefore);

        float2 p = float2(i + 0.5, row + 0.5) / 8.0;

        // Undo the flips applied when the line was built.
        if (flipX)
            p.x = 1.0 - p.x;
        if (flipAxis)
            p = p.yx;
        return p;
    }

    // Creates a line area from 2 points on an 8x8 quad.
    // The quad coordinate domain is 0.0 -> 1.0 on both axes; anything negative or
    // greater than 1.0 is by definition outside of the 8x8 quad.
    static LineArea Create(float2 v0, float2 v1)
    {
        LineArea area;

        // Unflipped line, built alongside the real data (debug member).
        area.debugLine.Build(v0, v1);

        AnalyticalLine flipped;
        flipped.BuildFlipped(v0, v1, area.flipX, area.flipAxis, area.isRightHand, area.isValid);

        // x of the 8 column centers, split in two halves.
        const float4 centersLo = float4(0.5,1.5,2.5,3.5)/8.0;
        const float4 centersHi = float4(4.5,5.5,6.5,7.5)/8.0;

        // Row index of the line at every column center.
        const int4 rowsLo = (int4)floor(flipped.Eval4(centersLo) * 8.0 - 0.5);
        const int4 rowsHi = (int4)floor(flipped.Eval4(centersHi) * 8.0 - 0.5);

        // Row increment between consecutive columns.
        // The last component of the second half has no successor and is left at 0.
        const uint4 stepsLo = uint4(rowsLo.yzw, rowsHi.x) - rowsLo.xyzw;
        const uint4 stepsHi = uint4(rowsHi.yzw, 0) - uint4(rowsHi.xyz, 0);

        // Pack the increments, one bit per column.
        area.masks[0] = stepsLo.x | (stepsLo.y << 1) | (stepsLo.z << 2) | (stepsLo.w << 3);
        area.masks[1] = stepsHi.x | (stepsHi.y << 1) | (stepsHi.z << 2) | (stepsHi.w << 3);

        // The second half starts where the first half ends.
        area.offsets[0] = rowsLo.x;
        area.offsets[1] = area.offsets[0] + countbits(area.masks[0]);
        return area;
    }
};
 | |
| 
 | |
/*
Converts a LineArea into a 64 bit coverage mask (x = first 32 bits, y = next 32 bits).
The two 4x4 quads of the line are fetched from gs_quadMask (which must have been generated
and synced beforehand), positioned by the line offsets, and the result is finally
complemented for flipX, complemented for non right hand lines, and zeroed for invalid lines.
*/
uint2 CreateCoverageMask(in LineArea lineArea)
{
    const uint leftSideMask = 0x0F0F0F0F;
    const uint2 horizontalMask = uint2(leftSideMask, ~leftSideMask);

    //prepare samples, flip samples if there is mirroring in x
    int2 ii = lineArea.flipX ? int2(1,0) : int2(0,1);

    //table selector: +16 for flipX, +32 for flipAxis
    const uint lutOperation = ((uint)lineArea.flipX << 4) | ((uint)lineArea.flipAxis << 5);
    int2 offsets = int2(lineArea.offsets[ii.x],lineArea.offsets[ii.y]);

    //only the low 4 bits of an increment mask are meaningful. Masking them keeps the lookup
    //inside the selected 16 entry table (and inside the 64 entry array) for any input.
    const uint2 lutIndex = uint2(lineArea.masks[ii.x] & 0xFu, lineArea.masks[ii.y] & 0xFu) + lutOperation;
    uint2 halfSamples = uint2(gs_quadMask[lutIndex.x], gs_quadMask[lutIndex.y]);

    uint2 result = 0;
    if (lineArea.flipAxis)
    {
        //Case were we have flipped axis / transpose. We generate top and bottom part
        int2 tOffsets = clamp(offsets, -31, 31);
        uint2 workMask = leftSideMask << clamp(offsets, 0, 4);
        uint2 topDownMasks = uint2( tOffsets.x > 0 ?
                                    ((halfSamples.x << min(4,tOffsets.x)) & leftSideMask) | ((halfSamples.x << min(8,tOffsets.x)) & ~leftSideMask)
                                    : ((halfSamples.x << 4) >> min(4,-tOffsets.x) & ~leftSideMask) >> 4,
                                    tOffsets.y > 0 ?
                                    ((halfSamples.y << min(4, tOffsets.y)) & leftSideMask) | ((halfSamples.y << min(8, tOffsets.y)) & ~leftSideMask)
                                    : ((halfSamples.y << 4) >> min(4, -tOffsets.y) & ~leftSideMask) >> 4);

        //per row byte pattern, replicated below into the 4 bytes of each 32 bit word
        int2 backMaskShift = lineArea.flipX ? clamp(tOffsets + 4, -31, 31) : tOffsets;
        uint2 backMaskOp = uint2((backMaskShift.x > 0 ? 1u << backMaskShift.x : 1u >> -backMaskShift.x) - 1u, (backMaskShift.y > 0 ? 1u << backMaskShift.y : 1u >> -backMaskShift.y) - 1u);
        uint2 backBite = uint2( backMaskShift.x <= 0 ? (lineArea.flipX ? ~0x0 : 0x0) : (lineArea.flipX ? (0xFF & ~backMaskOp.x) : (0xFFFF & backMaskOp.x)),
                                backMaskShift.y <= 0 ? (lineArea.flipX ? ~0x0 : 0x0) : (lineArea.flipX ? (0xFF & ~backMaskOp.y) : (0xFFFF & backMaskOp.y)));
        result = backBite | (backBite << 8) | (backBite << 16) | (backBite << 24) | (topDownMasks & workMask);
    }
    else
    {
        //Case were the masks are positioned horizontally. We generate 4 quads
        //(left / right column halves for each of the two 32 bit words)
        uint2 sideMasks = uint2(halfSamples.x, (halfSamples.y << 4));

        //offsets in bits: 8 bits per row. The second word (z, w) starts 4 rows further.
        int4 tOffsets = clamp((offsets.xyxy - int4(0,0,4,4)) << 3, -31, 31);
        uint4 halfMasks = uint4( tOffsets.x > 0 ? (~sideMasks.x & horizontalMask.x) << tOffsets.x : ~(sideMasks.x >> -tOffsets.x),
                                 tOffsets.y > 0 ? (~sideMasks.y & horizontalMask.y) << tOffsets.y : ~(sideMasks.y >> -tOffsets.y),
                                 tOffsets.z > 0 ? (~sideMasks.x & horizontalMask.x) << tOffsets.z : ~(sideMasks.x >> -tOffsets.z),
                                 tOffsets.w > 0 ? (~sideMasks.y & horizontalMask.y) << tOffsets.w : ~(sideMasks.y >> -tOffsets.w)) & horizontalMask.xyxy;
        result = uint2(halfMasks.x | halfMasks.y, halfMasks.z | halfMasks.w);
    }

    result = lineArea.flipX ? ~result : result;
    result = lineArea.isRightHand ? result : ~result;
    result = lineArea.isValid ? result : 0;
    return result;
}
 | |
| 
 | |
/*
Computes the 64 bit coverage mask of a triangle on the 8x8 grid.
v0, v1, v2    - the triangle vertices, in normalized grid coordinates.
showFrontFace - include the bits covered by all three edge masks.
showBackface  - include the bits covered by none of the three edge masks. This part is
                suppressed when all three edge masks are empty.
return        - the coverage mask (x = first 32 bits, y = next 32 bits).
Requires gs_quadMask to be generated and synced (see GenLUT).
*/
uint2 TriangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface)
{
    const uint2 mask0 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0, v1));
    const uint2 mask1 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1, v2));
    const uint2 mask2 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v2, v0));

    // Front face: inside all three edges.
    const uint2 frontMask = mask0 & mask1 & mask2;

    // Back face: outside all three edges, i.e. ~mask0 & ~mask1 & ~mask2.
    const uint2 anyEdgeMask = mask0 | mask1 | mask2;
    const uint2 backMask = ~anyEdgeMask;

    // NOTE(review): three empty edge masks are treated as "no triangle" - confirm this is
    // intended for a back facing triangle that covers the whole tile.
    const bool anyEdgeCoverage = any(anyEdgeMask != 0);

    const uint2 none = uint2(0, 0);
    return (showFrontFace ? frontMask : none) | ((anyEdgeCoverage && showBackface) ? backMask : none);
}
 | |
| 
 | |
/*
Computes the 64 bit coverage mask of a thick line segment on the 8x8 grid.
v0, v1    - the segment end points, in normalized grid coordinates.
thickness - distance the line extends to EACH side of the segment, in normalized space
            (the covered band is therefore 2 * thickness wide).
caps      - extra length added past each end point, in normalized space.
return    - the coverage mask (x = first 32 bits, y = next 32 bits). A zero length
            segment has no direction and yields an empty mask.
Requires gs_quadMask to be generated and synced (see GenLUT).
*/
uint2 LineCoverageMask(float2 v0, float2 v1, float thickness, float caps)
{
    const float2 segment = v1 - v0;

    // normalize() of a zero (or NaN) vector would poison all four edges with NaNs.
    const float lengthSq = dot(segment, segment);
    if (!(lengthSq > 0.0))
        return uint2(0, 0);

    const float2 lineVector = normalize(segment);

    // Perpendicular to the line direction, scaled by thickness.
    // Same value as cross(float3(lineVector, 0), float3(0, 0, 1)).xy * thickness.
    const float2 D = float2(lineVector.y, -lineVector.x) * thickness;

    // Extend both ends by the caps.
    v0 -= caps * lineVector;
    v1 += caps * lineVector;

    // The line is the intersection of the 4 half planes of its bounding rectangle.
    const uint2 mask0 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0 - D, v1 - D));
    const uint2 mask1 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1 + D, v0 + D));
    const uint2 mask2 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0 + D, v0 - D));
    const uint2 mask3 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1 - D, v1 + D));
    return mask0 & mask1 & mask3 & mask2;
}
 | |
| 
 | |
| }
 | |
| 
 | |
| #endif
 |