// gmcs -unsafe -r:System,System.Drawing,Mono.Simd dct.cs
using Mono.Simd;
using System;
using System.Diagnostics;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Runtime.InteropServices;
class DCTSample {
private const PixelFormat Format8bppGrayscale = PixelFormat.Format8bppIndexed;
private static Vector4f[] coefficientsVector = null;
private static Vector4f[] transposedCoefficientsVector = null;
private static float[] coefficientsArray = null;
private static void InitializeCoefficients(bool simd)
{
var coefficients = new float[8 * 8];
for (var y = 0; y < 8; y++) {
for (var x = 0; x < 8; x++) {
coefficients[y * 8 + x] = (x == 0) ? (float)(0.5 / Math.Sqrt(2.0))
: (float)(0.5 * Math.Cos((x * (2 * y + 1) * Math.PI) / 16.0));
}
}
if (simd) {
coefficientsVector = new Vector4f[8 * 8 / 4];
transposedCoefficientsVector = new Vector4f[8 * 8 / 4];
for (var i = 0; i < 64; i += 4) {
coefficientsVector[i / 4] = ArrayExtensions.GetVector(coefficients, i);
}
for (var i = 0; i < 8; i++) {
transposedCoefficientsVector[i * 2 ] = new Vector4f(coefficients[i + 0],
coefficients[i + 8],
coefficients[i + 16],
coefficients[i + 24]);
transposedCoefficientsVector[i * 2 + 1] = new Vector4f(coefficients[i + 32],
coefficients[i + 40],
coefficients[i + 48],
coefficients[i + 56]);
}
}
else {
coefficientsArray = coefficients;
}
}
private static unsafe void ForwardDCTSimd(BitmapData image, IntPtr dctBuffer, int w, int h)
{
var dct_temp = stackalloc float[8 * 8];
var transposed = stackalloc float[8 * 8];
var stride = w;
var scan0_p = (byte*)image.Scan0.ToPointer();
var scan0_c = (float*)dctBuffer.ToPointer();
for (var blockY = 0; blockY < h; blockY += 8) {
var block_p = scan0_p + blockY * image.Stride;
var block_c = scan0_c + blockY * stride;
for (var blockX = 0; blockX < w; blockX += 8, block_c += 8, block_p += 8) {
{ // transpose matrix
var pix_t = transposed;
for (var tx = 0; tx < 8; tx++) {
var pix_p_xy = block_p + tx;
for (var ty = 0; ty < 8; ty++) {
*(pix_t++) = (float)*pix_p_xy;
pix_p_xy += image.Stride;
}
}
}
{ // y
var dct_temp_y = dct_temp;
for (var v = 0; v < 8; v++) {
var vec_p_xy = (Vector4f*)transposed;
for (var x = 0; x < 8; x++) {
var t = *(vec_p_xy++) * transposedCoefficientsVector[/* (v * 8 / 4) */ v * 2];
t += *(vec_p_xy++) * transposedCoefficientsVector[/* (v * 8 / 4) */ v * 2 + 1];
*(dct_temp_y++) = t.X + t.Y + t.Z + t.W;
}
}
}
{ // x
for (var v = 0; v < 8; v++) {
var pix_c_uv = block_c + v * stride;
var dct_temp_x = (Vector4f*)(dct_temp + v * 8);
for (var u = 0; u < 8; u++) {
var vec_c = dct_temp_x[0] * transposedCoefficientsVector[/* (u * 8 / 4) */ u * 2];
vec_c += dct_temp_x[1] * transposedCoefficientsVector[/* (u * 8 / 4) */ u * 2 + 1];
pix_c_uv[u] = vec_c.X + vec_c.Y + vec_c.Z + vec_c.W;
}
}
}
}
}
}
private static unsafe void InverseDCTSimd(IntPtr dctBuffer, BitmapData image, int w, int h)
{
var dct_temp = stackalloc float[8 * 8];
var transposed = stackalloc float[8 * 8];
var stride = w;
var scan0_p = (byte*)image.Scan0.ToPointer();
var scan0_c = (float*)dctBuffer.ToPointer();
for (var blockY = 0; blockY < h; blockY += 8) {
var block_p = scan0_p + blockY * image.Stride;
var block_c = scan0_c + blockY * stride;
for (var blockX = 0; blockX < w; blockX += 8, block_c += 8, block_p += 8) {
{ // transpose matrix
var pix_t = transposed;
for (var tu = 0; tu < 8; tu++) {
var pix_c_uv = block_c + tu;
for (var tv = 0; tv < 8; tv++) {
*(pix_t++) = *pix_c_uv;
pix_c_uv += stride;
}
}
}
{ // y
var dct_temp_y = dct_temp;
for (var y = 0; y < 8; y++) {
var vec_c_uv = (Vector4f*)transposed;
for (var u = 0; u < 8; u++) {
var t = *(vec_c_uv++) * coefficientsVector[/* (y * 8 / 4) */ y * 2];
t += *(vec_c_uv++) * coefficientsVector[/* (y * 8 / 4) */ y * 2 + 1];
*(dct_temp_y++) = t.X + t.Y + t.Z + t.W;
}
}
}
{ // x
for (var y = 0; y < 8; y++) {
var pix_p_xy = block_p + y * image.Stride;
var dct_temp_x = (Vector4f*)(dct_temp + y * 8);
for (var x = 0; x < 8; x++) {
var vec_p = dct_temp_x[0] * coefficientsVector[/* (x * 8 / 4) */ x * 2];
vec_p += dct_temp_x[1] * coefficientsVector[/* (x * 8 / 4) */ x * 2 + 1];
pix_p_xy[x] = (byte)(vec_p.X + vec_p.Y + vec_p.Z + vec_p.W);
}
}
}
}
}
}
private static unsafe void ForwardDCTSisd(BitmapData image, IntPtr dctBuffer, int w, int h)
{
#if !TEST
var dct_temp = stackalloc float[8 * 8];
#endif
var stride = w;
var scan0_p = (byte*)image.Scan0.ToPointer();
var scan0_c = (float*)dctBuffer.ToPointer();
for (var blockY = 0; blockY < h; blockY += 8) {
var block_p = scan0_p + blockY * image.Stride;
var block_c = scan0_c + blockY * stride;
for (var blockX = 0; blockX < w; blockX += 8, block_p += 8, block_c += 8) {
#if !TEST
{ // y
var dct_temp_y = dct_temp;
for (var v = 0; v < 8; v++) {
for (var x = 0; x < 8; x++) {
var pix_p_xy = block_p + x;
var t = 0.0f;
for (var y = 0; y < 8; y++) {
t += *pix_p_xy * coefficientsArray[y * 8 + v];
pix_p_xy += image.Stride;
}
*(dct_temp_y++) = t;
}
}
}
{ // x
for (var v = 0; v < 8; v++) {
var pix_c_uv = block_c + v * stride;
for (var u = 0; u < 8; u++) {
var dct_temp_x = dct_temp + v * 8;
var pix_c = 0.0f;
for (var x = 0; x < 8; x++) {
pix_c += dct_temp_x[x] * coefficientsArray[x * 8 + u];
}
pix_c_uv[u] = pix_c;
}
}
}
#else
for (var v = 0; v < 8; v++) {
var pix_c_uv = block_c + v * stride;
for (var u = 0; u < 8; u++) {
var pix_c = 0.0f;
for (var y = 0; y < 8; y++) {
var pix_p_xy = block_p + y * image.Stride;
for (var x = 0; x < 8; x++) {
pix_c += pix_p_xy[x] * coefficientsArray[y * 8 + v] * coefficientsArray[x * 8 + u];
}
}
pix_c_uv[u] = pix_c;
}
}
#endif
}
}
}
private static unsafe void InverseDCTSisd(IntPtr dctBuffer, BitmapData image, int w, int h)
{
#if !TEST
var dct_temp = stackalloc float[8 * 8];
#endif
var stride = w;
var scan0_p = (byte*)image.Scan0.ToPointer();
var scan0_c = (float*)dctBuffer.ToPointer();
for (var blockY = 0; blockY < h; blockY += 8) {
var block_p = scan0_p + blockY * image.Stride;
var block_c = scan0_c + blockY * stride;
for (var blockX = 0; blockX < w; blockX += 8, block_c += 8, block_p += 8) {
#if !TEST
{ // y
var dct_temp_y = dct_temp;
for (var y = 0; y < 8; y++) {
for (var u = 0; u < 8; u++) {
var pix_c_uv = block_c + u;
var t = 0.0f;
for (var v = 0; v < 8; v++) {
t += *pix_c_uv * coefficientsArray[y * 8 + v];
pix_c_uv += stride;
}
*(dct_temp_y++) = t;
}
}
}
{ // x
for (var y = 0; y < 8; y++) {
var pix_p_xy = block_p + y * image.Stride;
for (var x = 0; x < 8; x++) {
var dct_temp_x = dct_temp + y * 8;
var pix_p = 0.0f;
for (var u = 0; u < 8; u++) {
pix_p += dct_temp_x[u] * coefficientsArray[x * 8 + u];
}
pix_p_xy[x] = (byte)pix_p;
}
}
}
#else
for (var y = 0; y < 8; y++) {
var pix_p_xy = block_p + y * image.Stride;
for (var x = 0; x < 8; x++) {
var pix_p = 0.0f;
for (var v = 0; v < 8; v++) {
var pix_c_uv = block_c + v * stride;
for (var u = 0; u < 8; u++) {
pix_p += pix_c_uv[u] * coefficientsArray[y * 8 + v] * coefficientsArray[x * 8 + u];
}
}
pix_p_xy[x] = (byte)pix_p;
}
}
#endif
}
}
}
private static void ForwardDCT(bool simd, Bitmap luminance, IntPtr dctBuffer)
{
DCT(simd, true, luminance.Width, luminance.Height, luminance, dctBuffer);
}
private static Bitmap InverseDCT(bool simd, int width, int height, IntPtr dctBuffer)
{
var image = Create8bppGrayscaleBitmap(width, height);
DCT(simd, false, width, height, image, dctBuffer);
return image;
}
private static void DCT(bool simd, bool forward, int width, int height, Bitmap image, IntPtr dctBuffer)
{
var rect = new Rectangle(0, 0, width, height);
BitmapData locked = null;
try {
locked = image.LockBits(rect, forward ? ImageLockMode.ReadOnly : ImageLockMode.WriteOnly, Format8bppGrayscale);
var stopwatch = new Stopwatch();
stopwatch.Start();
if (forward) {
if (simd)
ForwardDCTSimd(locked, dctBuffer, rect.Width, rect.Height);
else
ForwardDCTSisd(locked, dctBuffer, rect.Width, rect.Height);
}
else {
if (simd)
InverseDCTSimd(dctBuffer, locked, rect.Width, rect.Height);
else
InverseDCTSisd(dctBuffer, locked, rect.Width, rect.Height);
}
stopwatch.Stop();
Console.WriteLine("{0} DCT spent {1}", forward ? "forward" : "inverse", stopwatch.Elapsed);
}
finally {
if (locked != null)
image.UnlockBits(locked);
}
}
private static Bitmap CreateLuminanceImage(Bitmap imageColored)
{
var imageLuminance = Create8bppGrayscaleBitmap(imageColored.Width, imageColored.Height);
BitmapData lockedColored = null;
BitmapData lockedLuminance = null;
try {
var rect = new Rectangle(0, 0, imageColored.Width, imageColored.Height);
lockedColored = imageColored .LockBits(rect, ImageLockMode.ReadOnly, PixelFormat.Format24bppRgb);
lockedLuminance = imageLuminance.LockBits(rect, ImageLockMode.WriteOnly, Format8bppGrayscale);
unsafe {
for (var y = 0; y < imageColored.Height; y++) {
var bgr = (byte*) lockedColored.Scan0.ToPointer() + y * lockedColored.Stride;
var lum = (byte*)lockedLuminance.Scan0.ToPointer() + y * lockedLuminance.Stride;
for (var x = 0; x < imageColored.Width; x++) {
// 0.299R + 0.587G + 0.114B
*(lum++) = (byte)((*(bgr++) * 0114 / 1000) +
(*(bgr++) * 0587 / 1000) +
(*(bgr++) * 0299 / 1000));
}
}
}
return imageLuminance;
}
finally {
if (lockedColored != null)
imageColored.UnlockBits(lockedColored);
if (lockedLuminance != null)
imageLuminance.UnlockBits(lockedLuminance);
}
}
private static Bitmap Create8bppGrayscaleBitmap(int width, int height)
{
var grayscaled = new Bitmap(width, height, Format8bppGrayscale);
// 8bpp indexed as 8bpp grayscale
var palette = grayscaled.Palette;
for (var i = 0; i < palette.Entries.Length; i++) {
palette.Entries[i] = Color.FromArgb(0xff, i, i, i);
}
grayscaled.Palette = palette;
return grayscaled;
}
static void Main(string[] args)
{
var useSimd = (args[0] == "simd");
if (useSimd) {
Console.WriteLine("Mono.Simd accel mode: {0}", SimdRuntime.AccelMode);
foreach (var method in new[] {
new {Type = typeof(Vector4f), Method = "op_Addition", Signature = new[] {typeof(Vector4f), typeof(Vector4f)}},
new {Type = typeof(Vector4f), Method = "op_Multiply", Signature = new[] {typeof(Vector4f), typeof(Vector4f)}},
}) {
Console.WriteLine("acceleration: {0}.{1} = {2}",
method.Type.Name,
method.Method,
SimdRuntime.IsMethodAccelerated(method.Type,
method.Method,
method.Signature));
}
}
using (var image = new Bitmap(args[1])) {
Console.WriteLine("image size {0}x{1}", image.Width, image.Height);
var w = image.Width;
var h = image.Height;
if ((w & 7) != 0 || (h & 7) != 0)
throw new ApplicationException("both width and height must be n * 8");
InitializeCoefficients(useSimd);
using (var luminance = CreateLuminanceImage(image)) {
var dctBuffer = Marshal.AllocCoTaskMem(sizeof(float) * w * h);
for (var act = 0; act < 5; act++) {
ForwardDCT(useSimd, luminance, dctBuffer);
using (var inverted = InverseDCT(useSimd, w, h, dctBuffer)) {
if (act != 0)
continue;
luminance.Save(Path.Combine(args[2], "luminance.bmp"), ImageFormat.Bmp);
inverted.Save(Path.Combine(args[2], "inverted.bmp"), ImageFormat.Bmp);
}
}
Marshal.FreeCoTaskMem(dctBuffer);
}
}
}
}