2009-03-25T01:43:00の更新内容

programming/mono/mono_simd/index.wiki.txt

current previous
1,752 0,0
+
${smdncms:tags,Mono.Simd,simd,dct,c#,imaging,画像処理}
+
*Mono.Simd
+
Mono.Simdを用いることで、SIMD命令を用いたベクタ演算をマネージドコードで記述できる。
+
-Mono.Simdのドキュメント [[Mono Documentation Mono.Simd Namespace:http://go-mono.com/docs/index.aspx?tlink=0@N%3AMono.Simd]]
+
-Mono.Simdの概要 [[Mono's SIMD Support: Making Mono safe for Gaming - Miguel de Icaza:http://tirania.org/blog/archive/2008/Nov-03.html]]
+

          
+
**例1 加算合成
+
二枚の32bpp ARGB画像(各チャンネル8ビット)から、飽和加算による加算合成画像を得る例。4ピクセル分(16バイト)をVector16b構造体として扱い、VectorOperations.AddWithSaturationメソッドで飽和加算する。
+

          
+
***ソース
+
 // gmcs -unsafe -r:System,System.Drawing,Mono.Simd addition.cs
+
 using Mono.Simd;
+
 
+
 using System;
+
 using System.Diagnostics;
+
 using System.Drawing;
+
 using System.Drawing.Imaging;
+
 
+
 class Addition {
+
   // SIMD version of the additive blend: every inner-loop step stores the result of
   // VectorOperations.AddWithSaturation applied to one element read from image1 and one
   // element read from image2, advancing all three pointers by one element.
   // NOTE(review): in this copy of the listing both `for` headers and the three pointer
   // initializers are empty, so the loop bounds (presumably derived from w and h) and the
   // pointer element type (presumably Vector16b) cannot be confirmed from the text shown here.
   private static unsafe void AddSimd(BitmapData result, BitmapData image1, BitmapData image2, int w, int h)
+
   {
+
     for {
+
       var resultPixel =;
+
       var image1Pixel =;
+
       var image2Pixel =;
+
 
+
       for {
+
         // One saturating add per iteration; source and destination pointers move in lock step.
         *(resultPixel++) = VectorOperations.AddWithSaturation(*(image1Pixel++), *(image2Pixel++));
+
       }
+
     }
+
   }
+
 
+
   // Scalar (non-SIMD) version of the additive blend. For each row the three pointers are
   // offset by y * Stride of their own bitmap, then four values per inner-loop step are read
   // from each input, summed, and four values are written to the result.
   // NOTE(review): the `for` headers and the base part of each pointer initializer are missing
   // in this copy of the listing; the bounds are presumably derived from w and h.
   private static unsafe void AddSisd(BitmapData result, BitmapData image1, BitmapData image2, int w, int h)
+
   {
+
     for {
+
       var resultPixel = + y * result.Stride;
+
       var image1Pixel = + y * image1.Stride;
+
       var image2Pixel = + y * image2.Stride;
+
 
+
       for {
+
         // The sums are held in int locals named b, g, r, a (one per channel, in read order).
         int b = *(image1Pixel++) + *(image2Pixel++);
+
         int g = *(image1Pixel++) + *(image2Pixel++);
+
         int r = *(image1Pixel++) + *(image2Pixel++);
+
         int a = *(image1Pixel++) + *(image2Pixel++);
+
 
+
         // NOTE(review): the right-hand sides of the four stores are missing in this copy;
         // presumably each sum is clamped to the byte range before being written - confirm
         // against the original listing.
         *(resultPixel++) =;
+
         *(resultPixel++) =;
+
         *(resultPixel++) =;
+
         *(resultPixel++) =;
+
       }
+
     }
+
   }
+
 
+
   // Creates a new Format32bppArgb bitmap of width x height, locks both inputs (read-only) and
   // the new bitmap (write-only) over the full rectangle, runs AddSimd or AddSisd on the locked
   // data, prints the elapsed time as "spent ...", and returns the new bitmap.
   // The caller owns the returned bitmap (Main disposes it).
   // NOTE(review): the conditions of every `if` in this method are missing in this copy of the
   // listing; the first presumably tests useSimd and those in `finally` presumably test the
   // corresponding locked* variable for null.
   private static Bitmap Add(bool useSimd, int width, int height, Bitmap image1, Bitmap image2)
+
   {
+
     var result = new Bitmap(width, height,PixelFormat.Format32bppArgb);
+
 
+
     // Declared outside the try block so that the finally block can see them.
     BitmapData lockedImage1 = null;
+
     BitmapData lockedImage2 = null;
+
     BitmapData lockedResult = null;
+
 
+
     try {
+
       var rect = new Rectangle(0, 0, width, height);
+
 
+
       lockedImage1 = image1.LockBits(rect, ImageLockMode.ReadOnly,  PixelFormat.Format32bppArgb);
+
       lockedImage2 = image2.LockBits(rect, ImageLockMode.ReadOnly,  PixelFormat.Format32bppArgb);
+
       lockedResult = result.LockBits(rect, ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);
+
 
+
       // Only the blend call itself is timed; bitmap creation and locking are excluded.
       var stopwatch = new Stopwatch();
+
 
+
       stopwatch.Start();
+
 
+
       if
+
         AddSimd(lockedResult, lockedImage1, lockedImage2, width, height);
+
       else
+
         AddSisd(lockedResult, lockedImage1, lockedImage2, width, height);
+
 
+
       stopwatch.Stop();
+
 
+
       Console.WriteLine("spent {0}", stopwatch.Elapsed);
+
 
+
       return result;
+
     }
+
     finally {
+
       // Unlocking happens here so it also runs when the try block exits via an exception.
       if
+
         image1.UnlockBits(lockedImage1);
+
       if
+
         image2.UnlockBits(lockedImage2);
+
       if
+
         result.UnlockBits(lockedResult);
+
     }
+
   }
+
 
+
   // Entry point of the addition sample. Prints Mono.Simd acceleration information, reports
   // the image size, rejects unsuitable inputs with ApplicationException, then repeatedly
   // calls Add, optionally saving a result to args[3] as BMP, and disposes every result.
   // NOTE(review): the initializer of useSimd, the `if` conditions, the `using` resource
   // declarations (which presumably open image1 and image2) and the `for` header are missing
   // in this copy of the listing. The usage shown elsewhere on the page is
   // "addition.exe simd|sisd image1.bmp image2.bmp result.bmp".
   public static void Main(string[] args)
+
   {
+
     var useSimd =;
+
 
+
     if {
+
       // Reports which SIMD instruction sets the runtime uses and whether the
       // AddWithSaturation(Vector16b, Vector16b) method is accelerated.
       Console.WriteLine("Mono.Simd accel mode: {0}", SimdRuntime.AccelMode);
+
       Console.WriteLine("acceleration: {0}", SimdRuntime.IsMethodAccelerated(typeof(VectorOperations),
+
                                                                              "AddWithSaturation",
+
                                                                              new[] {typeof(Vector16b), typeof(Vector16b)}));
+
     }
+
 
+
     using {
+
       // image1 supplies the dimensions used for the whole run.
       var width  = image1.Width;
+
       var height = image1.Height;
+
 
+
       Console.WriteLine("image size {0}x{1}", width, height);
+
 
+
       // Per the message, the width has to be a multiple of 4.
       if
+
         throw new ApplicationException("width must be n * 4");
+
 
+
       // Per the message, both images have to have the same size.
       if
+
         throw new ApplicationException("not same size");
+
 
+
       for {
+
         var result = Add(useSimd, width, height, image1, image2);
+
 
+
         if
+
           result.Save(args[3], ImageFormat.Bmp);
+
 
+
         result.Dispose();
+
       }
+
     }
+
   }
+
 }
+

          
+
***処理時間
+
Mono.Simd使用なし。
+
 $ mono addition.exe sisd image1.bmp image2.bmp result.bmp
+
 image size 1280x720
+
 spent 00:00:00.0129155
+
 spent 00:00:00.0120989
+
 spent 00:00:00.0121380
+
 spent 00:00:00.0122019
+
 spent 00:00:00.0121072
+

          
+
Mono.Simd使用あり、SIMD命令使用あり。
+
 $ mono addition.exe simd image1.bmp image2.bmp result.bmp
+
 Mono.Simd accel mode: SSE1, SSE2, SSE3, SSSE3
+
 acceleration: True
+
 image size 1280x720
+
 spent 00:00:00.0036350
+
 spent 00:00:00.0029663
+
 spent 00:00:00.0030908
+
 spent 00:00:00.0040916
+
 spent 00:00:00.0030134
+

          
+
Mono.Simd使用あり、SIMD命令使用なし。
+
 $ mono -O=-simd addition.exe simd image1.bmp image2.bmp result.bmp
+
 Mono.Simd accel mode: None
+
 acceleration: False
+
 image size 1280x720
+
 spent 00:00:00.0187466
+
 spent 00:00:00.0178911
+
 spent 00:00:00.0177878
+
 spent 00:00:00.0177766
+
 spent 00:00:00.0177772
+

          
+
参考までに、実行した環境・CPUは以下のとおり。
+
 $ mono -V
+
 Mono JIT compiler version 2.5
+
 Copyright 2002-2008 Novell, Inc and Contributors. www.mono-project.com
+
 	TLS:           __thread
+
 	GC:            Included Boehm
+
 	SIGSEGV:       altstack
+
 	Notifications: epoll
+
 	Architecture:  x86
+
 	Disabled:      none
+
 
+
 $ cat /proc/cpuinfo 
+
 processor	: 0
+
 vendor_id	: GenuineIntel
+
 cpu family	: 6
+
 model		: 15
+
 model name	: Intel(R) Core(TM)2 CPU          6600  @ 2.40GHz
+
 stepping	: 6
+
 cpu MHz		: 1596.000
+
 cache size	: 4096 KB
+
 physical id	: 0
+
 siblings	: 2
+
 core id		: 0
+
 cpu cores	: 2
+
 apicid		: 0
+
 initial apicid	: 0
+
 fdiv_bug	: no
+
 hlt_bug		: no
+
 f00f_bug	: no
+
 coma_bug	: no
+
 fpu		: yes
+
 fpu_exception	: yes
+
 cpuid level	: 10
+
 wp		: yes
+
 flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx lm constant_tsc arch_perfmon pebs bts pni monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr lahf_lm
+
 bogomips	: 4799.98
+
 clflush size	: 64
+
 power management:
+
 
+
 processor	: 1
+
 vendor_id	: GenuineIntel
+
 cpu family	: 6
+
 model		: 15
+
 model name	: Intel(R) Core(TM)2 CPU          6600  @ 2.40GHz
+
 stepping	: 6
+
 cpu MHz		: 1596.000
+
 cache size	: 4096 KB
+
 physical id	: 0
+
 siblings	: 2
+
 core id		: 1
+
 cpu cores	: 2
+
 apicid		: 1
+
 initial apicid	: 1
+
 fdiv_bug	: no
+
 hlt_bug		: no
+
 f00f_bug	: no
+
 coma_bug	: no
+
 fpu		: yes
+
 fpu_exception	: yes
+
 cpuid level	: 10
+
 wp		: yes
+
 flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx lm constant_tsc arch_perfmon pebs bts pni monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr lahf_lm
+
 bogomips	: 4800.12
+
 clflush size	: 64
+
 power management:
+

          
+
**例2 離散コサイン変換(DCT)
+
8x8 Forward DCTにより画像を変換し、変換したものを8x8 Inverse DCTで復元する例。 floatで表される4ピクセル分(16バイト)をVector4f構造体として扱い、DCT係数の積和を演算する。
+

          
+
***処理時間
+
使用したソースコードは後述する。まず、Mono.Simd使用なしの場合。
+
 $ mono dct.exe sisd image.bmp .
+
 image size 1280x720
+
 forward DCT spent 00:00:00.0861134
+
 inverse DCT spent 00:00:00.0645653
+
 forward DCT spent 00:00:00.0815288
+
 inverse DCT spent 00:00:00.0649667
+
 forward DCT spent 00:00:00.0816030
+
 inverse DCT spent 00:00:00.0668463
+
 forward DCT spent 00:00:00.0815200
+
 inverse DCT spent 00:00:00.0651595
+
 forward DCT spent 00:00:00.0850448
+
 inverse DCT spent 00:00:00.0655908
+

          
+
Mono.Simd使用あり、SIMD命令使用あり。
+
 $ mono dct.exe simd image.bmp .
+
 Mono.Simd accel mode: SSE1, SSE2, SSE3, SSSE3
+
 acceleration: Vector4f.op_Addition = True
+
 acceleration: Vector4f.op_Multiply = True
+
 image size 1280x720
+
 forward DCT spent 00:00:00.0366700
+
 inverse DCT spent 00:00:00.0326371
+
 forward DCT spent 00:00:00.0348020
+
 inverse DCT spent 00:00:00.0312227
+
 forward DCT spent 00:00:00.0329452
+
 inverse DCT spent 00:00:00.0329587
+
 forward DCT spent 00:00:00.0489560
+
 inverse DCT spent 00:00:00.0493336
+
 forward DCT spent 00:00:00.0323098
+
 inverse DCT spent 00:00:00.0338064
+

          
+
Mono.Simd使用あり、SIMD命令使用なし。
+
 $ mono -O=-simd dct.exe simd image.bmp .
+
 Mono.Simd accel mode: None
+
 acceleration: Vector4f.op_Addition = False
+
 acceleration: Vector4f.op_Multiply = False
+
 image size 1280x720
+
 forward DCT spent 00:00:00.1917066
+
 inverse DCT spent 00:00:00.1856522
+
 forward DCT spent 00:00:00.1853029
+
 inverse DCT spent 00:00:00.1826358
+
 forward DCT spent 00:00:00.1866373
+
 inverse DCT spent 00:00:00.1803647
+
 forward DCT spent 00:00:00.1832263
+
 inverse DCT spent 00:00:00.1786115
+
 forward DCT spent 00:00:00.1827668
+
 inverse DCT spent 00:00:00.1789582
+

          
+
***ソース
+
SIMD版、SISD版ともに、X方向とY方向のDCTを別々に演算することで積和の演算回数を減らしている(SISD版では、#define TESTを有効にすると、演算回数の多い実装に切り替わる)。また、SIMD版のDCTでは、処理しやすいようにマトリクスを転置してから積和を求めるようにしてある。
+

          
+
 // gmcs -unsafe -r:System,System.Drawing,Mono.Simd dct.cs
+
 using Mono.Simd;
+
 
+
 using System;
+
 using System.Diagnostics;
+
 using System.Drawing;
+
 using System.Drawing.Imaging;
+
 using System.IO;
+
 using System.Runtime.InteropServices;
+
 
+
 class DCTSample {
+
   private const PixelFormat Format8bppGrayscale = PixelFormat.Format8bppIndexed;
+
 
+
   private static Vector4f[] coefficientsVector = null;
+
   private static Vector4f[] transposedCoefficientsVector = null;
+
   private static float[] coefficientsArray = null;
+
 
+
   // Builds the 8x8 table of DCT coefficients as a flat float[64] and publishes it in the form
   // the selected implementation reads:
   //  - one branch fills coefficientsVector and transposedCoefficientsVector (Vector4f[16] each),
   //  - the other branch stores the flat array in coefficientsArray.
   // NOTE(review): the loop headers, the `if` condition (presumably the simd parameter) and the
   // coefficient formula itself are missing in this copy of the listing.
   private static void InitializeCoefficients(bool simd)
+
   {
+
     var coefficients = new float[8 * 8];
+
 
+
     for {
+
       for {
+
         // Entry (x, y) is stored at index y * 8 + x; the value expression is not visible here.
         coefficients[y * 8 + x] =
+
                                            :;
+
       }
+
     }
+
 
+
     if {
+
       // 64 floats packed four per Vector4f gives 16 vectors per table.
       coefficientsVector           = new Vector4f[8 * 8 / 4];
+
       transposedCoefficientsVector = new Vector4f[8 * 8 / 4];
+
 
+
       for {
+
         // Same element order as the flat array: vector i / 4 is read starting at index i.
         coefficientsVector[i / 4] = ArrayExtensions.GetVector(coefficients, i);
+
       }
+
 
+
       for {
+
         // Transposed order: the two vectors for i hold the eight elements i, i + 8, ..., i + 56,
         // i.e. elements taken at a stride of 8 from the flat array.
         transposedCoefficientsVector[i * 2    ] = new Vector4f(coefficients[i + 0],
+
                                                                coefficients[i + 8],
+
                                                                coefficients[i + 16],
+
                                                                coefficients[i + 24]);
+
         transposedCoefficientsVector[i * 2 + 1] = new Vector4f(coefficients[i + 32],
+
                                                                coefficients[i + 40],
+
                                                                coefficients[i + 48],
+
                                                                coefficients[i + 56]);
+
       }
+
     }
+
     else {
+
       coefficientsArray = coefficients;
+
     }
+
   }
+
 
+
   // SIMD forward DCT: reads pixels from the locked image and writes coefficients to dctBuffer,
   // block by block. Each block is handled in three steps: copy the block into `transposed`
   // in transposed order, run the y pass into dct_temp, then run the x pass into the output.
   // Both passes multiply pairs of vectors by entries of transposedCoefficientsVector and add
   // up the four lanes (X + Y + Z + W) of the result.
   // NOTE(review): all `for` headers and the initializers of scan0_p, scan0_c and dct_temp_x
   // are missing in this copy of the listing, so block stepping and pointer types cannot be
   // confirmed from the text shown here.
   private static unsafe void ForwardDCTSimd(BitmapData image, IntPtr dctBuffer, int w, int h)
+
   {
+
     // Two 8x8 float scratch buffers on the stack.
     var dct_temp = stackalloc float[8 * 8];
+
     var transposed = stackalloc float[8 * 8];
+
     // Row pitch used for the coefficient buffer (the image side uses image.Stride instead).
     var stride = w;
+
 
+
     var scan0_p =;
+
     var scan0_c =;
+
 
+
     for {
+
       var block_p = scan0_p + blockY * image.Stride;
+
       var block_c = scan0_c + blockY * stride;
+
 
+
       for {
+
         { // transpose matrix
+
           var pix_t = transposed;
+
 
+
           for {
+
             var pix_p_xy = block_p + tx;
+
 
+
             for {
+
               // Walks down the image by image.Stride per step while writing sequentially.
               *(pix_t++) =*pix_p_xy;
+
               pix_p_xy += image.Stride;
+
             }
+
           }
+
         }
+
 
+
         { // y
+
           var dct_temp_y = dct_temp;
+
 
+
           for {
+
             var vec_p_xy =transposed;
+
 
+
             for {
+
               // Two consecutive vectors from `transposed` times the coefficient pair for v.
               var t  = *(vec_p_xy++) * transposedCoefficientsVector[/* */ v * 2];
+
                   t += *(vec_p_xy++) * transposedCoefficientsVector[/* */ v * 2 + 1];
+
 
+
               // Horizontal sum of the four lanes gives one intermediate value.
               *(dct_temp_y++) = t.X + t.Y + t.Z + t.W;
+
             }
+
           }
+
         }
+
 
+
         { // x
+
           for {
+
             var pix_c_uv = block_c + v * stride;
+
             var dct_temp_x =;
+
 
+
             for {
+
               var vec_c  = dct_temp_x[0] * transposedCoefficientsVector[/* */ u * 2];
+
                   vec_c += dct_temp_x[1] * transposedCoefficientsVector[/* */ u * 2 + 1];
+
 
+
               // Final coefficient for column u of output row v.
               pix_c_uv[u] = vec_c.X + vec_c.Y + vec_c.Z + vec_c.W;
+
             }
+
           }
+
         }
+
       }
+
     }
+
   }
+
 
+
   // SIMD inverse DCT: reads coefficients from dctBuffer and writes pixels to the locked image,
   // block by block. Each block is handled in three steps: copy the coefficient block into
   // `transposed` in transposed order, run the y pass into dct_temp, then run the x pass into
   // the image. Unlike the forward routine, both passes use coefficientsVector.
   // NOTE(review): all `for` headers, the initializers of scan0_p, scan0_c and dct_temp_x, and
   // the right-hand side of the final pixel store are missing in this copy of the listing.
   private static unsafe void InverseDCTSimd(IntPtr dctBuffer, BitmapData image, int w, int h)
+
   {
+
     // Two 8x8 float scratch buffers on the stack.
     var dct_temp = stackalloc float[8 * 8];
+
     var transposed = stackalloc float[8 * 8];
+
     // Row pitch used for the coefficient buffer (the image side uses image.Stride instead).
     var stride = w;
+
 
+
     var scan0_p =;
+
     var scan0_c =;
+
 
+
     for {
+
       var block_p = scan0_p + blockY * image.Stride;
+
       var block_c = scan0_c + blockY * stride;
+
 
+
       for {
+
         { // transpose matrix
+
           var pix_t = transposed;
+
 
+
           for {
+
             var pix_c_uv = block_c + tu;
+
 
+
             for {
+
               // Walks down the coefficient buffer by `stride` per step while writing sequentially.
               *(pix_t++) = *pix_c_uv;
+
               pix_c_uv += stride;
+
             }
+
           }
+
         }
+
 
+
         { // y
+
           var dct_temp_y = dct_temp;
+
 
+
           for {
+
             var vec_c_uv =transposed;
+
 
+
             for {
+
               // Two consecutive vectors from `transposed` times the coefficient pair for y.
               var t  = *(vec_c_uv++) * coefficientsVector[/* */ y * 2];
+
                   t += *(vec_c_uv++) * coefficientsVector[/* */ y * 2 + 1];
+
 
+
               // Horizontal sum of the four lanes gives one intermediate value.
               *(dct_temp_y++) = t.X + t.Y + t.Z + t.W;
+
             }
+
           }
+
         }
+
 
+
         { // x
+
           for {
+
             var pix_p_xy = block_p + y * image.Stride;
+
             var dct_temp_x =;
+
 
+
             for {
+
               var vec_p  = dct_temp_x[0] * coefficientsVector[/* */ x * 2];
+
                   vec_p += dct_temp_x[1] * coefficientsVector[/* */ x * 2 + 1];
+
 
+
               // NOTE(review): the stored value is missing here; presumably the lane sum of
               // vec_p converted to the pixel type - confirm against the original listing.
               pix_p_xy[x] =;
+
             }
+
           }
+
         }
+
       }
+
     }
+
   }
+
 
+
   // Scalar forward DCT: reads pixels from the locked image and writes coefficients to
   // dctBuffer, block by block, using coefficientsArray.
   // Without TEST defined, each block is processed in two passes through the dct_temp scratch
   // buffer (a y pass, then an x pass). With TEST defined, every output value is computed
   // directly from the input block with a product of two coefficients per term.
   // NOTE(review): all `for` headers and the initializers of scan0_p and scan0_c are missing
   // in this copy of the listing.
   private static unsafe void ForwardDCTSisd(BitmapData image, IntPtr dctBuffer, int w, int h)
+
   {
+
 #if !TEST
+
     // Scratch buffer is only needed by the two-pass variant.
     var dct_temp = stackalloc float[8 * 8];
+
 #endif
+
     // Row pitch used for the coefficient buffer (the image side uses image.Stride instead).
     var stride = w;
+
 
+
     var scan0_p =;
+
     var scan0_c =;
+
 
+
     for {
+
       var block_p = scan0_p + blockY * image.Stride;
+
       var block_c = scan0_c + blockY * stride;
+
 
+
       for {
+
 #if !TEST
+
         { // y
+
           var dct_temp_y = dct_temp;
+
 
+
           for {
+
             for {
+
               var pix_p_xy = block_p + x;
+
               var t = 0.0f;
+
 
+
               for {
+
                 // Accumulates down one image column, stepping by image.Stride.
                 t += *pix_p_xy * coefficientsArray[y * 8 + v];
+
                 pix_p_xy += image.Stride;
+
               }
+
 
+
               *(dct_temp_y++) = t;
+
             }
+
           }
+
         }
+
 
+
         { // x
+
           for {
+
             var pix_c_uv = block_c + v * stride;
+
 
+
             for {
+
               var dct_temp_x = dct_temp + v * 8;
+
               var pix_c = 0.0f;
+
 
+
               for {
+
                 // Accumulates along row v of the intermediate buffer.
                 pix_c += dct_temp_x[x] * coefficientsArray[x * 8 + u];
+
               }
+
 
+
               pix_c_uv[u] = pix_c;
+
             }
+
           }
+
         }
+
 #else
+
         for {
+
           var pix_c_uv = block_c + v * stride;
+
 
+
           for {
+
             var pix_c = 0.0f;
+
 
+
             for {
+
               var pix_p_xy = block_p + y * image.Stride;
+
 
+
               for {
+
                 // Direct form: one term per input pixel, two coefficient lookups per term.
                 pix_c += pix_p_xy[x] * coefficientsArray[y * 8 + v] * coefficientsArray[x * 8 + u];
+
               }
+
             }
+
 
+
             pix_c_uv[u] = pix_c;
+
           }
+
         }
+
 #endif
+
       }
+
     }
+
   }
+
 
+
   // Scalar inverse DCT: reads coefficients from dctBuffer and writes pixels to the locked
   // image, block by block, using coefficientsArray.
   // Without TEST defined, each block is processed in two passes through the dct_temp scratch
   // buffer (a y pass, then an x pass). With TEST defined, every pixel is computed directly
   // from the coefficient block with a product of two coefficients per term.
   // NOTE(review): all `for` headers and the initializers of scan0_p and scan0_c are missing
   // in this copy of the listing; the pixel stores presumably also lost a cast.
   private static unsafe void InverseDCTSisd(IntPtr dctBuffer, BitmapData image, int w, int h)
+
   {
+
 #if !TEST
+
     // Scratch buffer is only needed by the two-pass variant.
     var dct_temp = stackalloc float[8 * 8];
+
 #endif
+
     // Row pitch used for the coefficient buffer (the image side uses image.Stride instead).
     var stride = w;
+
 
+
     var scan0_p =;
+
     var scan0_c =;
+
 
+
     for {
+
       var block_p = scan0_p + blockY * image.Stride;
+
       var block_c = scan0_c + blockY * stride;
+
 
+
       for {
+
 #if !TEST
+
         { // y
+
           var dct_temp_y = dct_temp;
+
 
+
           for {
+
             for {
+
               var pix_c_uv = block_c + u;
+
               var t = 0.0f;
+
 
+
               for {
+
                 // Accumulates down one coefficient column, stepping by `stride`.
                 t += *pix_c_uv * coefficientsArray[y * 8 + v];
+
                 pix_c_uv += stride;
+
               }
+
 
+
               *(dct_temp_y++) = t;
+
             }
+
           }
+
         }
+
 
+
         { // x
+
           for {
+
             var pix_p_xy = block_p + y * image.Stride;
+
 
+
             for {
+
               var dct_temp_x = dct_temp + y * 8;
+
               var pix_p = 0.0f;
+
 
+
               for {
+
                 // Accumulates along row y of the intermediate buffer.
                 pix_p += dct_temp_x[u] * coefficientsArray[x * 8 + u];
+
               }
+
 
+
               pix_p_xy[x] =pix_p;
+
             }
+
           }
+
         }
+
 #else
+
         for {
+
           var pix_p_xy = block_p + y * image.Stride;
+
 
+
           for {
+
             var pix_p = 0.0f;
+
 
+
             for {
+
               var pix_c_uv = block_c + v * stride;
+
 
+
               for {
+
                 // Direct form: one term per coefficient, two coefficient lookups per term.
                 pix_p += pix_c_uv[u] * coefficientsArray[y * 8 + v] * coefficientsArray[x * 8 + u];
+
               }
+
             }
+
 
+
             pix_p_xy[x] =pix_p;
+
           }
+
         }
+
 #endif
+
       }
+
     }
+
   }
+
 
+
   // Runs the forward transform over the whole luminance bitmap, writing into dctBuffer.
   // Thin wrapper around DCT with forward = true and the bitmap's own width and height.
   private static void ForwardDCT(bool simd, Bitmap luminance, IntPtr dctBuffer)
+
   {
+
     DCT(simd, true, luminance.Width, luminance.Height, luminance, dctBuffer);
+
   }
+
 
+
   // Runs the inverse transform from dctBuffer into a newly created 8bpp grayscale bitmap of
   // the given size and returns that bitmap. The caller owns the returned bitmap.
   private static Bitmap InverseDCT(bool simd, int width, int height, IntPtr dctBuffer)
+
   {
+
     var image = Create8bppGrayscaleBitmap(width, height);
+
 
+
     // forward = false selects the inverse routines inside DCT.
     DCT(simd, false, width, height, image, dctBuffer);
+
 
+
     return image;
+
   }
+
 
+
   // Shared driver for both transform directions. Locks the bitmap over the full rectangle
   // (read-only when forward, write-only otherwise), calls one of the four transform routines,
   // prints "<forward|inverse> DCT spent <elapsed>", and unlocks the bitmap in `finally`.
   // NOTE(review): the `if` conditions are missing in this copy of the listing; the outer one
   // presumably tests `forward`, the inner ones `simd`, and the one in `finally` presumably
   // tests `locked` for null.
   private static void DCT(bool simd, bool forward, int width, int height, Bitmap image, IntPtr dctBuffer)
+
   {
+
     var rect = new Rectangle(0, 0, width, height);
+
 
+
     // Declared outside the try block so that the finally block can see it.
     BitmapData locked   = null;
+
 
+
     try {
+
       locked = image.LockBits(rect, forward ? ImageLockMode.ReadOnly : ImageLockMode.WriteOnly, Format8bppGrayscale);
+
 
+
       // Only the transform call itself is timed; locking is excluded.
       var stopwatch = new Stopwatch();
+
 
+
       stopwatch.Start();
+
 
+
       if {
+
         if
+
           ForwardDCTSimd(locked, dctBuffer, rect.Width, rect.Height);
+
         else
+
           ForwardDCTSisd(locked, dctBuffer, rect.Width, rect.Height);
+
       }
+
       else {
+
         if
+
           InverseDCTSimd(dctBuffer, locked, rect.Width, rect.Height);
+
         else
+
           InverseDCTSisd(dctBuffer, locked, rect.Width, rect.Height);
+
       }
+
 
+
       stopwatch.Stop();
+
 
+
       Console.WriteLine("{0} DCT spent {1}", forward ? "forward" : "inverse", stopwatch.Elapsed);
+
     }
+
     finally {
+
       if
+
         image.UnlockBits(locked);
+
     }
+
   }
+
 
+
   // Creates an 8bpp grayscale bitmap with the same size as imageColored and fills it with one
   // luminance value per pixel. The source is locked as Format24bppRgb (read-only) and the
   // destination as the 8bpp format (write-only); both are unlocked in `finally`.
   // The caller owns the returned bitmap.
   // NOTE(review): the `for` headers, the base part of both row-pointer initializers, the
   // luminance expression and the `if` conditions in `finally` are missing in this copy of
   // the listing.
   private static Bitmap CreateLuminanceImage(Bitmap imageColored)
+
   {
+
     var imageLuminance = Create8bppGrayscaleBitmap(imageColored.Width, imageColored.Height);
+
 
+
     // Declared outside the try block so that the finally block can see them.
     BitmapData lockedColored   = null;
+
     BitmapData lockedLuminance = null;
+
 
+
     try {
+
       var rect = new Rectangle(0, 0, imageColored.Width, imageColored.Height);
+
 
+
       lockedColored   = imageColored  .LockBits(rect, ImageLockMode.ReadOnly,  PixelFormat.Format24bppRgb);
+
       lockedLuminance = imageLuminance.LockBits(rect, ImageLockMode.WriteOnly, Format8bppGrayscale);
+
 
+
       unsafe {
+
         for {
+
           // Row pointers are offset by y * Stride of their own bitmap.
           var bgr = + y * lockedColored.Stride;
+
           var lum = + y * lockedLuminance.Stride;
+
 
+
           for {
+
             // 0.299R + 0.587G + 0.114B
+
             // NOTE(review): only the `+` operators of the weighted sum survive below.
             *(lum++) = +
+
 +
+
;
+
           }
+
         }
+
       }
+
 
+
       return imageLuminance;
+
     }
+
     finally {
+
       if
+
         imageColored.UnlockBits(lockedColored);
+
       if
+
         imageLuminance.UnlockBits(lockedLuminance);
+
     }
+
   }
+
 
+
   // Creates a bitmap in the 8bpp indexed format and replaces its palette entries with opaque
   // gray levels, so that palette entry i is the color (i, i, i). This lets the indexed format
   // be used as a grayscale image.
   // NOTE(review): the `for` header is missing in this copy of the listing; it presumably
   // iterates i over every palette entry.
   private static Bitmap Create8bppGrayscaleBitmap(int width, int height)
+
   {
+
     var grayscaled = new Bitmap(width, height, Format8bppGrayscale);
+
 
+
     // 8bpp indexed as 8bpp grayscale
+
     // The palette is fetched, modified, and then assigned back to the bitmap.
     var palette = grayscaled.Palette;
+
 
+
     for {
+
       palette.Entries[i] = Color.FromArgb(0xff, i, i, i);
+
     }
+
 
+
     grayscaled.Palette = palette;
+
 
+
     return grayscaled;
+
   }
+
 
+
   // Entry point of the DCT sample. Prints Mono.Simd acceleration information, reports the
   // image size, rejects unsuitable sizes with ApplicationException, initializes the
   // coefficient tables, allocates a float buffer of w * h elements, then repeatedly runs the
   // forward transform and, inside a `using`, works with an `inverted` bitmap; luminance.bmp
   // and inverted.bmp are saved into the directory args[2] unless the iteration is skipped.
   // NOTE(review): the initializer of useSimd, the `if` conditions, the `using` resource
   // declarations (which presumably create image, luminance and inverted) and the `for` header
   // are missing in this copy of the listing. The usage shown elsewhere on the page is
   // "dct.exe simd|sisd image.bmp <output directory>".
   public static void Main(string[] args)
+
   {
+
     var useSimd =;
+
 
+
     if {
+
       Console.WriteLine("Mono.Simd accel mode: {0}", SimdRuntime.AccelMode);
+
 
+
       // Reports, for each listed Vector4f operator, whether the runtime accelerates it.
       foreach (var method in new[] {
+
         new {Type = typeof(Vector4f), Method = "op_Addition", Signature = new[] {typeof(Vector4f), typeof(Vector4f)}},
+
         new {Type = typeof(Vector4f), Method = "op_Multiply", Signature = new[] {typeof(Vector4f), typeof(Vector4f)}},
+
       }) {
+
         Console.WriteLine("acceleration: {0}.{1} = {2}",
+
                           method.Type.Name,
+
                           method.Method,
+
                           SimdRuntime.IsMethodAccelerated(method.Type,
+
                                                           method.Method,
+
                                                           method.Signature));
+
       }
+
     }
+
 
+
     using {
+
       Console.WriteLine("image size {0}x{1}", image.Width, image.Height);
+
 
+
       var w = image.Width;
+
       var h = image.Height;
+
 
+
       // Per the message, both dimensions have to be multiples of 8.
       if
+
         throw new ApplicationException("both width and height must be n * 8");
+
 
+
       InitializeCoefficients(useSimd);
+
 
+
       using {
+
         // One float per pixel; released with FreeCoTaskMem after the loop.
         // NOTE(review): the release is not inside a finally block, so it is skipped if the
         // loop throws.
         var dctBuffer = Marshal.AllocCoTaskMem(sizeof(float) * w * h);
+
 
+
         for {
+
           ForwardDCT(useSimd, luminance, dctBuffer);
+
 
+
           using {
+
             // Iterations for which the (missing) condition holds skip saving the images.
             if
+
               continue;
+
 
+
             luminance.Save(Path.Combine(args[2], "luminance.bmp"), ImageFormat.Bmp);
+
             inverted.Save(Path.Combine(args[2], "inverted.bmp"), ImageFormat.Bmp);
+
           }
+
         }
+
 
+
         Marshal.FreeCoTaskMem(dctBuffer);
+
       }
+
     }
+
   }
+
 }
+