読者です 読者をやめる 読者になる 読者になる

ネイティブバイナリのtidy.dllでHTMLを清書する

C# HTML

ネイティブバイナリのtidy.dllでHTMLを清書しようとしましたが、失敗しました。orz

やはり、日本語が文字化けします。何か間違ってるんだろうか・・・
↑ウソ:これが原因

using System;
using System.Text;
using System.Runtime.InteropServices;

namespace TMGBCReserver
{
    /// <summary>
    /// P/Invoke bindings for the native <c>tidy.dll</c> library, plus
    /// <see cref="CleanFile"/>, a helper that pushes an HTML string through it.
    /// </summary>
    public class Tidy32
    {
        /// <summary>Initial size, in bytes, requested for the output buffer (1 MiB).</summary>
        private const int OutputBufferInitialSize = 1024 * 1024;

        /// <summary>
        /// Managed mirror of the buffer structure exchanged with the tidy buffer functions.
        /// NOTE(review): field order and sizes must match the TidyBuffer declared in the
        /// header of the tidy.dll build that is actually loaded - confirm against that header.
        /// </summary>
        [StructLayout(LayoutKind.Sequential)]
        public struct TidyBuffer
        {
            public IntPtr bp;           /**< Pointer to bytes */
            public uint size;         /**< # bytes currently in use */
            public uint allocated;    /**< # bytes allocated */
            public uint next;         /**< Offset of current input position */
        }

        /// <summary>Binding for the <c>tidyBufAlloc</c> export; used here to allocate the output buffer.</summary>
        [DllImport("tidy.dll")]
        public static extern int tidyBufAlloc(ref TidyBuffer tidyBuffer, int allocSize);

        /// <summary>Binding for the <c>tidyBufFree</c> export; used here to release the output buffer.</summary>
        [DllImport("tidy.dll")]
        public static extern int tidyBufFree(ref TidyBuffer tidyBuffer);

        /// <summary>Binding for the <c>tidyCreate</c> export; returns the document handle passed to the other calls.</summary>
        [DllImport("tidy.dll")]
        public static extern IntPtr tidyCreate();

        /// <summary>Binding for the <c>tidyParseFile</c> export; the file name is marshalled as an ANSI string.</summary>
        [DllImport("tidy.dll")]
        public static extern int tidyParseFile(IntPtr tidyPointer, [MarshalAs(UnmanagedType.LPStr)]string fileName);

        /// <summary>Binding for the <c>tidyParseBuffer</c> export; a negative result is treated as failure by <see cref="CleanFile"/>.</summary>
        [DllImport("tidy.dll")]
        public static extern int tidyParseBuffer(IntPtr tidyPointer, [MarshalAs(UnmanagedType.Struct)] ref TidyBuffer tidyBuffer);

        /// <summary>Binding for the <c>tidyCleanAndRepair</c> export; a negative result is treated as failure by <see cref="CleanFile"/>.</summary>
        [DllImport("tidy.dll")]
        public static extern int tidyCleanAndRepair(IntPtr tidyPointer);

        /// <summary>Binding for the <c>tidySaveFile</c> export; the file name is marshalled as an ANSI string.</summary>
        [DllImport("tidy.dll")]
        public static extern int tidySaveFile(IntPtr tidyPointer, [MarshalAs(UnmanagedType.LPStr)]string outFileName);

        /// <summary>Binding for the <c>tidySaveBuffer</c> export; used here to write the document into the output buffer.</summary>
        [DllImport("tidy.dll")]
        public static extern int tidySaveBuffer(IntPtr tidyPointer, [MarshalAs(UnmanagedType.Struct)] ref TidyBuffer tidyBuffer);

        /// <summary>Binding for the <c>tidyRelease</c> export; used here to dispose the handle from <see cref="tidyCreate"/>.</summary>
        [DllImport("tidy.dll")]
        public static extern int tidyRelease(IntPtr tidyPointer);

        /// <summary>Binding for the <c>tidySetCharEncoding</c> export; the encoding name is marshalled as an ANSI string.</summary>
        [DllImport("tidy.dll")]
        public static extern int tidySetCharEncoding(IntPtr tidyPointer, [MarshalAs(UnmanagedType.LPStr)]string encoding);

        /// <summary>
        /// Binding for the <c>tidyOptSetBool</c> export.
        /// NOTE(review): <paramref name="value"/> is passed as the option id and
        /// <paramref name="Bool"/> as the flag by <see cref="CleanFile"/>; the parameter names
        /// are kept for compatibility with existing callers.
        /// </summary>
        [DllImport("tidy.dll")]
        public static extern int tidyOptSetBool(IntPtr tidyPointer, int value, int Bool);

        /// <summary>
        /// Runs an HTML string through tidy and returns whatever tidy wrote to its output buffer.
        /// Despite the name, the argument is the markup itself, not a file path.
        /// </summary>
        /// <param name="inputHtml">HTML markup; it is handed to tidy as UTF-8 bytes.</param>
        /// <returns>
        /// The output buffer decoded as UTF-8. When parsing or clean-and-repair reports a
        /// negative status nothing is saved, so the result is the empty string.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="inputHtml"/> is null.</exception>
        /// <exception cref="InvalidOperationException"><c>tidyCreate</c> returned a null handle.</exception>
        public static string CleanFile(string inputHtml)
        {
            if (inputHtml == null)
            {
                throw new ArgumentNullException("inputHtml");
            }

            byte[] inputBytes = Encoding.UTF8.GetBytes(inputHtml);

            // Output buffer owned by the native side; released in the outermost finally.
            TidyBuffer output = new TidyBuffer();
            tidyBufAlloc(ref output, OutputBufferInitialSize);
            try
            {
                IntPtr document = tidyCreate();
                if (document == IntPtr.Zero)
                {
                    // Fail in managed code instead of handing a null handle to native calls.
                    throw new InvalidOperationException("tidyCreate returned a null document handle.");
                }

                try
                {
                    // We want the resulting markup to be UTF-8 encoded, matching the
                    // UTF-8 bytes supplied as input and the decoding done below.
                    tidySetCharEncoding(document, "utf8");

                    // Pin the managed array so its address stays valid while tidy reads it.
                    GCHandle pin = GCHandle.Alloc(inputBytes, GCHandleType.Pinned);
                    try
                    {
                        // Input buffer that points at the pinned managed bytes.
                        TidyBuffer input = new TidyBuffer();
                        input.bp = Marshal.UnsafeAddrOfPinnedArrayElement(inputBytes, 0);
                        input.size = (uint)inputBytes.Length;
                        input.allocated = (uint)inputBytes.Length;
                        input.next = 0;

                        if (tidyParseBuffer(document, ref input) >= 0)
                        {
                            // NOTE(review): 29 and 23 are raw option ids; presumably
                            // TidyMakeClean and TidyXhtmlOut in the TidyOptionId enum of the
                            // tidy build this was written against - verify against tidyenum.h.
                            tidyOptSetBool(document, 29, 1);
                            tidyOptSetBool(document, 23, 1);

                            if (tidyCleanAndRepair(document) >= 0)
                            {
                                // Best effort: the status is not inspected; whatever reached
                                // the output buffer is returned to the caller.
                                tidySaveBuffer(document, ref output);
                            }
                        }
                    }
                    finally
                    {
                        pin.Free();
                    }

                    if (output.size == 0)
                    {
                        // Nothing was written; skip the native copy entirely.
                        return string.Empty;
                    }

                    byte[] outputBytes = new byte[output.size];
                    Marshal.Copy(output.bp, outputBytes, 0, outputBytes.Length);
                    return Encoding.UTF8.GetString(outputBytes, 0, outputBytes.Length);
                }
                finally
                {
                    tidyRelease(document);
                }
            }
            finally
            {
                tidyBufFree(ref output);
            }
        }
    }
}