Soci (Soczó Zsolt) szakmai blogja

2018.04.10.

Ha valaki játszani akar a választási adatokkal

Filed under: Szakmai élet — Soczó Zsolt @ 21:04
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Xml;

namespace ConsoleApp1
{
    /// <summary>
    /// Console tool that walks a directory tree of downloaded election result pages
    /// (files named <c>evkjkv.html</c>) and writes one CSV line per page to standard output.
    /// Each line has the form <c>dir1,dir2,PARTY1,votes1,PARTY2,votes2,...</c>, where
    /// <c>dir1</c> and <c>dir2</c> are the two directory names directly above the page.
    /// </summary>
    class Program
    {
        /// <summary>Root directory that is searched recursively for result pages.</summary>
        private const string RootDirectory = @"C:\temp\valasztas\valasztas.hu\dyn\pv18\szavossz\hu\";

        /// <summary>File name of a single result page.</summary>
        private const string ResultFileName = "evkjkv.html";

        /// <summary>
        /// Captures the two directory names directly above a result page.
        /// The path separator and the dot of the file name are matched explicitly
        /// instead of relying on an unescaped "." wildcard.
        /// </summary>
        private static readonly Regex PathPattern = new Regex(
            @".+\\(\w+)\\(\w+)\\evkjkv\.html",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>Party name prefixes that are looked up on every page, in output order.</summary>
        private static readonly string[] Parties =
        {
            "FIDESZ",
            "JOBBIK",
            "MSZP",
            "LMP",
            "EGYÜTT",
            "DK",
        };

        static void Main()
        {
            new Program().Run();
        }

        /// <summary>
        /// Processes every result page below <see cref="RootDirectory"/> and prints its CSV line.
        /// </summary>
        private void Run()
        {
            foreach (string file in Directory.GetFiles(RootDirectory, ResultFileName, SearchOption.AllDirectories))
            {
                XmlDocument page = FromHtml(file);

                // One "PARTY,votes" pair per configured party, in the order of the Parties list.
                IEnumerable<string> cols = Parties.Select(p => $"{p},{GetVotes(page, file, p)}");
                string res = string.Join(",", cols);

                // When the path does not match, both groups are empty and the line starts with ",,".
                Match m = PathPattern.Match(file);
                Console.WriteLine($"{m.Groups[1]},{m.Groups[2]},{res}");
            }
        }

        /// <summary>
        /// Reads the text of the fourth cell of the first table row that contains a cell
        /// whose first text node starts with <paramref name="party"/>.
        /// </summary>
        /// <param name="d">Parsed result page.</param>
        /// <param name="file">Path of the page; currently unused, kept for diagnostics.</param>
        /// <param name="party">
        /// Party name prefix. It is embedded in a single-quoted XPath literal,
        /// so it must not contain an apostrophe.
        /// </param>
        /// <returns>
        /// The cell text with all whitespace (including non-breaking spaces) removed,
        /// or <c>"0"</c> when no such row exists or the row has fewer than four cells.
        /// </returns>
        private static string GetVotes(XmlDocument d, string file, string party)
        {
            XmlNode row = d.SelectSingleNode($"//tr[td[starts-with(text(), '{party}')]]");

            // SelectSingleNode returns null when nothing matches; a short row is
            // treated the same way as a missing party instead of crashing.
            XmlNode votesCell = row?.SelectSingleNode("td[4]");
            if (votesCell == null)
            {
                return "0";
            }

            // Literal entity names are still removed in case the page double-encodes them.
            string text = votesCell.InnerText.Replace("&amp;", "").Replace("&nbsp;", "");

            // InnerText is already entity-decoded, so a real non-breaking space arrives as
            // U+00A0; char.IsWhiteSpace covers it together with preserved line breaks.
            // NOTE(review): assumes the fourth cell holds a plain number - confirm against the pages.
            return new string(text.Where(c => !char.IsWhiteSpace(c)).ToArray());
        }

        /// <summary>
        /// Loads an HTML file into an <see cref="XmlDocument"/> through <c>Sgml.SgmlReader</c>,
        /// folding element names to lower case and preserving whitespace.
        /// </summary>
        /// <param name="path">Path of the HTML file to read.</param>
        /// <returns>The parsed document.</returns>
        private static XmlDocument FromHtml(string path)
        {
            // NOTE(review): File.OpenText decodes the file as UTF-8 - confirm the downloaded
            // pages use that encoding, otherwise accented party names will not match.
            using (TextReader reader = File.OpenText(path))
            using (var sgmlReader = new Sgml.SgmlReader
            {
                DocType = "HTML",
                WhitespaceHandling = WhitespaceHandling.All,
                CaseFolding = Sgml.CaseFolding.ToLower,
                InputStream = reader
            })
            {
                var doc = new XmlDocument
                {
                    PreserveWhitespace = true,
                    // No resolver: external resources are not fetched while loading.
                    XmlResolver = null
                };
                doc.Load(sgmlReader);
                return doc;
            }
        }
    }
}

Powered by WordPress