Soci (Soczó Zsolt) szakmai blogja

2018.04.10.

Ha valaki játszani akar a választási adatokkal

Filed under: Szakmai élet — Soczó Zsolt @ 21:04

[source lang="csharp"]
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Xml;

namespace ConsoleApp1
{
    /// <summary>
    /// Walks a local directory tree of downloaded "evkjkv.html" result pages and prints
    /// one comma-separated line per page: the names of the two directories directly
    /// above the file, followed by a "PARTY,votes" pair for every party in
    /// <see cref="Parties"/>.
    /// </summary>
    class Program
    {
        /// <summary>Root directory that is searched recursively for result pages.</summary>
        private const string ResultsRoot = @"C:\temp\valasztas\valasztas.hu\dyn\pv18\szavossz\hu\";

        /// <summary>File name of a single result page.</summary>
        private const string ResultFileName = "evkjkv.html";

        /// <summary>
        /// Captures the names of the two directories directly above the result file.
        /// The separator and the dot of the extension are matched literally so that
        /// only ...\{dir1}\{dir2}\evkjkv.html paths are accepted.
        /// </summary>
        private static readonly Regex PathPattern = new Regex(
            @".+\\(\w+)\\(\w+)\\evkjkv\.html",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>Row-label prefixes that are looked up in every page, in output order.</summary>
        private static readonly string[] Parties =
        {
            "FIDESZ",
            "JOBBIK",
            "MSZP",
            "LMP",
            "EGYÜTT",
            "DK",
        };

        static void Main()
        {
            new Program().Run();
        }

        /// <summary>
        /// Enumerates every result page below <see cref="ResultsRoot"/> and writes one
        /// CSV line per page to standard output.
        /// </summary>
        private void Run()
        {
            foreach (var file in Directory.GetFiles(ResultsRoot, ResultFileName, SearchOption.AllDirectories))
            {
                // Check the path first: without the two directory names the output line
                // would start with empty columns, so such files are reported and skipped.
                var m = PathPattern.Match(file);
                if (!m.Success)
                {
                    Console.Error.WriteLine($"Skipping {file}: path does not match the expected directory layout");
                    continue;
                }

                var d = FromHtml(file);

                IEnumerable<string> cols = Parties.Select(p => $"{p},{GetVotes(d, file, p)}");
                string res = string.Join(",", cols);
                Console.WriteLine($"{m.Groups[1]},{m.Groups[2]},{res}");
            }
        }

        /// <summary>
        /// Returns the text of the fourth cell of the first table row that contains a
        /// cell whose text starts with <paramref name="party"/>.
        /// </summary>
        /// <param name="d">Parsed result page.</param>
        /// <param name="file">Path of the page; used only in diagnostics.</param>
        /// <param name="party">Prefix of the row label to look for.</param>
        /// <returns>
        /// The cell text with non-breaking spaces removed, or "0" when the page has no
        /// row for the party or the row has fewer than four cells.
        /// </returns>
        private static string GetVotes(XmlDocument d, string file, string party)
        {
            // The literal must be delimited by straight quotes; typographic quotes are
            // not valid XPath string delimiters and make the expression unparsable.
            var n = d.SelectSingleNode($"//tr[td[starts-with(text(), '{party}')]]");
            if (n == null)
            {
                return "0";
            }

            var cell = n.SelectSingleNode("td[4]");
            if (cell == null)
            {
                Console.Error.WriteLine($"{file}: row for {party} has fewer than four cells, reporting 0");
                return "0";
            }

            // InnerText yields decoded text, so a non-breaking space would arrive as
            // U+00A0 rather than as the literal entity; strip that form as well.
            // NOTE(review): confirm against the real pages which form actually occurs.
            return cell.InnerText
                .Replace("&amp;", "")
                .Replace("&nbsp;", "")
                .Replace("\u00A0", "");
        }

        /// <summary>
        /// Loads an HTML file into an <see cref="XmlDocument"/> through SgmlReader.
        /// Element names are folded to lower case, which is what the lower-case XPath
        /// expressions in <see cref="GetVotes"/> rely on.
        /// </summary>
        /// <param name="path">Path of the HTML file to read.</param>
        /// <returns>The parsed document, with whitespace preserved.</returns>
        private XmlDocument FromHtml(string path)
        {
            // NOTE(review): File.OpenText decodes as UTF-8 unless a byte order mark says
            // otherwise; confirm that the downloaded pages are stored in that encoding.
            using (TextReader reader = File.OpenText(path))
            using (var sgmlReader = new Sgml.SgmlReader
            {
                DocType = "HTML",
                WhitespaceHandling = WhitespaceHandling.All,
                CaseFolding = Sgml.CaseFolding.ToLower,
                InputStream = reader
            })
            {
                var doc = new XmlDocument
                {
                    PreserveWhitespace = true,
                    XmlResolver = null
                };
                doc.Load(sgmlReader);
                return doc;
            }
        }
    }
}
[/source]

No Comments

No comments yet.

RSS feed for comments on this post.

Sorry, the comment form is closed at this time.

Powered by WordPress