using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Xml;
namespace ConsoleApp1
{
/// <summary>
/// Console tool that walks a local directory tree of <c>evkjkv.html</c> pages and prints one
/// comma-separated line per page: the two directory names directly above the file, followed by
/// a <c>party,votes</c> pair for every entry in <see cref="Parties"/>.
/// </summary>
class Program
{
    /// <summary>Root directory that is searched recursively for result pages.</summary>
    private const string RootDirectory = @"C:\temp\valasztas\valasztas.hu\dyn\pv18\szavossz\hu\";

    /// <summary>File name of the pages that are processed.</summary>
    private const string ResultFileName = "evkjkv.html";

    /// <summary>
    /// Captures the names of the two directories directly above the result file
    /// (group 1 = outer directory, group 2 = inner directory).
    /// The separator and the extension dot are matched literally.
    /// </summary>
    private static readonly Regex PathPattern = new Regex(
        @".+\\(\w+)\\(\w+)\\evkjkv\.html",
        RegexOptions.Compiled | RegexOptions.IgnoreCase);

    /// <summary>Party name prefixes that are looked up in every page, in output column order.</summary>
    private static readonly string[] Parties =
    {
        "FIDESZ",
        "JOBBIK",
        "MSZP",
        "LMP",
        "EGYÜTT",
        "DK",
    };

    static void Main()
    {
        new Program().Run();
    }

    /// <summary>
    /// Processes every <see cref="ResultFileName"/> file below <see cref="RootDirectory"/> and
    /// writes one line per file to standard output.
    /// </summary>
    private void Run()
    {
        foreach (string file in Directory.GetFiles(RootDirectory, ResultFileName, SearchOption.AllDirectories))
        {
            XmlDocument document = FromHtml(file);

            // One "party,votes" pair per configured party, joined into a single column list.
            string partyColumns = string.Join(
                ",",
                Parties.Select(party => $"{party},{GetVotes(document, file, party)}"));

            // If the path does not match, both groups are empty and the line starts with ",,".
            Match match = PathPattern.Match(file);
            Console.WriteLine($"{match.Groups[1].Value},{match.Groups[2].Value},{partyColumns}");
        }
    }

    /// <summary>
    /// Reads the value of the fourth cell of the first table row that has a cell whose text
    /// starts with <paramref name="party"/>.
    /// </summary>
    /// <param name="d">Parsed page to search.</param>
    /// <param name="file">Path of the page; currently unused, kept for diagnostics.</param>
    /// <param name="party">
    /// Party name prefix. It is inserted into the XPath expression unescaped, so it must not
    /// contain an apostrophe.
    /// </param>
    /// <returns>
    /// The cell text with every <c>&amp;</c> and space removed, or <c>"0"</c> when the page has
    /// no matching row or the row has fewer than four cells.
    /// </returns>
    private static string GetVotes(XmlDocument d, string file, string party)
    {
        XmlNode row = d.SelectSingleNode($"//tr[td[starts-with(text(), '{party}')]]");
        if (row == null)
        {
            return "0";
        }

        // A matched row without a fourth cell is treated like a missing row instead of
        // crashing the whole run with a NullReferenceException.
        XmlNode votesCell = row.SelectSingleNode("td[4]");
        if (votesCell == null)
        {
            return "0";
        }

        // Presumably these characters are digit-group separators in the page — TODO confirm.
        return votesCell.InnerText.Replace("&", "").Replace(" ", "");
    }

    /// <summary>
    /// Parses an HTML file into an <see cref="XmlDocument"/> via <c>Sgml.SgmlReader</c>, with
    /// element names folded to lower case and whitespace preserved.
    /// </summary>
    /// <param name="path">Path of the HTML file to load.</param>
    /// <returns>The loaded document; its <c>XmlResolver</c> is set to <c>null</c>.</returns>
    private static XmlDocument FromHtml(string path)
    {
        // NOTE(review): File.OpenText decodes the file as UTF-8 — confirm the pages are UTF-8.
        using (TextReader reader = File.OpenText(path))
        using (var sgmlReader = new Sgml.SgmlReader
        {
            DocType = "HTML",
            WhitespaceHandling = WhitespaceHandling.All,
            CaseFolding = Sgml.CaseFolding.ToLower,
            InputStream = reader
        })
        {
            var doc = new XmlDocument
            {
                PreserveWhitespace = true,
                XmlResolver = null
            };
            doc.Load(sgmlReader);
            return doc;
        }
    }
}
}
Could you hire me? Contact me if you like what I’ve done in this article and think I can create value for your company with my skills.