RosterParserFixed.XmlParser parser = new RosterParserFixed.XmlParser()
Fixed the nesting problem. Fixed item parsing. Item stats for nested units show up now. As with the Ruby parser, throw different combinations at it and see what happens.
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Serialization;
using System.Xml.Schema;
using System.Xml.XPath;

namespace abparser
{
    class Program
    {
        /// <summary>
        /// Parses a roster file and writes the converted XML, then waits for a key press.
        /// </summary>
        /// <param name="args">
        /// Optional: args[0] is the roster file to read and args[1] the XML file to write.
        /// When omitted, the original hard-coded paths are used.
        /// </param>
        static void Main(string[] args)
        {
            string inputPath = args.Length > 0 ? args[0] : @"C:\Temp\de7th.rst";
            string outputPath = args.Length > 1 ? args[1] : @"C:\Temp\output.xml";

            RosterParserTest.XmlParser parser = new RosterParserTest.XmlParser();
            parser.ParseRoster(inputPath, outputPath);
            Console.ReadLine();
        }
    }
}

namespace RosterParserTest
{
    /// <summary>
    /// Converts the <c>/document/squad</c> elements of a roster XML file into a
    /// simplified tree rooted at an <c>Army</c> element.
    /// </summary>
    /// <remarks>
    /// The output document and its root element are static, so every instance (and
    /// every call to <see cref="ParseRoster"/>) accumulates into the same tree.
    /// </remarks>
    class XmlParser
    {
        static readonly XmlDocument Roster = new XmlDocument();
        static readonly XmlElement rootElement = Roster.CreateElement("", "Army", "");

        //Ryan's Regex: whitespace runs, {...} groups, (...) groups, slashes and periods.
        static readonly Regex NameNoise =
            new Regex(@"(\s+|\{.*?\}|\(.*?\)|\/+|\.+)", RegexOptions.Compiled);

        // Names containing any of these fragments are never copied to the output.
        static readonly Regex SkippedStatNames =
            new Regex(@"(Left|Worker|Helper|Pts|Coun|Group)", RegexOptions.Compiled);

        // True when the value contains at least one character other than '0', '|' or '-'.
        static readonly Regex MeaningfulValue = new Regex(@"[^0|-]", RegexOptions.Compiled);

        /// <summary>
        /// Strips whitespace, brace/parenthesis groups, slashes and periods from
        /// <paramref name="str"/> so the result can be used as an element name.
        /// </summary>
        /// <param name="str">Text to clean; may be null.</param>
        /// <returns>The cleaned text, or <paramref name="str"/> itself when it is null.</returns>
        public static string RemoveWhitespace(string str)
        {
            // The original swallowed every exception and returned the input; the only
            // failure Regex.Replace can raise here is a null input, so guard it explicitly.
            if (str == null)
            {
                return str;
            }

            return NameNoise.Replace(str, String.Empty);
        }

        /// <summary>
        /// Copies <paramref name="thisElement"/> and its descendants into the output tree.
        /// </summary>
        /// <param name="thisElement">Source element whose "name" attribute and children are read.</param>
        /// <param name="rosterElement">Output element that receives the converted content.</param>
        public void ParseNestedXML(XmlElement thisElement, XmlElement rosterElement)
        {
            string replaceMe = RemoveWhitespace(thisElement.GetAttribute("name"));
            XmlNodeList linkUnitStatNodeList = thisElement.SelectNodes("./link | ./unitstat");

            /* Grab the last of the PascalCase names:
             * HarGanethExecutioners becomes Executioners,
             * SupremeSorceress becomes Sorceress, etc. */
            string lastWord = Regex.Replace(replaceMe, @".*?([A-Z][a-z]+)$", "${1}");

            // Literal substring test: the name fragment must not be interpreted as a
            // regex pattern. An empty fragment (no "name" attribute) still matches.
            if (rosterElement.Name.Contains(lastWord))
            {
                // Reuse the previous node in the tree so no duplicate empty node is created.
                XmlElement baseElement = rosterElement;
                bool linkUnitStatsDone = false;

                foreach (XmlNode childNode in thisElement.ChildNodes)
                {
                    // Comments, text and CDATA children carry nothing to convert.
                    XmlElement parseElement = childNode as XmlElement;
                    if (parseElement == null)
                    {
                        continue;
                    }

                    if (parseElement.HasChildNodes && parseElement.InnerXml.Contains("entity"))
                    {
                        ParseNestedXML(parseElement, baseElement);
                    }
                    else if (!linkUnitStatsDone)
                    {
                        // The link/unitstat list belongs to thisElement as a whole,
                        // so it is processed once, on the first non-nested child.
                        ParseLinkUnitStats(linkUnitStatNodeList, baseElement);
                        linkUnitStatsDone = true;
                    }
                }
            }
            else
            {
                XmlElement baseElement = Roster.CreateElement(replaceMe);

                foreach (XmlNode childNode in thisElement.ChildNodes)
                {
                    XmlElement parseElement = childNode as XmlElement;
                    if (parseElement == null)
                    {
                        continue;
                    }

                    if (parseElement.HasChildNodes && parseElement.InnerXml.Contains("entity"))
                    {
                        ParseNestedXML(parseElement, baseElement);
                    }
                    else
                    {
                        ParseLinkUnitStats(parseElement, baseElement);
                    }

                    // NOTE(review): these appends are deliberately kept inside the loop.
                    // A nested call may re-parent rosterElement under the Army node, and
                    // re-appending after each child restores the intended nesting. It
                    // also means an element with no child elements is never emitted.
                    rosterElement.AppendChild(baseElement);
                    rootElement.AppendChild(rosterElement);
                }
            }
        }

        /// <summary>
        /// Loads the roster at <paramref name="path"/>, converts every
        /// <c>/document/squad</c> element and saves the result to <paramref name="output"/>.
        /// </summary>
        /// <param name="path">Roster XML file to read.</param>
        /// <param name="output">File the converted XML is written to.</param>
        public void ParseRoster(string path, string output)
        {
            XmlDocument parsingRoster = new XmlDocument();
            parsingRoster.Load(path);

            // The XPath selects elements only, so the cast in the loop is safe.
            foreach (XmlElement squadElement in parsingRoster.SelectNodes("/document/squad"))
            {
                ParseNestedXML(squadElement, Roster.CreateElement("Unit"));
            }

            Roster.AppendChild(rootElement);
            Roster.Save(output);
        }

        /// <summary>
        /// Converts the named child elements of <paramref name="parseElement"/> into
        /// children of <paramref name="baseElement"/>.
        /// </summary>
        /// <param name="parseElement">Source element whose children are examined.</param>
        /// <param name="baseElement">Output element that receives the converted stats.</param>
        public void ParseLinkUnitStats(XmlElement parseElement, XmlElement baseElement)
        {
            foreach (XmlNode childNode in parseElement.ChildNodes)
            {
                XmlElement correctElement = childNode as XmlElement;
                if (correctElement == null || !correctElement.HasAttribute("name"))
                {
                    continue;
                }

                string subReplaceMe = RemoveWhitespace(correctElement.GetAttribute("name"));

                if (!SkippedStatNames.IsMatch(subReplaceMe))
                {
                    // Created only for names that pass the filter, matching the
                    // XmlNodeList overload.
                    XmlElement addElement = Roster.CreateElement(subReplaceMe);

                    if (parseElement.HasChildNodes && parseElement.InnerXml.Contains("entity"))
                    {
                        // NOTE(review): the arguments look swapped (source element passed
                        // as rosterElement). As written this is a no-op because addElement
                        // has no children. Kept as-is; confirm intent before changing.
                        ParseNestedXML(addElement, correctElement);
                    }
                    else if (correctElement.HasAttribute("description"))
                    {
                        // NOTE(review): addElement is never appended in this branch, so the
                        // description is dropped, unlike the XmlNodeList overload. Kept to
                        // preserve existing output; confirm whether that is intended.
                        addElement.InnerText = correctElement.GetAttribute("description");
                    }
                    else if (correctElement.HasAttribute("value")
                        && MeaningfulValue.IsMatch(correctElement.GetAttribute("value")))
                    {
                        addElement.InnerText = RemoveWhitespace(correctElement.GetAttribute("value"));
                        baseElement.AppendChild(addElement);
                    }
                }

                if (parseElement.HasAttribute("basename"))
                {
                    /* It's a non-dwarf item. They don't show up in the XmlNodeList one.
                     * Strip literal "\n" sequences and periods from the summary, then set
                     * it as the InnerText (which replaces any existing children).
                     * This doesn't catch cases where the item has other properties inside it. */
                    baseElement.InnerText =
                        Regex.Replace(parseElement.GetAttribute("itemsummary"), @"(\\n|\.)", String.Empty);
                }
            }
        }

        /// <summary>
        /// Converts the named elements in <paramref name="parseNodeList"/> into
        /// children of <paramref name="baseElement"/>.
        /// </summary>
        /// <param name="parseNodeList">Elements to convert; every node is cast to XmlElement.</param>
        /// <param name="baseElement">Output element that receives the converted stats.</param>
        public void ParseLinkUnitStats(XmlNodeList parseNodeList, XmlElement baseElement)
        {
            foreach (XmlElement correctElement in parseNodeList)
            {
                if (!correctElement.HasAttribute("name"))
                {
                    continue;
                }

                string subReplaceMe = RemoveWhitespace(correctElement.GetAttribute("name"));
                if (SkippedStatNames.IsMatch(subReplaceMe))
                {
                    continue;
                }

                XmlElement addElement = Roster.CreateElement(subReplaceMe);

                if (correctElement.HasChildNodes && correctElement.InnerXml.Contains("entity"))
                {
                    // NOTE(review): same swapped-looking argument order as in the
                    // XmlElement overload; a no-op as written. Confirm intent.
                    ParseNestedXML(addElement, correctElement);
                }

                if (correctElement.HasAttribute("description"))
                {
                    addElement.InnerText = correctElement.GetAttribute("description");
                    baseElement.AppendChild(addElement);
                }
                else if (correctElement.HasAttribute("value")
                    && MeaningfulValue.IsMatch(correctElement.GetAttribute("value")))
                {
                    addElement.InnerText = RemoveWhitespace(correctElement.GetAttribute("value"));
                    baseElement.AppendChild(addElement);
                }
            }
        }
    }
}
No Comments
No comments yet.
RSS feed for comments on this post. TrackBack URI
Leave a comment
You must be logged in to post a comment.