Parser

OK, boring. I didn’t spend as much time working on it tonight as I intended to, but at least it parses the Dwarf roster fine. It does not parse the Dark Elf roster properly (namely, it doesn’t pull the description out of items or Gifts of Khaine, and it doubles up the <item> tag for reasons I’m not sure of), but that will get fixed when I’m at work tomorrow.

Ruby code:

#!/usr/bin/ruby
require "rexml/document"
require "pp"
require "rexml/formatters/default"
include REXML
 
# Roster file to convert: the first command-line argument when one is given,
# otherwise the Dwarf roster this script was originally written against.
inputxml = File.read(ARGV.first || 'dwarfroster.rst')
# Parsed source document, read by the roster/squad loops further down.
@roster = Document.new inputxml

# Root <army> element of the output tree; every converted node hangs off it.
@army = Document.new.add_element("army")
 
# Classifies every direct <entity> child of +process+ and mirrors it under
# +addto+ as one of:
#
# * a nested <item> (the entity's first <link> has child elements and the
#   first nested entity carries a non-empty "itemsummary" attribute),
# * a <champion> (statset matches /Normal/ and totalcost does not start
#   with "0"),
# * a <crew> (statset matches /Normal/ and totalcost starts with "0"),
# * a plain item, stored under the <item> child of +addto+.
#
# process - REXML element whose <entity> children are inspected.
# addto   - REXML element that receives the generated children.
#
# Prints a short progress line for each classified entity.
def parsenested(process, addto)
  process.elements.each('entity') do |entity|
    first_link = entity.elements["link"]

    if first_link && first_link.has_elements?
      # Recursively run through these to figure out what it is.
      nested = entity.elements["link/entity"]
      summary = nested && nested.attributes["itemsummary"]
      if summary && !summary.empty?
        # NOTE(review): the recursive call below reaches the plain-item
        # branch with this new <item> as +addto+, which creates a second
        # <item> inside it. That looks like the source of doubled <item>
        # tags; confirm the desired output shape before changing it.
        nested_item = addto.add_element("item")
        puts "Found nested\n"
        parsenested(first_link, nested_item)
      end
      # Nested links without an item summary are not handled yet.
    elsif entity.attributes["statset"] =~ /Normal/
      # It's a character, crew, or mount; a non-zero cost marks a
      # champion or character, a zero cost marks crew or the like.
      if entity.attributes["totalcost"] !~ /^0/
        champion = addto.add_element("champion")
        puts "Found champ\n"
        parse(entity, champion)
      else
        crew = addto.add_element("crew")
        puts "Found crew\n"
        parse(entity, crew)
      end
    else
      # It's an item.
      puts "Found item\n"
      item_name = entity.attributes["name"]
      # An element cannot be created without a name, so skip the entity.
      next if item_name.nil?

      # Always resolve the container from +addto+ itself. Caching it in an
      # instance variable let items leak into the container of whichever
      # element was processed previously.
      container = addto.elements["item"] || addto.add_element("item")
      added = container.add_element(item_name.gsub(/\s+/, ''))

      entity.elements.each('link') do |link|
        link_name = link.attributes["name"]
        # Skip unnamed links and the bookkeeping entries.
        next if link_name.nil? || link_name =~ /(Worker|Helper|Cost|Left)/
        added.add_element(link_name.gsub(/\s+/, '')).add_text(link.attributes["description"])
      end
    end
  end
end
 
# Copies the descriptive data of the roster entity +s+ into +addto+:
#
# * the non-empty "basename", "count" and "base" attributes, one child
#   element each (the basename can differ from the unit name, and a
#   difference between base and count hints at a champion in the unit),
# * a <stats> element with one child per <unitstat> whose value is neither
#   empty nor exactly "0" or "-",
# * for every <link> child: links with child elements are handed to
#   parsenested; the others become "true" flags under <attributes>.
#
# s     - REXML element describing one entity of the roster.
# addto - REXML element that receives the generated children.
def parse(s, addto)
  %w[basename count base].each do |key|
    value = s.attributes[key]
    next if value.nil? || value.empty?
    addto.add_element(key).add_text(value)
  end

  stats = addto.add_element("stats")
  s.elements.each('unitstat') do |stat|
    stat_name = stat.attributes["name"]
    value = stat.attributes["value"]
    next if stat_name.nil? || value.nil? || value.empty?
    # Blank stats are written as "0" or "-". The match is anchored so that
    # real values which merely contain a zero (e.g. "10") are kept.
    next if value =~ /\A\s*(0|-)\s*\z/
    stats.add_element(stat_name.gsub(/\s+/, '')).add_text(value)
  end

  s.elements.each('link') do |link|
    if link.has_elements?
      # Let parsenested figure out what it is.
      parsenested(link, addto)
    else
      link_name = link.attributes["name"]
      # Skip unnamed links and the "Helper, Worker, Points Left, Cost"
      # bookkeeping entries.
      next if link_name.nil? || link_name =~ /(Worker|Helper|Cost|Left)/

      # Resolve the container from +addto+ on every use. Caching it in an
      # instance variable let flags leak into the <attributes> of whichever
      # element was parsed previously.
      unit_attributes = addto.elements["attributes"] || addto.add_element("attributes")
      # Drop the text in braces before building the element name.
      unit_attributes.add_element(link_name.gsub(/\{.*?\}/, '').gsub(/\s+/, '')).add_text('true')
    end
  end
end
 
# Copy the roster-level summary into an <info> element: the race, army
# size, points actually used and the canonical race name.
@roster.elements.each('document/roster') do |roster|
  info = @army.add_element("info")
  %w[race size activesize racename].each do |attr|
    info.add_element(attr).add_text(roster.attributes[attr])
  end
end

# Every <squad> becomes a <unit> holding the model name, the number of
# models and the total cost, followed by whatever parse extracts from each
# of the squad's entities.
@roster.elements.each('document/squad') do |squad|
  @unit = @army.add_element("unit")

  %w[name modelcount totalcost].each do |attr|
    @unit.add_element(attr).add_text(squad.attributes[attr])
  end

  squad.elements.each('entity') do |entity|
    parse(entity, @unit)
  end
end
#pp @army

# Render the tree into a string buffer and print that buffer. The formatter
# appends to +output+; its return value is not what should be printed.
prettyprint = REXML::Formatters::Pretty.new
output = String.new
prettyprint.write(@army, output)
puts output

And the XML output:

<?xml version="1.0" encoding="ISO-8859-1"?>
<army>
	<info>
		<race>Dwarf</race>
		<size>1500</size>
		<activesize>1499.</activesize>
		<racename>Dwarfs</racename>
	</info>
	<unit>
		<name>Thane</name>
		<modelcount>1</modelcount>
		<totalcost>134</totalcost>
		<basename>Thane</basename>
		<count>1</count>
		<base>1</base>
		<stats>
			<Ld>9</Ld>
			<Mv>3</Mv>
			<Save>3+</Save>
			<St>4/8</St>
			<To>5</To>
			<UnitSt.>1</UnitSt.>
			<WS>6</WS>
			<Wo>2</Wo>
			<At>3</At>
			<BS>4</BS>
			<In>3</In>
			<ItemPts>75</ItemPts>
		</stats>
		<attributes>
			<General>true</General>
			<HandWeapon>true</HandWeapon>
			<GreatWeapon>true</GreatWeapon>
			<GromrilArmor>true</GromrilArmor>
		</attributes>
		<item>
			<RunicWeapon>
				<MasterRuneofKraggtheGrim>Allows other runes to be placed on a Great Weapon.</MasterRuneofKraggtheGrim>
				<RuneofCleaving>+1 Strength</RuneofCleaving>
			</RunicWeapon>
			<RunicArmor>
				<RuneofStone>+1 Armor Save</RuneofStone>
			</RunicArmor>
		</item>
	</unit>
	<unit>
		<name>Thane</name>
		<modelcount>1</modelcount>
		<totalcost>132</totalcost>
		<basename>Thane</basename>
		<count>1</count>
		<base>1</base>
		<stats>
			<In>3</In>
			<ItemPts>75</ItemPts>
			<Ld>9</Ld>
			<Mv>3</Mv>
			<Save>2+/1+</Save>
			<St>4/7</St>
			<To>5</To>
			<UnitSt.>1</UnitSt.>
			<WS>6</WS>
			<Wo>2</Wo>
			<At>3</At>
			<BS>4</BS>
		</stats>
		<attributes>
			<HandWeapon>true</HandWeapon>
			<GromrilArmor>true</GromrilArmor>
			<Shield>true</Shield>
		</attributes>
		<item>
			<RunicWeapon>
				<RuneofCleaving>+1 Strength</RuneofCleaving>
			</RunicWeapon>
			<RunicArmor>
				<RuneofStone>+1 Armor Save</RuneofStone>
			</RunicArmor>
		</item>
	</unit>
	<unit>
		<name>Thane</name>
		<modelcount>1</modelcount>
		<totalcost>95</totalcost>
		<basename>Thane</basename>
		<count>1</count>
		<base>1</base>
		<stats>
			<In>3</In>
			<ItemPts>75</ItemPts>
			<Ld>9</Ld>
			<Mv>3</Mv>
			<Save>3+</Save>
			<St>4</St>
			<To>5</To>
			<UnitSt.>1</UnitSt.>
			<WS>6</WS>
			<Wo>2</Wo>
			<At>3</At>
			<BS>4</BS>
		</stats>
		<attributes>
			<HandWeapon>true</HandWeapon>
			<GromrilArmor>true</GromrilArmor>
			<BattleStandardBearer>true</BattleStandardBearer>
		</attributes>
		<item>
			<RunicArmor>
				<RuneofStone>+1 Armor Save</RuneofStone>
			</RunicArmor>
		</item>
	</unit>
	<unit>
		<name>Dwarf Warriors</name>
		<modelcount>20</modelcount>
		<totalcost>205</totalcost>
		<basename>Dwarf Warriors</basename>
		<count>19</count>
		<base>20</base>
		<stats>
			<In>2</In>
			<Ld>9</Ld>
			<Mv>3</Mv>
			<Save>4+/3+</Save>
			<St>3</St>
			<To>4</To>
			<UnitSt.>1</UnitSt.>
			<WS>4</WS>
			<Wo>1</Wo>
			<At>1</At>
			<BS>3</BS>
		</stats>
		<champion>
			<basename>Veteran</basename>
			<count>1</count>
			<base>1</base>
			<stats>
				<In>2</In>
				<Ld>9</Ld>
				<Mv>3</Mv>
				<Save>4+/3+</Save>
				<St>3</St>
				<To>4</To>
				<UnitSt.>1</UnitSt.>
				<WS>4</WS>
				<Wo>1</Wo>
				<At>2</At>
				<BS>3</BS>
			</stats>
			<attributes>
				<HandWeapon>true</HandWeapon>
				<HeavyArmor>true</HeavyArmor>
				<Shield>true</Shield>
			</attributes>
		</champion>
		<attributes>
			<Musician>true</Musician>
			<StandardBearer>true</StandardBearer>
			<HandWeapon>true</HandWeapon>
			<HeavyArmor>true</HeavyArmor>
			<Shield>true</Shield>
		</attributes>
	</unit>
	<unit>
		<name>Quarellers</name>
		<modelcount>10</modelcount>
		<totalcost>110</totalcost>
		<basename>Quarrellers</basename>
		<count>10</count>
		<base>10</base>
		<stats>
			<In>2</In>
			<Ld>9</Ld>
			<Mv>3</Mv>
			<Save>6+</Save>
			<St>3</St>
			<To>4</To>
			<UnitSt.>1</UnitSt.>
			<WS>4</WS>
			<Wo>1</Wo>
			<At>1</At>
			<BS>3</BS>
		</stats>
		<attributes>
			<HandWeapon>true</HandWeapon>
			<Crossbow>true</Crossbow>
			<LightArmor>true</LightArmor>
		</attributes>
	</unit>
	<unit>
		<name>Quarellers</name>
		<modelcount>10</modelcount>
		<totalcost>110</totalcost>
		<basename>Quarrellers</basename>
		<count>10</count>
		<base>10</base>
		<stats>
			<In>2</In>
			<Ld>9</Ld>
			<Mv>3</Mv>
			<Save>6+</Save>
			<St>3</St>
			<To>4</To>
			<UnitSt.>1</UnitSt.>
			<WS>4</WS>
			<Wo>1</Wo>
			<At>1</At>
			<BS>3</BS>
		</stats>
		<attributes>
			<HandWeapon>true</HandWeapon>
			<Crossbow>true</Crossbow>
			<LightArmor>true</LightArmor>
		</attributes>
	</unit>
	<unit>
		<name>Ironbreakers</name>
		<modelcount>14</modelcount>
		<totalcost>237</totalcost>
		<basename>Ironbreakers</basename>
		<count>13</count>
		<base>14</base>
		<stats>
			<Ld>9</Ld>
			<Mv>3</Mv>
			<Save>3+/2+</Save>
			<St>4</St>
			<To>4</To>
			<UnitSt.>1</UnitSt.>
			<WS>5</WS>
			<Wo>1</Wo>
			<At>1</At>
			<BS>3</BS>
			<In>2</In>
		</stats>
		<champion>
			<basename>Ironbeard</basename>
			<count>1</count>
			<base>1</base>
			<stats>
				<In>2</In>
				<Ld>9</Ld>
				<Mv>3</Mv>
				<Save>3+/2+</Save>
				<St>4</St>
				<To>4</To>
				<UnitSt.>1</UnitSt.>
				<WS>5</WS>
				<Wo>1</Wo>
				<At>2</At>
				<BS>3</BS>
			</stats>
			<attributes>
				<HandWeapon>true</HandWeapon>
				<GromrilArmor>true</GromrilArmor>
				<Shield>true</Shield>
			</attributes>
		</champion>
		<attributes>
			<Musician>true</Musician>
			<StandardBearer>true</StandardBearer>
			<HandWeapon>true</HandWeapon>
			<GromrilArmor>true</GromrilArmor>
			<Shield>true</Shield>
		</attributes>
		<item>
			<RunicStandard>
				<RuneofStoicism>The unit counts as double its actual Unit Strength.</RuneofStoicism>
			</RunicStandard>
		</item>
	</unit>
	<unit>
		<name>Hammerers</name>
		<modelcount>18</modelcount>
		<totalcost>246</totalcost>
		<basename>Hammerers</basename>
		<count>17</count>
		<base>18</base>
		<stats>
			<Ld>9</Ld>
			<Mv>3</Mv>
			<Save>5+</Save>
			<St>4/6</St>
			<To>4</To>
			<UnitSt.>1</UnitSt.>
			<WS>5</WS>
			<Wo>1</Wo>
			<At>1</At>
			<BS>3</BS>
			<In>2</In>
		</stats>
		<champion>
			<basename>Gate Keeper</basename>
			<count>1</count>
			<base>1</base>
			<stats>
				<In>2</In>
				<Ld>9</Ld>
				<Mv>3</Mv>
				<Save>5+</Save>
				<St>4/6</St>
				<To>4</To>
				<UnitSt.>1</UnitSt.>
				<WS>5</WS>
				<Wo>1</Wo>
				<At>2</At>
				<BS>3</BS>
			</stats>
			<attributes>
				<HandWeapon>true</HandWeapon>
				<GreatWeapon>true</GreatWeapon>
				<HeavyArmor>true</HeavyArmor>
			</attributes>
		</champion>
		<attributes>
			<Musician>true</Musician>
			<StandardBearer>true</StandardBearer>
			<HandWeapon>true</HandWeapon>
			<GreatWeapon>true</GreatWeapon>
			<HeavyArmor>true</HeavyArmor>
			<Stubborn>true</Stubborn>
		</attributes>
	</unit>
	<unit>
		<name>Artillery Battery</name>
		<modelcount>4</modelcount>
		<totalcost>45</totalcost>
		<basename>Bolt Thrower</basename>
		<count>1</count>
		<base>1</base>
		<stats>
			<To>7</To>
			<UnitSt.>3</UnitSt.>
			<Wo>3</Wo>
		</stats>
		<crew>
			<basename>Crew</basename>
			<count>3</count>
			<base>3</base>
			<stats>
				<In>2</In>
				<Ld>9</Ld>
				<Mv>3</Mv>
				<Save>6+</Save>
				<St>3</St>
				<To>4</To>
				<WS>4</WS>
				<Wo>1</Wo>
				<At>1</At>
				<BS>3</BS>
			</stats>
			<attributes>
				<HandWeapon>true</HandWeapon>
				<LightArmor>true</LightArmor>
			</attributes>