<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet href="http://feeds.feedburner.com/~d/styles/rss2full.xsl" type="text/xsl" media="screen"?><?xml-stylesheet href="http://feeds.feedburner.com/~d/styles/itemcontent.css" type="text/css" media="screen"?><rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" version="2.0">

<channel>
	<title>The Prodigal Boyfriend</title>
	
	<link>http://blog.theprodigalboyfriend.com</link>
	<description>Why are you even reading this?</description>
	<pubDate>Sat, 22 Nov 2008 04:13:39 +0000</pubDate>
	<generator>http://wordpress.org/?v=2.6.3</generator>
	<language>en</language>
			<geo:lat>44.935315</geo:lat><geo:long>-93.120493</geo:long><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" href="http://feeds.feedburner.com/TheProdigalBoyfriend" type="application/rss+xml" /><feedburner:emailServiceId xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">2707864</feedburner:emailServiceId><feedburner:feedburnerHostname xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">http://www.feedburner.com</feedburner:feedburnerHostname><item>
		<title>Importing From MySpace</title>
		<link>http://blog.theprodigalboyfriend.com/2008/11/21/importing-from-myspace/</link>
		<comments>http://blog.theprodigalboyfriend.com/2008/11/21/importing-from-myspace/#comments</comments>
		<pubDate>Sat, 22 Nov 2008 04:13:39 +0000</pubDate>
		<dc:creator>Ryan</dc:creator>
		
		<category><![CDATA[General]]></category>

		<category><![CDATA[Code]]></category>

		<category><![CDATA[Ruby]]></category>

		<category><![CDATA[WTF]]></category>

		<guid isPermaLink="false">http://blog.theprodigalboyfriend.com/?p=262</guid>
		<description><![CDATA[Given that I&#8217;ve deleted my Facebook account, and I&#8217;ll be deleting my MySpace account shortly (they never get used, and I don&#8217;t see the point), I decided to look into the possibility of importing posts from MySpace to Wordpress.  As it turns out, the Wordpress developers are apparently really ridiculously lazy, or just don&#8217;t [...]]]></description>
			<content:encoded><![CDATA[<p>Given that I&#8217;ve deleted my Facebook account, and I&#8217;ll be deleting my MySpace account shortly (they never get used, and I don&#8217;t see the point), I decided to look into the possibility of importing posts from MySpace to Wordpress.  As it turns out, the Wordpress developers are apparently really ridiculously lazy, or just don&#8217;t give a shit about importing.  Somebody had written a Perl script which pulled MySpace blogs into RSS, but bringing RSS into Wordpress doesn&#8217;t get comments with it.  </p>
<p>After looking over the formats Wordpress -could- import from, I threw out everything with [!CDATA] tags in XML (almost every format).  Fortunately, the Movable Type developers don&#8217;t see a need to dump binary blobs in XML, or use XML at all.  Their format is refreshingly simple.  Hence, a parser that runs through every blog post on somebody&#8217;s MySpace, pulls out data that matters (title, date, post, commenters and their comments), then puts those into Movable Type format.  You&#8217;ll note that I now have way more posts on here than I did before, some of those being from before this website existed.</p>
<p>Code:</p>

<div class="wp_syntax"><div class="code"><pre class="ruby ruby" style="font-family:monospace;"><span style="color:#008000; font-style:italic;">#!/usr/bin/ruby</span>
&nbsp;
<span style="color:#CC0066; font-weight:bold;">require</span> <span style="color:#996600;">'open-uri'</span>
<span style="color:#CC0066; font-weight:bold;">require</span> <span style="color:#996600;">'time'</span>
&nbsp;
<span style="color:#9966CC; font-weight:bold;">class</span> Comment
  attr_accessor <span style="color:#ff3333; font-weight:bold;">:author</span>, <span style="color:#ff3333; font-weight:bold;">:datewritten</span>, <span style="color:#ff3333; font-weight:bold;">:comment</span>
  <span style="color:#9966CC; font-weight:bold;">def</span> initialize <span style="color:#006600; font-weight:bold;">&#40;</span>author, datewritten, comment<span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#0066ff; font-weight:bold;">@author</span>, <span style="color:#0066ff; font-weight:bold;">@datewritten</span>, <span style="color:#0066ff; font-weight:bold;">@comment</span> = author, datewritten, comment
  <span style="color:#9966CC; font-weight:bold;">end</span>
<span style="color:#9966CC; font-weight:bold;">end</span>
&nbsp;
<span style="color:#9966CC; font-weight:bold;">class</span> Post
  attr_accessor <span style="color:#ff3333; font-weight:bold;">:author</span>, <span style="color:#ff3333; font-weight:bold;">:title</span>, <span style="color:#ff3333; font-weight:bold;">:datewritten</span>, <span style="color:#ff3333; font-weight:bold;">:body</span>, <span style="color:#ff3333; font-weight:bold;">:comments</span>
  <span style="color:#9966CC; font-weight:bold;">def</span> initialize <span style="color:#006600; font-weight:bold;">&#40;</span>author, title, datewritten, body<span style="color:#006600; font-weight:bold;">&#41;</span>
    <span style="color:#0066ff; font-weight:bold;">@author</span>, <span style="color:#0066ff; font-weight:bold;">@title</span>, <span style="color:#0066ff; font-weight:bold;">@datewritten</span>, <span style="color:#0066ff; font-weight:bold;">@body</span> = author, title, datewritten, body
    <span style="color:#0066ff; font-weight:bold;">@comments</span> = <span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#006600; font-weight:bold;">&#93;</span>
  <span style="color:#9966CC; font-weight:bold;">end</span>
  <span style="color:#9966CC; font-weight:bold;">def</span> addcomment <span style="color:#006600; font-weight:bold;">&#40;</span>author, datewritten, comment<span style="color:#006600; font-weight:bold;">&#41;</span>
    <span style="color:#0066ff; font-weight:bold;">@comments</span>.<span style="color:#9900CC;">push</span><span style="color:#006600; font-weight:bold;">&#40;</span>Comment.<span style="color:#9900CC;">new</span><span style="color:#006600; font-weight:bold;">&#40;</span>author, datewritten, comment<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
  <span style="color:#9966CC; font-weight:bold;">end</span>
<span style="color:#9966CC; font-weight:bold;">end</span>
&nbsp;
<span style="color:#9966CC; font-weight:bold;">class</span> Ripper
  <span style="color:#9966CC; font-weight:bold;">def</span> initialize  
    <span style="color:#0066ff; font-weight:bold;">@pages</span> = <span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#006600; font-weight:bold;">&#93;</span>
    <span style="color:#0066ff; font-weight:bold;">@posts</span> = <span style="color:#CC0066; font-weight:bold;">Array</span>.<span style="color:#9900CC;">new</span>
  <span style="color:#9966CC; font-weight:bold;">end</span>
  <span style="color:#9966CC; font-weight:bold;">def</span> get <span style="color:#006600; font-weight:bold;">&#40;</span>uri<span style="color:#006600; font-weight:bold;">&#41;</span>
    connection = <span style="color:#CC0066; font-weight:bold;">open</span><span style="color:#006600; font-weight:bold;">&#40;</span>uri<span style="color:#006600; font-weight:bold;">&#41;</span>  
    content = connection.<span style="color:#9900CC;">read</span>
    <span style="color:#0000FF; font-weight:bold;">return</span> content
  <span style="color:#9966CC; font-weight:bold;">end</span>
&nbsp;
  <span style="color:#9966CC; font-weight:bold;">def</span> parse <span style="color:#006600; font-weight:bold;">&#40;</span>uri<span style="color:#006600; font-weight:bold;">&#41;</span>
    content = get<span style="color:#006600; font-weight:bold;">&#40;</span>uri<span style="color:#006600; font-weight:bold;">&#41;</span>
    <span style="color:#008000; font-style:italic;">#blogContentInfo points to links to posts</span>
    links = content.<span style="color:#9900CC;">scan</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span><span style="color:#9966CC; font-weight:bold;">class</span>=<span style="color:#996600;">&quot;blogContentInfo&quot;</span><span style="color:#006600; font-weight:bold;">&gt;</span>.<span style="color:#006600; font-weight:bold;">*</span>?<span style="color:#006600; font-weight:bold;">&lt;</span>a href=<span style="color:#996600;">&quot;.*?&quot;</span><span style="color:#006600; font-weight:bold;">&gt;/</span>m<span style="color:#006600; font-weight:bold;">&#41;</span>
    links.<span style="color:#9900CC;">each</span> <span style="color:#9966CC; font-weight:bold;">do</span> |link|
        <span style="color:#008000; font-style:italic;">#Strip out the bullshit amazon links</span>
        <span style="color:#9966CC; font-weight:bold;">unless</span> link =~ <span style="color:#006600; font-weight:bold;">/</span>amazon<span style="color:#006600; font-weight:bold;">/</span>
            <span style="color:#008000; font-style:italic;">#Pull the URL out of the link</span>
            link = <span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>.<span style="color:#006600; font-weight:bold;">*&lt;</span>a href=<span style="color:#996600;">&quot;(.*)&quot;</span><span style="color:#006600; font-weight:bold;">&gt;/</span>m<span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">match</span><span style="color:#006600; font-weight:bold;">&#40;</span>link<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#006666;">1</span><span style="color:#006600; font-weight:bold;">&#93;</span>
            <span style="color:#0066ff; font-weight:bold;">@pages</span>.<span style="color:#9900CC;">push</span><span style="color:#006600; font-weight:bold;">&#40;</span>link<span style="color:#006600; font-weight:bold;">&#41;</span>
        <span style="color:#9966CC; font-weight:bold;">end</span>
    <span style="color:#9966CC; font-weight:bold;">end</span>
    <span style="color:#008000; font-style:italic;">#Checking if there are any older pages with a hyperlink</span>
    <span style="color:#9966CC; font-weight:bold;">if</span> content =~ <span style="color:#006600; font-weight:bold;">/</span>\<span style="color:#006600; font-weight:bold;">&#91;</span>.<span style="color:#006600; font-weight:bold;">*</span>?<span style="color:#006600; font-weight:bold;">&lt;</span>a href=<span style="color:#996600;">&quot;(.*?)&quot;</span><span style="color:#006600; font-weight:bold;">&gt;</span>Older<span style="color:#006600; font-weight:bold;">&lt;</span>\<span style="color:#006600; font-weight:bold;">/</span>a<span style="color:#006600; font-weight:bold;">&gt;/</span>
      <span style="color:#008000; font-style:italic;">#If so, call itself recursively to pull out the rest</span>
      <span style="color:#008000; font-style:italic;">#Myspace breaks the URI standard.  Replace the spaces with real escapes</span>
      parse<span style="color:#006600; font-weight:bold;">&#40;</span>$1.<span style="color:#CC0066; font-weight:bold;">gsub</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>\s<span style="color:#006600; font-weight:bold;">/</span>, <span style="color:#996600;">&quot;%20&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
    <span style="color:#9966CC; font-weight:bold;">else</span>
      <span style="color:#008000; font-style:italic;">#Edge case to stop recursing once there aren't any more older pages</span>
      parsepages<span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
    <span style="color:#9966CC; font-weight:bold;">end</span>
  <span style="color:#9966CC; font-weight:bold;">end</span>
&nbsp;
  <span style="color:#9966CC; font-weight:bold;">def</span> parsepages<span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
    <span style="color:#0066ff; font-weight:bold;">@pages</span>.<span style="color:#9900CC;">each</span> <span style="color:#9966CC; font-weight:bold;">do</span> |uri|
      <span style="color:#008000; font-style:italic;">#Replace with yourself, if you want</span>
      author = <span style="color:#996600;">&quot;Ryan&quot;</span>
      content = get<span style="color:#006600; font-weight:bold;">&#40;</span>uri<span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#008000; font-style:italic;">#Pull out the fields I want</span>
      title = <span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>blogSubject<span style="color:#996600;">&quot;&gt;(.*?)<span style="color:#000099;">\n</span>/m).match(content)[1]
      body = (/blogContent&quot;</span><span style="color:#006600; font-weight:bold;">&gt;</span><span style="color:#006600; font-weight:bold;">&#40;</span>.<span style="color:#006600; font-weight:bold;">*</span>?<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&lt;</span>table<span style="color:#006600; font-weight:bold;">/</span>m<span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">match</span><span style="color:#006600; font-weight:bold;">&#40;</span>content<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#006666;">1</span><span style="color:#006600; font-weight:bold;">&#93;</span>
      datewritten = <span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>blogTimeStamp<span style="color:#996600;">&quot;&gt;(.*?)&lt;<span style="color:#000099;">\/</span>p&gt;/m).match(content)[1].gsub(/(^<span style="color:#000099;">\s</span>+|<span style="color:#000099;">\n</span>+)/, &quot;</span><span style="color:#996600;">&quot;)
      time = (/blogContentInfo&quot;</span><span style="color:#006600; font-weight:bold;">&gt;&lt;</span>b<span style="color:#006600; font-weight:bold;">&gt;</span>.<span style="color:#006600; font-weight:bold;">*</span>?<span style="color:#006600; font-weight:bold;">&#40;</span>\d<span style="color:#006600; font-weight:bold;">+</span>:\d<span style="color:#006600; font-weight:bold;">+</span><span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">/</span>m<span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">match</span><span style="color:#006600; font-weight:bold;">&#40;</span>content<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#006666;">1</span><span style="color:#006600; font-weight:bold;">&#93;</span>
      datewritten = datewritten <span style="color:#006600; font-weight:bold;">+</span> <span style="color:#996600;">&quot; #{time}:00&quot;</span>
      <span style="color:#008000; font-style:italic;">#Parse the time, and force it into something Wordpress can deal with</span>
      t = <span style="color:#CC00FF; font-weight:bold;">Time</span>.<span style="color:#9900CC;">parse</span><span style="color:#006600; font-weight:bold;">&#40;</span>datewritten<span style="color:#006600; font-weight:bold;">&#41;</span>
      datewritten = t.<span style="color:#9900CC;">strftime</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;%m/%d/%Y %H:%M:%S&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;Title: #{title}<span style="color:#000099;">\n</span>&quot;</span>
      <span style="color:#008000; font-style:italic;">#Create a new Post object</span>
      post = Post.<span style="color:#9900CC;">new</span><span style="color:#006600; font-weight:bold;">&#40;</span>author, title, datewritten, body<span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#008000; font-style:italic;">#Pull out an array of all the comment blocks</span>
      comments = content.<span style="color:#9900CC;">scan</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>id=<span style="color:#996600;">&quot;blogComments.*?commentSpacer/m)
      #Pass off the post object along with the list of comments
      parsecomments(comments, post)
    end
  end
&nbsp;
  def parsecomments(comments, post)
    comments.each do |com|
      author = (/profileLinks&quot;</span><span style="color:#006600; font-weight:bold;">&gt;</span><span style="color:#006600; font-weight:bold;">&#40;</span>.<span style="color:#006600; font-weight:bold;">*</span>?<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&lt;/</span>m<span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">match</span><span style="color:#006600; font-weight:bold;">&#40;</span>com<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#006666;">1</span><span style="color:#006600; font-weight:bold;">&#93;</span>
      <span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;Author: #{author}<span style="color:#000099;">\n</span>&quot;</span>
      <span style="color:#008000; font-style:italic;">#MySpace decided to make the CSS ids identical here, except that the</span>
      <span style="color:#008000; font-style:italic;">#actual comment doesn't have &quot;Posted&quot; after the closing tag</span>
      <span style="color:#008000; font-style:italic;">#Filter it as such</span>
      comment = <span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>blogCommentsContent<span style="color:#996600;">&quot;&gt;(.*?)&lt;<span style="color:#000099;">\/</span>p&gt;/m).match(com)[1]
      datewritten = (/blogCommentsContent&quot;</span><span style="color:#006600; font-weight:bold;">&gt;</span>Posted by.<span style="color:#006600; font-weight:bold;">*</span>?<span style="color:#006600; font-weight:bold;">&gt;</span> on<span style="color:#006600; font-weight:bold;">&#40;</span>.<span style="color:#006600; font-weight:bold;">*</span>?<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&lt;</span>b<span style="color:#006600; font-weight:bold;">/</span>m<span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">match</span><span style="color:#006600; font-weight:bold;">&#40;</span>com<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#006666;">1</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#CC0066; font-weight:bold;">gsub</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>\n|\t|\r<span style="color:#006600; font-weight:bold;">/</span>, <span style="color:#996600;">&quot;&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
      t = <span style="color:#CC00FF; font-weight:bold;">Time</span>.<span style="color:#9900CC;">parse</span><span style="color:#006600; font-weight:bold;">&#40;</span>datewritten<span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#008000; font-style:italic;">#The same datetime munging as before</span>
      datewritten = t.<span style="color:#9900CC;">strftime</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;%m/%d/%Y %H:%M:%S&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#008000; font-style:italic;">#Commit each comment to our post object</span>
      post.<span style="color:#9900CC;">addcomment</span><span style="color:#006600; font-weight:bold;">&#40;</span>author, datewritten, comment<span style="color:#006600; font-weight:bold;">&#41;</span>
    <span style="color:#9966CC; font-weight:bold;">end</span>
  <span style="color:#008000; font-style:italic;">#Push them all into our class array</span>
  <span style="color:#0066ff; font-weight:bold;">@posts</span>.<span style="color:#9900CC;">push</span><span style="color:#006600; font-weight:bold;">&#40;</span>post<span style="color:#006600; font-weight:bold;">&#41;</span>
  <span style="color:#9966CC; font-weight:bold;">end</span>
&nbsp;
  <span style="color:#9966CC; font-weight:bold;">def</span> <span style="color:#CC0066; font-weight:bold;">print</span><span style="color:#006600; font-weight:bold;">&#40;</span>file<span style="color:#006600; font-weight:bold;">&#41;</span>
    <span style="color:#0066ff; font-weight:bold;">@posts</span>.<span style="color:#9900CC;">each</span> <span style="color:#9966CC; font-weight:bold;">do</span> |post|
      <span style="color:#008000; font-style:italic;">#Using Movable Type's export syntax, so I don't need to mess with XML</span>
      <span style="color:#008000; font-style:italic;">#It's documented here: http://www.sixapart.com/moveabletype/docs/mtimport#example</span>
      <span style="color:#008000; font-style:italic;">#Basically, 5 hyphens separates the categories</span>
      <span style="color:#008000; font-style:italic;">#Eight hyphens separate each post</span>
      file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;TITLE: #{post.title}&quot;</span>
      file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;AUTHOR: #{post.author}&quot;</span>
      file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;DATE: #{post.datewritten}&quot;</span>
      <span style="color:#008000; font-style:italic;">#Change this, too, if you want</span>
      file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;CATEGORY: MySpace&quot;</span>
      file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;-----&quot;</span>
      <span style="color:#008000; font-style:italic;">#Get rid of empty lines and fucking Windows ^M newlines, plus convert &amp;nbsp; to &quot; &quot;</span>
      file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;BODY:<span style="color:#000099;">\n</span>#{post.body.gsub(/^(<span style="color:#000099;">\s</span>+|<span style="color:#000099;">\t</span>+|<span style="color:#000099;">\n</span>+)$/, &quot;</span><span style="color:#996600;">&quot;).gsub(/<span style="color:#000099;">\0</span>15/, &quot;</span><span style="color:#996600;">&quot;).gsub(/&amp;nbsp;/, &quot;</span> <span style="color:#996600;">&quot;)}&quot;</span>
      file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;-----&quot;</span>
      post.<span style="color:#9900CC;">comments</span>.<span style="color:#9900CC;">each</span> <span style="color:#9966CC; font-weight:bold;">do</span> |com|
        <span style="color:#008000; font-style:italic;">#More stuff is possible here, but isn't necessary</span>
        file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;COMMENT:&quot;</span>
        file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;AUTHOR: #{com.author}&quot;</span>
        file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;DATE: #{com.datewritten}&quot;</span>
        file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;#{com.comment}&quot;</span>
        file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;-----&quot;</span>
      <span style="color:#9966CC; font-weight:bold;">end</span>
      file.<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;--------&quot;</span>
    <span style="color:#9966CC; font-weight:bold;">end</span>
  <span style="color:#9966CC; font-weight:bold;">end</span>
<span style="color:#9966CC; font-weight:bold;">end</span>
&nbsp;
<span style="color:#008000; font-style:italic;">#Instantiate it</span>
ripper = Ripper.<span style="color:#9900CC;">new</span>
<span style="color:#008000; font-style:italic;">#Parse my blog (substitute whatever yours is here)</span>
ripper.<span style="color:#9900CC;">parse</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;http://blog.myspace.com/lykurgos&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
<span style="color:#008000; font-style:italic;">#Output it</span>
&nbsp;
output = <span style="color:#CC00FF; font-weight:bold;">File</span>.<span style="color:#CC0066; font-weight:bold;">open</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;posts.txt&quot;</span>, <span style="color:#996600;">&quot;a&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
ripper.<span style="color:#CC0066; font-weight:bold;">print</span><span style="color:#006600; font-weight:bold;">&#40;</span>output<span style="color:#006600; font-weight:bold;">&#41;</span>
<span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;Done!<span style="color:#000099;">\n</span>&quot;</span>
<span style="color:#008000; font-style:italic;">#Import into Wordpress!</span></pre></div></div>

<p>Maybe somebody will actually find it useful.</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=5O4cN"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=5O4cN" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=Ml2Qn"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=Ml2Qn" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=34h2n"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=34h2n" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=mQB6N"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=mQB6N" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=2vUEn"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=2vUEn" border="0"></img></a>
</div>]]></content:encoded>
			<wfw:commentRss>http://blog.theprodigalboyfriend.com/2008/11/21/importing-from-myspace/feed/</wfw:commentRss>
		</item>
		<item>
		<title>Real World Regexes</title>
		<link>http://blog.theprodigalboyfriend.com/2008/10/08/real-world-regexes/</link>
		<comments>http://blog.theprodigalboyfriend.com/2008/10/08/real-world-regexes/#comments</comments>
		<pubDate>Thu, 09 Oct 2008 04:18:53 +0000</pubDate>
		<dc:creator>Ryan</dc:creator>
		
		<category><![CDATA[General]]></category>

		<category><![CDATA[c#]]></category>

		<category><![CDATA[Code]]></category>

		<category><![CDATA[Perl]]></category>

		<category><![CDATA[Python]]></category>

		<category><![CDATA[Ruby]]></category>

		<guid isPermaLink="false">http://blog.theprodigalboyfriend.com/?p=70</guid>
		<description><![CDATA[Dan mentioned that he wasn&#8217;t that knowledgeable about regular expressions (a topic I am intimately familiar with), so I figured I&#8217;d put up some examples from code I&#8217;ve actually written, along with the text they&#8217;re actually supposed to match.
Here are the general rules for regexes.  To begin with, &#8220;operator&#8221; refers to [...]]]></description>
			<content:encoded><![CDATA[<p>Dan mentioned that he wasn&#8217;t that knowledgeable about regular expressions (a topic I am intimately familiar with), so I figured I&#8217;d put up some examples from code I&#8217;ve actually written, along with the text they&#8217;re actually supposed to match.</p>
<p>Here are the general rules for regexes.  To begin with, &#8220;operator&#8221; refers to any of these (so \s+, [A-Z], (Word), etc).  Greedy means it&#8217;ll continue matching as far as possible, and if the operator/character you want to match occurs more than once in the string, it&#8217;ll eat the first one and only stop matching at the last one.</p>
<p>.   Match any character<br />
\w  Match &#8220;word&#8221; character (alphanumeric plus &#8220;_&#8221;)<br />
\W  Match non-word character<br />
\s  Match whitespace character<br />
\S  Match non-whitespace character<br />
\d  Match digit character<br />
\D  Match non-digit character<br />
\t  Match tab<br />
\n  Match newline<br />
\r  Match return<br />
\f  Match formfeed<br />
\a  Match alarm (bell, beep, etc)<br />
\e  Match escape<br />
^  Beginning of the line<br />
$ End of the line<br />
+ matches the preceding operator one or more times (greedy)<br />
* matches the preceding operator zero or more times (greedy)<br />
? matches the preceding operator once if it exists, but it doesn&#8217;t have to be there.  Mostly used to stop greedy operators (*? or +?, for instance) at the match you want.<br />
() is used for grouping (either to use later as a backreference or to exclude)<br />
(?&lt;name&gt;) (or (?P&lt;name&gt;) in Python and maybe others) is used for a named backreference.  There&#8217;ll be some examples of that.<br />
| is used as a logical or<br />
{n} is used to match the preceding character n times<br />
{n, m} matches n to m times<br />
{n,} matches n or more times ({1,} is equivalent to +, so you may as well use + there)<br />
[A-Za-z] is used to match whatever is in the middle, but it only counts as one character (so [A-Za-z] would match any of those characters ONCE.  Useful if you want [a-f] or [0-5]+ or something).<br />
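[^] is used to exclude things.  [^word] matches any ONE character that isn&#8217;t &#8220;w&#8221;, &#8220;o&#8221;, &#8220;r&#8221; or &#8220;d&#8221; (parentheses don&#8217;t group inside brackets, so [^(word)] just adds &#8220;(&#8221; and &#8220;)&#8221; to the excluded characters rather than excluding the whole word).</p>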
<p>Sound confusing?  It is, which is why I&#8217;ll put up real examples.  FYI, these are PCRE (Perl Compatible Regular Expressions) rather than SCRE (Sed Compatible Regular Expressions), but Dan&#8217;ll almost certainly never use sed compatible (which doesn&#8217;t have a ? operator, among other things).</p>
<p>Using a backreference later depends on the language.  .NET uses ${n} where n is the reference number (note that they start from 1, as the entire string you matched is ${0}), Perl (and a lot of others) use $n, Ruby uses \1 (as does Python, but Python {like .NET} needs an operator in front to use a raw string {.NET is @, Python is r}, otherwise it&#8217;s \\1).  Language reference is your best bet here.</p>
<p>First example.</p>
<pre>(Oct6 0423z) Dec4100: C, was acknowledged by, ek</pre>

<div class="wp_syntax"><div class="code"><pre class="csharp csharp" style="font-family:monospace;"><span style="color: #FF0000;">string</span> regexPattern <span style="color: #008000;">=</span> <span style="">@&quot;.*?\)\s
                      (?&lt;system&gt;\S+?)
                      :\s
                      (?&lt;tape&gt;\w)
                      .*,\s
                      (?&lt;initials&gt;.*)&quot;</span>;
Regex re <span style="color: #008000;">=</span> <span style="color: #008000;">new</span> Regex<span style="color: #000000;">&#40;</span>regexPattern, RegexOptions.<span style="color: #0000FF;">ExplicitCapture</span><span style="color: #000000;">&#41;</span>;</pre></div></div>

<p>It eats everything up until the right parenthesis (escaped so the regex parser doesn&#8217;t try to interpret it) followed by a space, then it gets all non-whitespace characters until the colon as the system name.  Ignores the colon and a space, then grabs one word character (\w, i.e. [A-Za-z0-9_]) as the tape name.  Ignores zero or more matches of any character (the &#8220;.&#8221;) until it finds a comma followed by a space, then yanks the rest of the line as the initials.</p>
<p>C is the tape name.</p>
<p>ek are the initials.</p>
<p>This means Dec4100 is available as ${system} (if doing Regex.Replace) or m.Groups["system"] if you matched the regex with m = Regex.Match(logfilestring, re);  </p>
<p>Another example:</p>

<div class="wp_syntax"><div class="code"><pre class="html" style="font-family:monospace;">	&lt;form action=&quot;http://www.climate.weatheroffice.ec.gc.ca/climateData/Interform.cfm&quot; method=&quot;post&quot; name=&quot;stnRequest1&quot;&gt;
		&lt;input type=&quot;Hidden&quot; name=&quot;hlyRange&quot; value=&quot;N/A&quot;&gt;
		&lt;input type=&quot;Hidden&quot; name=&quot;dlyRange&quot; value=&quot;1998-4-1|2007-11-30&quot;&gt;
		&lt;input type=&quot;Hidden&quot; name=&quot;mlyRange&quot; value=&quot;1998-4-1|2007-11-1&quot;&gt;
		&lt;input type=&quot;Hidden&quot; name=&quot;StationID&quot; value=&quot;10700&quot;&gt;
		&lt;input type=&quot;Hidden&quot; name=&quot;prov&quot; value=&quot;CA&quot;&gt;
		&lt;input type=&quot;Hidden&quot; name=&quot;urlExtension&quot; value=&quot;_e.html&quot;&gt;
	&lt;tr id=&quot;dataTableOddRow&quot;&gt;
		&lt;td id=&quot;dataTableRowHeader&quot;&gt;(AE) BOW SUMMIT&lt;/td&gt;
		&lt;td id=&quot;dataTableRowHeader&quot;&gt;&lt;abbr title=&quot;ALBERTA&quot;&gt;ALTA&lt;/abbr&gt;&lt;/td&gt;
		&lt;td&gt;
			&lt;select name=&quot;timeframe&quot; size=&quot;1&quot; class=&quot;formElement75w&quot; onChange=&quot;elementChange(document.stnRequest1,1)&quot;&gt;
	&lt;option value=&quot;2&quot;&gt;Daily&lt;/option&gt;&lt;option value=&quot;3&quot;&gt;Monthly&lt;/option&gt;&lt;option value=&quot;4&quot;&gt;Almanac&lt;/option&gt;
			&lt;/select&gt;
		&lt;/td&gt;
	&lt;td&gt;
	&lt;select name=&quot;day&quot; size=&quot;1&quot; class=&quot;formElement&quot; disabled&gt;&lt;option value=&quot;1&quot; &gt;1&lt;/option&gt;&lt;option value=&quot;2&quot; &gt;2&lt;/option&gt;&lt;option value=&quot;3&quot; &gt;3&lt;/option&gt;&lt;option value=&quot;4&quot; &gt;4&lt;/option&gt;&lt;option value=&quot;5&quot; &gt;5&lt;/option&gt;&lt;option value=&quot;6&quot; &gt;6&lt;/option&gt;&lt;option value=&quot;7&quot; &gt;7&lt;/option&gt;&lt;option value=&quot;8&quot; &gt;8&lt;/option&gt;&lt;option value=&quot;9&quot; &gt;9&lt;/option&gt;&lt;option value=&quot;10&quot; &gt;10&lt;/option&gt;&lt;option value=&quot;11&quot; &gt;11&lt;/option&gt;&lt;option value=&quot;12&quot; &gt;12&lt;/option&gt;&lt;option value=&quot;13&quot; &gt;13&lt;/option&gt;&lt;option value=&quot;14&quot; &gt;14&lt;/option&gt;&lt;option value=&quot;15&quot; &gt;15&lt;/option&gt;&lt;option value=&quot;16&quot; &gt;16&lt;/option&gt;&lt;option value=&quot;17&quot; &gt;17&lt;/option&gt;&lt;option value=&quot;18&quot; &gt;18&lt;/option&gt;&lt;option value=&quot;19&quot; &gt;19&lt;/option&gt;&lt;option value=&quot;20&quot; &gt;20&lt;/option&gt;&lt;option value=&quot;21&quot; &gt;21&lt;/option&gt;&lt;option value=&quot;22&quot; &gt;22&lt;/option&gt;&lt;option value=&quot;23&quot; &gt;23&lt;/option&gt;&lt;option value=&quot;24&quot; &gt;24&lt;/option&gt;&lt;option value=&quot;25&quot; &gt;25&lt;/option&gt;&lt;option value=&quot;26&quot; &gt;26&lt;/option&gt;&lt;option value=&quot;27&quot; &gt;27&lt;/option&gt;&lt;option value=&quot;28&quot; &gt;28&lt;/option&gt;&lt;option value=&quot;29&quot; &gt;29&lt;/option&gt;&lt;option value=&quot;30&quot; Selected&gt;30&lt;/option&gt;&lt;option value=&quot;31&quot; &gt;31&lt;/option&gt;
		&lt;/select&gt;
	&lt;/td&gt;
	&lt;td&gt;
	&lt;select name=&quot;month&quot; size=&quot;1&quot; class=&quot;formElement&quot; onChange=&quot;elementChange(document.stnRequest1,1)&quot; &gt;&lt;option value=&quot;1&quot; &gt;Jan&lt;/option&gt;&lt;option value=&quot;2&quot; &gt;Feb&lt;/option&gt;&lt;option value=&quot;3&quot; &gt;Mar&lt;/option&gt;&lt;option value=&quot;4&quot; &gt;Apr&lt;/option&gt;&lt;option value=&quot;5&quot; &gt;May&lt;/option&gt;&lt;option value=&quot;6&quot; &gt;Jun&lt;/option&gt;&lt;option value=&quot;7&quot; &gt;Jul&lt;/option&gt;&lt;option value=&quot;8&quot; &gt;Aug&lt;/option&gt;&lt;option value=&quot;9&quot; &gt;Sep&lt;/option&gt;&lt;option value=&quot;10&quot; &gt;Oct&lt;/option&gt;&lt;option value=&quot;11&quot; Selected&gt;Nov&lt;/option&gt;&lt;option value=&quot;12&quot; &gt;Dec&lt;/option&gt;
		&lt;/select&gt;
	&lt;/td&gt;
	&lt;td&gt;
	&lt;select name=&quot;year&quot; size=&quot;1&quot; class=&quot;formElement&quot; onChange=&quot;elementChange(document.stnRequest1,1)&quot;&gt;&lt;option value=&quot;1998&quot; &gt;1998&lt;/option&gt;&lt;option value=&quot;1999&quot; &gt;1999&lt;/option&gt;&lt;option value=&quot;2000&quot; &gt;2000&lt;/option&gt;&lt;option value=&quot;2001&quot; &gt;2001&lt;/option&gt;&lt;option value=&quot;2002&quot; &gt;2002&lt;/option&gt;&lt;option value=&quot;2003&quot; &gt;2003&lt;/option&gt;&lt;option value=&quot;2004&quot; &gt;2004&lt;/option&gt;&lt;option value=&quot;2005&quot; &gt;2005&lt;/option&gt;&lt;option value=&quot;2006&quot; &gt;2006&lt;/option&gt;&lt;option value=&quot;2007&quot; Selected&gt;2007&lt;/option&gt;
	&lt;/select&gt;
	&lt;/td&gt;
	&lt;td&gt;
	&lt;input type=&quot;submit&quot; name=&quot;stnSubmit&quot; value=&quot;Go&quot; class=&quot;formElement&quot;&gt;
&lt;/td&gt;
&lt;/form&gt;</pre></div></div>

<p>And the parser:</p>

<div class="wp_syntax"><div class="code"><pre class="perl perl" style="font-family:monospace;"><span style="color: #b1b100;">if</span> <span style="color: #009900;">&#40;</span><span style="color: #0000ff;">$chunk</span> <span style="color: #339933;">=~</span> <span style="color: #009966; font-style: italic;">/.*StationID.*?&quot;(\d+)&quot;.*?prov.*?&quot;(\w+).*?TableRowHeader&quot;&gt;(.*?)&lt;.*abbr title.*?&gt;(\w+).*?/s</span><span style="color: #009900;">&#41;</span> <span style="color: #009900;">&#123;</span>
     <span style="color: #b1b100;">my</span> <span style="color: #0000ff;">$stationid</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$1</span>;
     <span style="color: #b1b100;">my</span> <span style="color: #0000ff;">$province</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$2</span>;
     <span style="color: #b1b100;">my</span> <span style="color: #0000ff;">$name</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$3</span>;
     <span style="color: #b1b100;">my</span> <span style="color: #0000ff;">$abbrprov</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$4</span>;
<span style="color: #009900;">&#125;</span></pre></div></div>

<p>This is a multi-line regex (hence the //s, like //g is global, //i is case insensitive, //gi is both g and i, etc), and a good example of non-greedy matching.  It snags everything up until StationID, then the next quotation mark followed by numbers, and captures those numbers. It comes out as &#8220;10700&#8221;.</p>
<p>Does the same thing following &#8220;prov&#8221; up until the next word characters in quotation marks, and captures those.  As .* rather than .*?, it would have grabbed &#8220;data&#8221;, which precedes TableRowHeader (inside the same parenthesis).  Comes out as &#8220;CA&#8221;.</p>
<p>Grabs everything from TableRowHeader&#8221;&gt; until the next &lt;.  Comes out as &#8220;(AE) BOW SUMMIT&#8221;.</p>
<p>Drops everything up until the next &gt; after &#8220;abbr title&#8221;, then captures all word characters.  Comes out as &#8220;ALTA&#8221;.</p>
<p>These are all assigned to variables via backreferences.  $1, $2, $3, $4 are the groups in order.  It&#8217;s worth noting that (at least in .NET), named backreferences are assigned numbers AFTER regular (unnamed) backreferences.  So (?&lt;a&gt;a)(b)(?&lt;c&gt;c)(d) would be bdac as ${1}${2}${3}${4} (${0} is still the entire match).  </p>
<p>Another example:</p>
<pre>04:26:23 [2] Error creating WLAAAP06.FS8 = 1 : Unrecognized KGFXENG Error Code</pre>
<p>And the parser:</p>

<div class="wp_syntax"><div class="code"><pre class="python python" style="font-family:monospace;"><span style="color: #dc143c;">re</span>.<span style="color: black;">match</span><span style="color: black;">&#40;</span>line, r<span style="color: #483d8b;">'^(?P&lt;time&gt;.*?)<span style="color: #000099; font-weight: bold;">\s</span>+<span style="color: #000099; font-weight: bold;">\[</span>(?P&lt;engine&gt;<span style="color: #000099; font-weight: bold;">\d</span>+)<span style="color: #000099; font-weight: bold;">\]</span><span style="color: #000099; font-weight: bold;">\s</span>+(?P&lt;error&gt;.*?(KGFXENG|LeadTools).*)'</span></pre></div></div>

<p>Grabs everything from the beginning of the line until the first space as &#8220;time&#8221;.  Comes out as &#8220;04:26:23&#8221;.</p>
<p>Then skips whitespace and a bracket (escaped with \[) and grabs one or more numbers (\d+) as "engine".  Comes out as "2", of course.  Skips a space, then captures anything which contains "KGFXENG" or "LeadTools" as "error".  Basically, the rest of the line.</p>
<p>This line, for instance, wouldn't match, and nothing in the regex would be captured:</p>
<pre>00:15:18 [1] Error producing WPATAZ00.FSD = F088 : Error while saving the graphic</pre>
<p>These are used later with this:</p>

<div class="wp_syntax"><div class="code"><pre class="python python" style="font-family:monospace;">message = <span style="color: #483d8b;">&quot;ERROR: %s %s: %s&quot;</span> <span style="color: #66cc66;">%</span> <span style="color: black;">&#40;</span><span style="color: #dc143c;">re</span>.<span style="color: black;">sub</span><span style="color: black;">&#40;</span>r<span style="color: #483d8b;">'.*?([A-Za-z]+Engine[A-Za-z]*?)(Errors)?.*'</span>, r<span style="color: #483d8b;">'<span style="color: #000099; font-weight: bold;">\1</span>'</span>, 
                         logfilename<span style="color: black;">&#41;</span>, 
                         engine, 
                         match.<span style="color: black;">group</span><span style="color: black;">&#40;</span><span style="color: #483d8b;">'error'</span><span style="color: black;">&#41;</span><span style="color: black;">&#41;</span></pre></div></div>

<p>&#8220;logfilename&#8221; is something like &#8220;2008_Oct_07__ProductEngineErrors.log&#8221;.  This grabs everything up until A through Z (uppercase or lowercase) one or more times followed by Engine, optionally followed by something else (*, though ? would have worked if I said r&#8217;Engine([A-Za-z]+)?&#8217;).  It stops on Errors, if it exists (the question mark afterwards), and replaces the entire name with the first backreference (&#8220;ProductEngine&#8221; in this case).</p>
<p>Last example is a nested bitch of increasingly complicated rules:</p>

<div class="wp_syntax"><div class="code"><pre class="perl perl" style="font-family:monospace;"><span style="color: #666666; font-style: italic;">#Match plain ol' timezones</span>
<span style="color: #b1b100;">if</span> <span style="color: #009900;">&#40;</span><span style="color: #0000ff;">$brpos</span> <span style="color: #339933;">=~</span> <span style="color: #009966; font-style: italic;">/^\[(\w+)\](.*)/</span><span style="color: #009900;">&#41;</span>
<span style="color: #009900;">&#123;</span>
	<span style="color: #0000ff;">$DateZone</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$1</span>;
	<span style="color: #0000ff;">$newname</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$2</span>;
<span style="color: #009900;">&#125;</span>
<span style="color: #666666; font-style: italic;">#Match timezones with a day modification, and grab that along with the +/-</span>
<span style="color: #b1b100;">elsif</span> <span style="color: #009900;">&#40;</span><span style="color: #0000ff;">$brpos</span> <span style="color: #339933;">=~</span> <span style="color: #009966; font-style: italic;">/^\[(\w+)(\S\d+)\](.*)/</span><span style="color: #009900;">&#41;</span>
<span style="color: #009900;">&#123;</span>
	<span style="color: #0000ff;">$DateZone</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$1</span>;
	<span style="color: #0000ff;">$TempDay2</span> <span style="color: #339933;">=</span> ONE_DAY <span style="color: #339933;">*</span> <span style="color: #0000ff;">$2</span>;
	<span style="color: #0000ff;">$newname</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$3</span>;
<span style="color: #009900;">&#125;</span>
<span style="color: #666666; font-style: italic;">#Check for a delete flag</span>
<span style="color: #b1b100;">elsif</span> <span style="color: #009900;">&#40;</span><span style="color: #0000ff;">$brpos</span> <span style="color: #339933;">=~</span> <span style="color: #009966; font-style: italic;">/^(\d)\[.*/</span><span style="color: #009900;">&#41;</span>
<span style="color: #009900;">&#123;</span>
	<span style="color: #0000ff;">$DeleteFilesStatus</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$1</span>;
	<span style="color: #666666; font-style: italic;">#If the status is one, we want to capture everything after the timezone as the DeleteName</span>
	<span style="color: #b1b100;">if</span> <span style="color: #009900;">&#40;</span><span style="color: #0000ff;">$DeleteFilesStatus</span> <span style="color: #339933;">==</span> <span style="color: #cc66cc;">1</span><span style="color: #009900;">&#41;</span>
	<span style="color: #009900;">&#123;</span>
		<span style="color: #b1b100;">if</span> <span style="color: #009900;">&#40;</span><span style="color: #0000ff;">$brpos</span> <span style="color: #339933;">=~</span> <span style="color: #009966; font-style: italic;">/^(\d)\[(\w+)\](.*)/</span><span style="color: #009900;">&#41;</span>
		<span style="color: #009900;">&#123;</span>
			<span style="color: #0000ff;">$DeleteFilesStatus</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$1</span>;
			<span style="color: #0000ff;">$DateZone</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$2</span>;
			<span style="color: #0000ff;">$DeleteFilesNames</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$3</span>;
			<span style="color: #0000ff;">$newname</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$3</span>;
		<span style="color: #009900;">&#125;</span>
		<span style="color: #b1b100;">elsif</span> <span style="color: #009900;">&#40;</span><span style="color: #0000ff;">$brpos</span> <span style="color: #339933;">=~</span> <span style="color: #009966; font-style: italic;">/^(\d)\[(\w+)(\S\d+)\](.*)/</span><span style="color: #009900;">&#41;</span>
		<span style="color: #009900;">&#123;</span>
			<span style="color: #0000ff;">$DeleteFilesStatus</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$1</span>;
			<span style="color: #0000ff;">$DateZone</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$2</span>;
			<span style="color: #0000ff;">$TempDay2</span> <span style="color: #339933;">=</span> ONE_DAY <span style="color: #339933;">*</span> <span style="color: #0000ff;">$3</span>;
			<span style="color: #0000ff;">$DeleteFilesNames</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$4</span>;
			<span style="color: #0000ff;">$newname</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$4</span>;
		<span style="color: #009900;">&#125;</span>
	<span style="color: #009900;">&#125;</span>
	<span style="color: #666666; font-style: italic;">#Otherwise, the DeleteName is in more brackets</span>
	<span style="color: #b1b100;">elsif</span> <span style="color: #009900;">&#40;</span><span style="color: #0000ff;">$DeleteFilesStatus</span> <span style="color: #339933;">==</span> <span style="color: #cc66cc;">2</span><span style="color: #009900;">&#41;</span>
	<span style="color: #009900;">&#123;</span>
                <span style="color: #666666; font-style: italic;">#Grab it all, but without a time modification</span>
		<span style="color: #b1b100;">if</span> <span style="color: #009900;">&#40;</span><span style="color: #0000ff;">$brpos</span> <span style="color: #339933;">=~</span> <span style="color: #009966; font-style: italic;">/^(\d)\[(\w+)\]\[(.*\.\w+)\](.*)/</span><span style="color: #009900;">&#41;</span>
		<span style="color: #009900;">&#123;</span>
			<span style="color: #0000ff;">$DeleteFilesStatus</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$1</span>;
			<span style="color: #0000ff;">$DateZone</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$2</span>;
			<span style="color: #0000ff;">$DeleteFilesNames</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$3</span>;
			<span style="color: #0000ff;">$newname</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$4</span>;
		<span style="color: #009900;">&#125;</span>
                <span style="color: #666666; font-style: italic;">#Grab it with a time modification</span>
		<span style="color: #b1b100;">elsif</span> <span style="color: #009900;">&#40;</span><span style="color: #0000ff;">$brpos</span> <span style="color: #339933;">=~</span> <span style="color: #009966; font-style: italic;">/^(\d)\[(\w+)(\S\d+)\]\[(.*\.\w+)\](.*)/</span><span style="color: #009900;">&#41;</span>
		<span style="color: #009900;">&#123;</span>
			<span style="color: #0000ff;">$DeleteFilesStatus</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$1</span>;
			<span style="color: #0000ff;">$DateZone</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$2</span>;
			<span style="color: #0000ff;">$TempDay2</span> <span style="color: #339933;">=</span> ONE_DAY <span style="color: #339933;">*</span> <span style="color: #0000ff;">$3</span>;
			<span style="color: #0000ff;">$DeleteFilesNames</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$4</span>;
			<span style="color: #0000ff;">$newname</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">$5</span>;
		<span style="color: #009900;">&#125;</span>
	<span style="color: #009900;">&#125;</span>
<span style="color: #009900;">&#125;</span></pre></div></div>

<p>Examples of what I&#8217;m catching (hopefully in order).  The stuff in brackets later is filled in for date/time stamps:</p>
<pre>[EDT]DOV-F-[MM][dd][yy][hh].csv
[CST-1][MM][dd].act
1[PDT]Actual[yy][MM][dd][hh][mm].csv
1[EST-3]KLGA[yy][MM][dd].mtx
2[EDT][WBD*.txt]WBD[yy][MM][dd]05.txt
2[MST+2][WSM*.txt]WBD[yyyy][MM].txt</pre>
<p>Sadly, I&#8217;m out of work for the night, but these matches aren&#8217;t that complicated.  Lots of escaping brackets, and use of the \S character to match &#8220;-&#8221; or &#8220;+&#8221;, then grabbing the rest of them.  I may write more tomorrow&#8230;</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=GtICN"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=GtICN" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=Gf1yn"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=Gf1yn" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=be6an"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=be6an" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=XlCwN"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=XlCwN" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=o8ozn"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=o8ozn" border="0"></img></a>
</div>]]></content:encoded>
			<wfw:commentRss>http://blog.theprodigalboyfriend.com/2008/10/08/real-world-regexes/feed/</wfw:commentRss>
		</item>
		<item>
		<title>RosterParserFixed.XmlParser parser = new RosterParserFixed.XmlParser()</title>
		<link>http://blog.theprodigalboyfriend.com/2008/10/08/rosterparserfixedxmlparser-parser-new-rosterparserfixedxmlparser/</link>
		<comments>http://blog.theprodigalboyfriend.com/2008/10/08/rosterparserfixedxmlparser-parser-new-rosterparserfixedxmlparser/#comments</comments>
		<pubDate>Thu, 09 Oct 2008 02:44:41 +0000</pubDate>
		<dc:creator>Ryan</dc:creator>
		
		<category><![CDATA[General]]></category>

		<category><![CDATA[c#]]></category>

		<category><![CDATA[Code]]></category>

		<category><![CDATA[xml]]></category>

		<guid isPermaLink="false">http://blog.theprodigalboyfriend.com/?p=68</guid>
		<description><![CDATA[Fixed the nesting problem.  Fixed item parsing.  Item stats for nested units show up now.  As with the Ruby parser, throw different combinations at it and see what happens.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Serialization;
using System.Xml.Schema;
using System.Xml.XPath;
&#160;
namespace abparser
&#123;
    class Program
    &#123;
    [...]]]></description>
			<content:encoded><![CDATA[<p>Fixed the nesting problem.  Fixed item parsing.  Item stats for nested units show up now.  As with the Ruby parser, throw different combinations at it and see what happens.</p>

<div class="wp_syntax"><table><tr><td class="line_numbers"><pre>1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
</pre></td><td class="code"><pre class="csharp csharp" style="font-family:monospace;"><span style="color: #0600FF;">using</span> <span style="color: #008080;">System</span>;
<span style="color: #0600FF;">using</span> <span style="color: #008080;">System.Collections.Generic</span>;
<span style="color: #0600FF;">using</span> <span style="color: #008080;">System.Text</span>;
<span style="color: #0600FF;">using</span> <span style="color: #008080;">System.IO</span>;
<span style="color: #0600FF;">using</span> <span style="color: #008080;">System.Text.RegularExpressions</span>;
<span style="color: #0600FF;">using</span> <span style="color: #008080;">System.Xml</span>;
<span style="color: #0600FF;">using</span> <span style="color: #008080;">System.Xml.Serialization</span>;
<span style="color: #0600FF;">using</span> <span style="color: #008080;">System.Xml.Schema</span>;
<span style="color: #0600FF;">using</span> <span style="color: #008080;">System.Xml.XPath</span>;
&nbsp;
<span style="color: #0600FF;">namespace</span> abparser
<span style="color: #000000;">&#123;</span>
    <span style="color: #FF0000;">class</span> Program
    <span style="color: #000000;">&#123;</span>
        <span style="color: #0600FF;">static</span> <span style="color: #0600FF;">void</span> Main<span style="color: #000000;">&#40;</span><span style="color: #FF0000;">string</span><span style="color: #000000;">&#91;</span><span style="color: #000000;">&#93;</span> args<span style="color: #000000;">&#41;</span>
        <span style="color: #000000;">&#123;</span>
            RosterParserTest.<span style="color: #0000FF;">XmlParser</span> parser <span style="color: #008000;">=</span> <span style="color: #008000;">new</span> RosterParserTest.<span style="color: #0000FF;">XmlParser</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>;
            parser.<span style="color: #0000FF;">ParseRoster</span><span style="color: #000000;">&#40;</span><span style="">@&quot;C:\Temp\de7th.rst&quot;</span>, <span style="">@&quot;C:\Temp\output.xml&quot;</span><span style="color: #000000;">&#41;</span>;
            Console.<span style="color: #0000FF;">ReadLine</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>;
        <span style="color: #000000;">&#125;</span>
    <span style="color: #000000;">&#125;</span>
<span style="color: #000000;">&#125;</span>
&nbsp;
&nbsp;
&nbsp;
<span style="color: #0600FF;">namespace</span> RosterParserTest
<span style="color: #000000;">&#123;</span>
    <span style="color: #FF0000;">class</span> XmlParser
    <span style="color: #000000;">&#123;</span>
        <span style="color: #0600FF;">static</span> XmlDocument Roster <span style="color: #008000;">=</span> <span style="color: #008000;">new</span> XmlDocument<span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>;
&nbsp;
        <span style="color: #0600FF;">static</span> XmlElement rootElement <span style="color: #008000;">=</span> Roster.<span style="color: #0000FF;">CreateElement</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;&quot;</span>, <span style="color: #666666;">&quot;Army&quot;</span>, <span style="color: #666666;">&quot;&quot;</span><span style="color: #000000;">&#41;</span>;
&nbsp;
        <span style="color: #0600FF;">public</span> <span style="color: #0600FF;">static</span> <span style="color: #FF0000;">string</span> RemoveWhitespace<span style="color: #000000;">&#40;</span><span style="color: #FF0000;">string</span> str<span style="color: #000000;">&#41;</span>
        <span style="color: #000000;">&#123;</span>
            <span style="color: #0600FF;">try</span>
            <span style="color: #000000;">&#123;</span>
                <span style="color: #008080; font-style: italic;">//Ryan's Regex</span>
                <span style="color: #0600FF;">return</span> <span style="color: #008000;">new</span> Regex<span style="color: #000000;">&#40;</span><span style="">@&quot;(\s+|\{.*?\}|\(.*?\)|\/+|\.+)&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">Replace</span><span style="color: #000000;">&#40;</span>str, <span style="color: #FF0000;">String</span>.<span style="color: #0000FF;">Empty</span><span style="color: #000000;">&#41;</span>;
            <span style="color: #000000;">&#125;</span>
            <span style="color: #0600FF;">catch</span> <span style="color: #000000;">&#40;</span>Exception<span style="color: #000000;">&#41;</span>
            <span style="color: #000000;">&#123;</span>
                <span style="color: #0600FF;">return</span> str;
            <span style="color: #000000;">&#125;</span>
    <span style="color: #000000;">&#125;</span>
&nbsp;
        <span style="color: #0600FF;">public</span> <span style="color: #0600FF;">void</span> ParseNestedXML<span style="color: #000000;">&#40;</span>XmlElement thisElement, XmlElement rosterElement<span style="color: #000000;">&#41;</span>
        <span style="color: #000000;">&#123;</span>
            <span style="color: #FF0000;">bool</span> linkUnitStatsDone <span style="color: #008000;">=</span> false; <span style="color: #008080; font-style: italic;">//This is a dirty hack.</span>
&nbsp;
            <span style="color: #FF0000;">string</span> replaceMe <span style="color: #008000;">=</span> thisElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;name&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">ToString</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>;
&nbsp;
            replaceMe <span style="color: #008000;">=</span> RemoveWhitespace<span style="color: #000000;">&#40;</span>replaceMe<span style="color: #000000;">&#41;</span>;
&nbsp;
            XmlElement baseElement;
&nbsp;
            XmlNodeList linkUnitStatNodeList <span style="color: #008000;">=</span> thisElement.<span style="color: #0000FF;">SelectNodes</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;./link | ./unitstat&quot;</span><span style="color: #000000;">&#41;</span>;
&nbsp;
            <span style="color: #008080; font-style: italic;">/*Grab the last of the PascalCase names.  HarGanethExecutioners becomes Executioners
             * SupremeSorceress becomes Sorceress, etc.  Replace the rest of the name with a backreference */</span>
            <span style="color: #FF0000;">string</span> regexMatcher <span style="color: #008000;">=</span> Regex.<span style="color: #0000FF;">Replace</span><span style="color: #000000;">&#40;</span>replaceMe, <span style="">@&quot;.*?([A-Z][a-z]+)$&quot;</span>, <span style="color: #666666;">&quot;${1}&quot;</span><span style="color: #000000;">&#41;</span>; 
&nbsp;
            <span style="color: #008080; font-style: italic;">//This way, it'll actually parse the NodeList for stats in nested things.</span>
            <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>Regex.<span style="color: #0000FF;">IsMatch</span><span style="color: #000000;">&#40;</span>rosterElement.<span style="color: #0000FF;">Name</span>.<span style="color: #0000FF;">ToString</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>, regexMatcher<span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span> <span style="color: #008080; font-style: italic;">//So that I don't get duplicate empty nodes.</span>
            <span style="color: #000000;">&#123;</span>
                baseElement <span style="color: #008000;">=</span> rosterElement; <span style="color: #008080; font-style: italic;">//Adding to the previous node in the tree.</span>
&nbsp;
                <span style="color: #0600FF;">foreach</span> <span style="color: #000000;">&#40;</span>XmlElement parseElement <span style="color: #0600FF;">in</span> thisElement<span style="color: #000000;">&#41;</span>
                    <span style="color: #000000;">&#123;</span>
                        <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>parseElement.<span style="color: #0000FF;">HasChildNodes</span> <span style="color: #008000;">&amp;&amp;</span> parseElement.<span style="color: #0000FF;">InnerXml</span>.<span style="color: #0000FF;">Contains</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;entity&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                        <span style="color: #000000;">&#123;</span>
                            ParseNestedXML<span style="color: #000000;">&#40;</span>parseElement, baseElement<span style="color: #000000;">&#41;</span>; <span style="color: #008080; font-style: italic;">//Parsing out nested.</span>
                        <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//if (parseElement.HasChildNodes &amp;&amp; parseElement.InnerXml.Contains(&quot;entity&quot;))</span>
                        <span style="color: #0600FF;">else</span> <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span><span style="color: #008000;">!</span>linkUnitStatsDone<span style="color: #000000;">&#41;</span>
                        <span style="color: #000000;">&#123;</span>
                            ParseLinkUnitStats<span style="color: #000000;">&#40;</span>linkUnitStatNodeList, baseElement<span style="color: #000000;">&#41;</span>;
                            linkUnitStatsDone <span style="color: #008000;">=</span> true; <span style="color: #008080; font-style: italic;">//Hack implemented.</span>
                        <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//else if (!linkUnitStatsDone)</span>
                    <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//foreach (XmlElement parseElement in thisElement)</span>
            <span style="color: #000000;">&#125;</span>
            <span style="color: #0600FF;">else</span>
            <span style="color: #000000;">&#123;</span>
                baseElement <span style="color: #008000;">=</span> Roster.<span style="color: #0000FF;">CreateElement</span><span style="color: #000000;">&#40;</span>replaceMe<span style="color: #000000;">&#41;</span>;
&nbsp;
                <span style="color: #0600FF;">foreach</span> <span style="color: #000000;">&#40;</span>XmlElement parseElement <span style="color: #0600FF;">in</span> thisElement<span style="color: #000000;">&#41;</span>
                <span style="color: #000000;">&#123;</span>
                     <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>parseElement.<span style="color: #0000FF;">HasChildNodes</span> <span style="color: #008000;">&amp;&amp;</span> parseElement.<span style="color: #0000FF;">InnerXml</span>.<span style="color: #0000FF;">Contains</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;entity&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                    <span style="color: #000000;">&#123;</span>
                        ParseNestedXML<span style="color: #000000;">&#40;</span>parseElement, baseElement<span style="color: #000000;">&#41;</span>; <span style="color: #008080; font-style: italic;">//Whee recursion.</span>
                    <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//if (parseElement.HasChildNodes &amp;&amp; parseElement.InnerXml.Contains(&quot;entity&quot;))</span>
                    <span style="color: #0600FF;">else</span>
                    <span style="color: #000000;">&#123;</span>
                        ParseLinkUnitStats<span style="color: #000000;">&#40;</span>parseElement, baseElement<span style="color: #000000;">&#41;</span>; <span style="color: #008080; font-style: italic;">//This has always worked.</span>
                    <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//else</span>
&nbsp;
                    rosterElement.<span style="color: #0000FF;">AppendChild</span><span style="color: #000000;">&#40;</span>baseElement<span style="color: #000000;">&#41;</span>; <span style="color: #008080; font-style: italic;">//Add to the local node.</span>
&nbsp;
                    rootElement.<span style="color: #0000FF;">AppendChild</span><span style="color: #000000;">&#40;</span>rosterElement<span style="color: #000000;">&#41;</span>; <span style="color: #008080; font-style: italic;">//Add to the Army node.</span>
                <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//foreach (XmlElement parseElement in thisElement)</span>
&nbsp;
            <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//else</span>
&nbsp;
        <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//public void ParseNestedXML(XmlElement thisElement, XmlElement rosterElement)</span>
&nbsp;
        <span style="color: #0600FF;">public</span> <span style="color: #0600FF;">void</span> ParseRoster<span style="color: #000000;">&#40;</span><span style="color: #FF0000;">string</span> path, <span style="color: #FF0000;">string</span> output<span style="color: #000000;">&#41;</span>
        <span style="color: #000000;">&#123;</span>
            XmlDocument parsingRoster <span style="color: #008000;">=</span> <span style="color: #008000;">new</span> XmlDocument<span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>;
&nbsp;
            parsingRoster.<span style="color: #0000FF;">Load</span><span style="color: #000000;">&#40;</span>path<span style="color: #000000;">&#41;</span>;
&nbsp;
            XmlNodeList parsingElements <span style="color: #008000;">=</span> parsingRoster.<span style="color: #0000FF;">SelectNodes</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;/document/squad&quot;</span><span style="color: #000000;">&#41;</span>;
&nbsp;
            <span style="color: #0600FF;">foreach</span> <span style="color: #000000;">&#40;</span>XmlElement thisElement <span style="color: #0600FF;">in</span> parsingElements<span style="color: #000000;">&#41;</span>
            <span style="color: #000000;">&#123;</span>
                XmlElement rosterElement <span style="color: #008000;">=</span> Roster.<span style="color: #0000FF;">CreateElement</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;Unit&quot;</span><span style="color: #000000;">&#41;</span>;
&nbsp;
                ParseNestedXML<span style="color: #000000;">&#40;</span>thisElement, rosterElement<span style="color: #000000;">&#41;</span>;
            <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//foreach (XmlElement thisElement in parsingElements)</span>
&nbsp;
            Roster.<span style="color: #0000FF;">AppendChild</span><span style="color: #000000;">&#40;</span>rootElement<span style="color: #000000;">&#41;</span>;
&nbsp;
            Roster.<span style="color: #0000FF;">Save</span><span style="color: #000000;">&#40;</span>output<span style="color: #000000;">&#41;</span>;
        <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//public void ParseRoster(string path, string output)</span>
&nbsp;
        <span style="color: #0600FF;">public</span> <span style="color: #0600FF;">void</span> ParseLinkUnitStats<span style="color: #000000;">&#40;</span>XmlElement parseElement, XmlElement baseElement<span style="color: #000000;">&#41;</span>
        <span style="color: #000000;">&#123;</span>
&nbsp;
            <span style="color: #0600FF;">foreach</span> <span style="color: #000000;">&#40;</span>XmlElement correctElement <span style="color: #0600FF;">in</span> parseElement<span style="color: #000000;">&#41;</span>
            <span style="color: #000000;">&#123;</span>
                <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">HasAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;name&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                <span style="color: #000000;">&#123;</span>
                    <span style="color: #FF0000;">string</span> subReplaceMe <span style="color: #008000;">=</span> correctElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;name&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">ToString</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>;
&nbsp;
                    subReplaceMe <span style="color: #008000;">=</span> RemoveWhitespace<span style="color: #000000;">&#40;</span>subReplaceMe<span style="color: #000000;">&#41;</span>;
&nbsp;
                    XmlElement addElement <span style="color: #008000;">=</span> Roster.<span style="color: #0000FF;">CreateElement</span><span style="color: #000000;">&#40;</span>subReplaceMe<span style="color: #000000;">&#41;</span>;
                    <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span><span style="color: #008000;">!</span>Regex.<span style="color: #0000FF;">Match</span><span style="color: #000000;">&#40;</span>subReplaceMe, <span style="">@&quot;(Left|Worker|Helper|Pts|Coun|Group)&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">Success</span><span style="color: #000000;">&#41;</span>
                    <span style="color: #000000;">&#123;</span>
                        <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>parseElement.<span style="color: #0000FF;">HasChildNodes</span> <span style="color: #008000;">&amp;&amp;</span> parseElement.<span style="color: #0000FF;">InnerXml</span>.<span style="color: #0000FF;">Contains</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;entity&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                        <span style="color: #000000;">&#123;</span>
                            <span style="color: #008080; font-style: italic;">//Console.WriteLine(&quot;Found an item (XmlElement)&quot;);</span>
                            ParseNestedXML<span style="color: #000000;">&#40;</span>addElement, correctElement<span style="color: #000000;">&#41;</span>;
                        <span style="color: #000000;">&#125;</span>
                        <span style="color: #0600FF;">else</span> <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">HasAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;description&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                        <span style="color: #000000;">&#123;</span>
                            addElement.<span style="color: #0000FF;">InnerText</span> <span style="color: #008000;">=</span> correctElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;description&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">ToString</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>;
                        <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//else if (correctElement.HasAttribute(&quot;description&quot;))</span>
                        <span style="color: #0600FF;">else</span> <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">HasAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;value&quot;</span><span style="color: #000000;">&#41;</span> <span style="color: #008000;">&amp;&amp;</span> <span style="color: #000000;">&#40;</span>Regex.<span style="color: #0000FF;">IsMatch</span><span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;value&quot;</span><span style="color: #000000;">&#41;</span>, <span style="">@&quot;[^0|-]&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                        <span style="color: #000000;">&#123;</span>
                            addElement.<span style="color: #0000FF;">InnerText</span> <span style="color: #008000;">=</span> RemoveWhitespace<span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;value&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">ToString</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>;
                            baseElement.<span style="color: #0000FF;">AppendChild</span><span style="color: #000000;">&#40;</span>addElement<span style="color: #000000;">&#41;</span>;
                        <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//else if (correctElement.HasAttribute(&quot;value&quot;))</span>
                    <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//if (!Regex.Match(subReplaceMe, ...).Success)</span>
                    <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>parseElement.<span style="color: #0000FF;">HasAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;basename&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                    <span style="color: #000000;">&#123;</span>
                        <span style="color: #008080; font-style: italic;">/*It's a non-dwarf item.  Whee!  They don't show up in the XmlNodeList one.
                        Get rid of newlines and periods at the end, then set it as the InnerText
                        This doesn't catch cases where the item has other properties inside it, but
                        I haven't seen those */</span>
                        baseElement.<span style="color: #0000FF;">InnerText</span> <span style="color: #008000;">=</span> Regex.<span style="color: #0000FF;">Replace</span><span style="color: #000000;">&#40;</span>parseElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;itemsummary&quot;</span><span style="color: #000000;">&#41;</span>, <span style="">@&quot;(<span style="">\\</span>n|\.)&quot;</span>, <span style="color: #FF0000;">String</span>.<span style="color: #0000FF;">Empty</span><span style="color: #000000;">&#41;</span>;
                    <span style="color: #000000;">&#125;</span>
                <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//if (correctElement.HasAttribute(&quot;name&quot;))</span>
&nbsp;
&nbsp;
            <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//foreach (XmlElement correctElement in parseElement)</span>
&nbsp;
        <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//public void ParseLinkUnitStats(XmlElement parseElement, XmlElement baseElement)</span>
&nbsp;
        <span style="color: #0600FF;">public</span> <span style="color: #0600FF;">void</span> ParseLinkUnitStats<span style="color: #000000;">&#40;</span>XmlNodeList parseNodeList, XmlElement baseElement<span style="color: #000000;">&#41;</span>
        <span style="color: #000000;">&#123;</span>
            <span style="color: #0600FF;">foreach</span> <span style="color: #000000;">&#40;</span>XmlElement correctElement <span style="color: #0600FF;">in</span> parseNodeList<span style="color: #000000;">&#41;</span>
            <span style="color: #000000;">&#123;</span>
                <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">HasAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;name&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                <span style="color: #000000;">&#123;</span>
                    <span style="color: #FF0000;">string</span> subReplaceMe <span style="color: #008000;">=</span> correctElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;name&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">ToString</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>;
                     subReplaceMe <span style="color: #008000;">=</span> RemoveWhitespace<span style="color: #000000;">&#40;</span>subReplaceMe<span style="color: #000000;">&#41;</span>;
&nbsp;
                    <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span><span style="color: #008000;">!</span>Regex.<span style="color: #0000FF;">Match</span><span style="color: #000000;">&#40;</span>subReplaceMe, <span style="">@&quot;(Left|Worker|Helper|Pts|Coun|Group)&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">Success</span><span style="color: #000000;">&#41;</span>
                    <span style="color: #000000;">&#123;</span>
&nbsp;
                        XmlElement addElement <span style="color: #008000;">=</span> Roster.<span style="color: #0000FF;">CreateElement</span><span style="color: #000000;">&#40;</span>subReplaceMe<span style="color: #000000;">&#41;</span>;
                        <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">HasChildNodes</span> <span style="color: #008000;">&amp;&amp;</span> correctElement.<span style="color: #0000FF;">InnerXml</span>.<span style="color: #0000FF;">Contains</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;entity&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                        <span style="color: #000000;">&#123;</span>
                            <span style="color: #008080; font-style: italic;">//Console.WriteLine(&quot;Found an item (XmlNodeList)&quot;);</span>
                            ParseNestedXML<span style="color: #000000;">&#40;</span>addElement, correctElement<span style="color: #000000;">&#41;</span>;
                        <span style="color: #000000;">&#125;</span>
&nbsp;
                        <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">HasAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;description&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                        <span style="color: #000000;">&#123;</span>
                            addElement.<span style="color: #0000FF;">InnerText</span> <span style="color: #008000;">=</span> correctElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;description&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">ToString</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>;
                            baseElement.<span style="color: #0000FF;">AppendChild</span><span style="color: #000000;">&#40;</span>addElement<span style="color: #000000;">&#41;</span>;
                        <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//if (correctElement.HasAttribute(&quot;description&quot;))</span>
                        <span style="color: #0600FF;">else</span> <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">HasAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;value&quot;</span><span style="color: #000000;">&#41;</span> <span style="color: #008000;">&amp;&amp;</span> <span style="color: #000000;">&#40;</span>Regex.<span style="color: #0000FF;">IsMatch</span><span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;value&quot;</span><span style="color: #000000;">&#41;</span>, <span style="">@&quot;[^0|-]&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                        <span style="color: #000000;">&#123;</span>
                            addElement.<span style="color: #0000FF;">InnerText</span> <span style="color: #008000;">=</span> RemoveWhitespace<span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;value&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">ToString</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>;
                            baseElement.<span style="color: #0000FF;">AppendChild</span><span style="color: #000000;">&#40;</span>addElement<span style="color: #000000;">&#41;</span>;
                        <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//else if (correctElement.HasAttribute(&quot;value&quot;))</span>
                    <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//if (!Regex.Match(subReplaceMe, ...).Success)</span>
&nbsp;
                <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//if (correctElement.HasAttribute(&quot;name&quot;))</span>
&nbsp;
            <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//foreach (XmlElement correctElement in parseNodeList)</span>
&nbsp;
        <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//public void ParseLinkUnitStats(XmlNodeList parseNodeList, XmlElement baseElement)</span>
&nbsp;
    <span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//class XmlParser</span>
&nbsp;
<span style="color: #000000;">&#125;</span> <span style="color: #008080; font-style: italic;">//namespace RosterParserTest</span></pre></td></tr></table></div>

<div class="feedflare">
<a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=AAAMN"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=AAAMN" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=S0gan"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=S0gan" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=icoen"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=icoen" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=wJDFN"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=wJDFN" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=8bf9n"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=8bf9n" border="0"></img></a>
</div>]]></content:encoded>
			<wfw:commentRss>http://blog.theprodigalboyfriend.com/2008/10/08/rosterparserfixedxmlparser-parser-new-rosterparserfixedxmlparser/feed/</wfw:commentRss>
		</item>
		<item>
		<title>Ugh.</title>
		<link>http://blog.theprodigalboyfriend.com/2008/10/03/ugh/</link>
		<comments>http://blog.theprodigalboyfriend.com/2008/10/03/ugh/#comments</comments>
		<pubDate>Sat, 04 Oct 2008 04:30:38 +0000</pubDate>
		<dc:creator>Ryan</dc:creator>
		
		<category><![CDATA[General]]></category>

		<category><![CDATA[c#]]></category>

		<category><![CDATA[Code]]></category>

		<category><![CDATA[xml]]></category>

		<guid isPermaLink="false">http://blog.theprodigalboyfriend.com/?p=66</guid>
		<description><![CDATA[I&#8217;m already not that fond of working with XML in .NET.  Here are a couple of fixes:

public static string RemoveWhitespace&#40;string str&#41;
&#123;
    try
    &#123;
        return new Regex&#40;@&#34;(\s+&#124;\{.*?\}&#124;\(.*?\)&#124;\/+&#124;\.+)&#34;&#41;.Replace&#40;str, String.Empty&#41;;
    &#125;
    catch &#40;Exception&#41;
    &#123;
  [...]]]></description>
			<content:encoded><![CDATA[<p>I&#8217;m already not that fond of working with XML in .NET.  Here are a couple of fixes:</p>

<div class="wp_syntax"><div class="code"><pre class="csharp csharp" style="font-family:monospace;"><span style="color: #0600FF;">public</span> <span style="color: #0600FF;">static</span> <span style="color: #FF0000;">string</span> RemoveWhitespace<span style="color: #000000;">&#40;</span><span style="color: #FF0000;">string</span> str<span style="color: #000000;">&#41;</span>
<span style="color: #000000;">&#123;</span>
    <span style="color: #0600FF;">try</span>
    <span style="color: #000000;">&#123;</span>
        <span style="color: #0600FF;">return</span> <span style="color: #008000;">new</span> Regex<span style="color: #000000;">&#40;</span><span style="">@&quot;(\s+|\{.*?\}|\(.*?\)|\/+|\.+)&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">Replace</span><span style="color: #000000;">&#40;</span>str, <span style="color: #FF0000;">String</span>.<span style="color: #0000FF;">Empty</span><span style="color: #000000;">&#41;</span>;
    <span style="color: #000000;">&#125;</span>
    <span style="color: #0600FF;">catch</span> <span style="color: #000000;">&#40;</span>Exception<span style="color: #000000;">&#41;</span>
    <span style="color: #000000;">&#123;</span>
        <span style="color: #0600FF;">return</span> str;
    <span style="color: #000000;">&#125;</span>
<span style="color: #000000;">&#125;</span></pre></div></div>

<p>This actually gets rid of the crap in the braces, parentheses, etc. (as well as getting rid of periods).</p>
<p>Secondly, I loathe empty nodes (stats, etc.).</p>

<div class="wp_syntax"><div class="code"><pre class="csharp csharp" style="font-family:monospace;">replaceMe <span style="color: #008000;">=</span> RemoveWhitespace<span style="color: #000000;">&#40;</span>replaceMe<span style="color: #000000;">&#41;</span>;
Console.<span style="color: #0000FF;">WriteLine</span><span style="color: #000000;">&#40;</span>replaceMe<span style="color: #000000;">&#41;</span>;
<span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>replaceMe <span style="color: #008000;">!=</span> <span style="color: #FF0000;">String</span>.<span style="color: #0000FF;">Empty</span><span style="color: #000000;">&#41;</span> 
<span style="color: #000000;">&#123;</span>
    XmlElement baseElement <span style="color: #008000;">=</span> Roster.<span style="color: #0000FF;">CreateElement</span><span style="color: #000000;">&#40;</span>replaceMe<span style="color: #000000;">&#41;</span>;
&nbsp;
    <span style="color: #0600FF;">foreach</span> <span style="color: #000000;">&#40;</span>XmlElement parseElement <span style="color: #0600FF;">in</span> thisElement<span style="color: #000000;">&#41;</span>
    <span style="color: #000000;">&#123;</span>
        <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>parseElement.<span style="color: #0000FF;">HasChildNodes</span> <span style="color: #008000;">&amp;&amp;</span> parseElement.<span style="color: #0000FF;">InnerXml</span>.<span style="color: #0000FF;">Contains</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;entity&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
        <span style="color: #000000;">&#123;</span>
&nbsp;
            ParseNestedXML<span style="color: #000000;">&#40;</span>parseElement, baseElement<span style="color: #000000;">&#41;</span>;
        <span style="color: #000000;">&#125;</span>
        <span style="color: #0600FF;">else</span>
        <span style="color: #000000;">&#123;</span>
            <span style="color: #0600FF;">foreach</span> <span style="color: #000000;">&#40;</span>XmlElement correctElement <span style="color: #0600FF;">in</span> parseElement<span style="color: #000000;">&#41;</span>
            <span style="color: #000000;">&#123;</span>
                <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">HasAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;name&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                <span style="color: #000000;">&#123;</span>
                    <span style="color: #FF0000;">string</span> subReplaceMe <span style="color: #008000;">=</span> correctElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;name&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">ToString</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>;
&nbsp;
                    subReplaceMe <span style="color: #008000;">=</span> RemoveWhitespace<span style="color: #000000;">&#40;</span>subReplaceMe<span style="color: #000000;">&#41;</span>;
&nbsp;
                    XmlElement addElement <span style="color: #008000;">=</span> Roster.<span style="color: #0000FF;">CreateElement</span><span style="color: #000000;">&#40;</span>subReplaceMe<span style="color: #000000;">&#41;</span>;
&nbsp;
                    <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">HasAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;description&quot;</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                    <span style="color: #000000;">&#123;</span>
                        addElement.<span style="color: #0000FF;">InnerText</span> <span style="color: #008000;">=</span> correctElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;description&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">ToString</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>;
                        baseElement.<span style="color: #0000FF;">AppendChild</span><span style="color: #000000;">&#40;</span>addElement<span style="color: #000000;">&#41;</span>;
                    <span style="color: #000000;">&#125;</span>
                    <span style="color: #008080; font-style: italic;">//Bye, stats with a value of zero or a hyphen!</span>
                    <span style="color: #0600FF;">else</span> <span style="color: #0600FF;">if</span> <span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">HasAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;value&quot;</span><span style="color: #000000;">&#41;</span> <span style="color: #008000;">&amp;&amp;</span> <span style="color: #000000;">&#40;</span>Regex.<span style="color: #0000FF;">Match</span><span style="color: #000000;">&#40;</span>correctElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;value&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">ToString</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>, <span style="">@&quot;[^0|-]&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">Success</span><span style="color: #000000;">&#41;</span><span style="color: #000000;">&#41;</span>
                    <span style="color: #000000;">&#123;</span>
                        addElement.<span style="color: #0000FF;">InnerText</span> <span style="color: #008000;">=</span> correctElement.<span style="color: #0000FF;">GetAttribute</span><span style="color: #000000;">&#40;</span><span style="color: #666666;">&quot;value&quot;</span><span style="color: #000000;">&#41;</span>.<span style="color: #0000FF;">ToString</span><span style="color: #000000;">&#40;</span><span style="color: #000000;">&#41;</span>;
                        baseElement.<span style="color: #0000FF;">AppendChild</span><span style="color: #000000;">&#40;</span>addElement<span style="color: #000000;">&#41;</span>;
                    <span style="color: #000000;">&#125;</span>
&nbsp;
                <span style="color: #000000;">&#125;</span>
            <span style="color: #000000;">&#125;</span>
        <span style="color: #000000;">&#125;</span>
&nbsp;
        rosterElement.<span style="color: #0000FF;">AppendChild</span><span style="color: #000000;">&#40;</span>baseElement<span style="color: #000000;">&#41;</span>;
&nbsp;
        rootElement.<span style="color: #0000FF;">AppendChild</span><span style="color: #000000;">&#40;</span>rosterElement<span style="color: #000000;">&#41;</span>;
    <span style="color: #000000;">&#125;</span>
<span style="color: #000000;">&#125;</span></pre></div></div>

<p>I find it kind of ironic that recursion is used after bitching about recursion.  I&#8217;ll probably take a look at the nesting problems and whatnot this weekend, assuming I have any time.</p>
<p>I wonder if it&#8217;s possible to get a job doing nothing but writing regular expressions&#8230;</p>
<div class="feedflare">
<a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=sSdLN"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=sSdLN" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=fMPbn"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=fMPbn" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=Pf9tn"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=Pf9tn" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=x2BtN"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=x2BtN" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=ACXjn"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=ACXjn" border="0"></img></a>
</div>]]></content:encoded>
			<wfw:commentRss>http://blog.theprodigalboyfriend.com/2008/10/03/ugh/feed/</wfw:commentRss>
		</item>
		<item>
		<title>Parser, part deux</title>
		<link>http://blog.theprodigalboyfriend.com/2008/10/02/parser-part-deux/</link>
		<comments>http://blog.theprodigalboyfriend.com/2008/10/02/parser-part-deux/#comments</comments>
		<pubDate>Thu, 02 Oct 2008 21:54:57 +0000</pubDate>
		<dc:creator>Ryan</dc:creator>
		
		<category><![CDATA[General]]></category>

		<category><![CDATA[Code]]></category>

		<category><![CDATA[Ruby]]></category>

		<category><![CDATA[WTF]]></category>

		<guid isPermaLink="false">http://blog.theprodigalboyfriend.com/?p=64</guid>
		<description><![CDATA[Fixed the nested &#60;item&#62; blocks.  Ran into another problem where it didn&#8217;t parse nested characters and their items properly, then yet another where some Gifts of Khaine (and probably other things I haven&#8217;t seen in either list) are essentially nested worthlessness.  Fixed code for it:

def parseitem&#40;d, addto&#41;
  added = addto.add_element&#40;d.attributes&#91;&#34;name&#34;&#93;.gsub&#40;/\s+/, ''&#41;&#41;.add_text&#40;d.attributes&#91;&#34;description&#34;&#93;.gsub&#40;/\./, ''&#41;&#41;
 [...]]]></description>
			<content:encoded><![CDATA[<p>Fixed the nested &lt;item&gt; blocks.  Ran into another problem where it didn&#8217;t parse nested characters and their items properly, then yet another where some Gifts of Khaine (and probably other things I haven&#8217;t seen in either list) are essentially nested worthlessness.  Fixed code for it:</p>

<div class="wp_syntax"><div class="code"><pre class="ruby ruby" style="font-family:monospace;"><span style="color:#9966CC; font-weight:bold;">def</span> parseitem<span style="color:#006600; font-weight:bold;">&#40;</span>d, addto<span style="color:#006600; font-weight:bold;">&#41;</span>
  added = addto.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span>d.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;name&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#CC0066; font-weight:bold;">gsub</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>\s<span style="color:#006600; font-weight:bold;">+/</span>, <span style="color:#996600;">''</span><span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">add_text</span><span style="color:#006600; font-weight:bold;">&#40;</span>d.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;description&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#CC0066; font-weight:bold;">gsub</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>\.<span style="color:#006600; font-weight:bold;">/</span>, <span style="color:#996600;">''</span><span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
  d.<span style="color:#9900CC;">elements</span>.<span style="color:#9900CC;">each</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">'link'</span><span style="color:#006600; font-weight:bold;">&#41;</span> <span style="color:#9966CC; font-weight:bold;">do</span> |ele|
    <span style="color:#9966CC; font-weight:bold;">unless</span> ele.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;name&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span> =~ <span style="color:#006600; font-weight:bold;">/</span><span style="color:#006600; font-weight:bold;">&#40;</span>Worker|Helper|Cost|Left<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">/</span>
      <span style="color:#9966CC; font-weight:bold;">if</span> !ele.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;description&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#0000FF; font-weight:bold;">nil</span>?
        added.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span>ele.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;name&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#CC0066; font-weight:bold;">gsub</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>\s<span style="color:#006600; font-weight:bold;">+/</span>, <span style="color:#996600;">''</span><span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">add_text</span><span style="color:#006600; font-weight:bold;">&#40;</span>ele.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;description&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">else</span>
        <span style="color:#008000; font-style:italic;">#This is necessary for some Gifts of Khaine, apparently</span>
        <span style="color:#9966CC; font-weight:bold;">if</span> added.<span style="color:#9900CC;">text</span> == ele.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;name&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>
          <span style="color:#CC0066; font-weight:bold;">print</span> <span style="color:#996600;">&quot;Found duplicated #{added.text}!<span style="color:#000099;">\n</span><span style="color:#000099;">\n</span>&quot;</span>
          added.<span style="color:#9900CC;">text</span> = <span style="color:#996600;">''</span>
        <span style="color:#9966CC; font-weight:bold;">end</span>
        added.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span>ele.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;name&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#CC0066; font-weight:bold;">gsub</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>\s<span style="color:#006600; font-weight:bold;">+/</span>, <span style="color:#996600;">''</span><span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">add_text</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;True&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">end</span>
    <span style="color:#9966CC; font-weight:bold;">end</span>
  <span style="color:#9966CC; font-weight:bold;">end</span>
<span style="color:#9966CC; font-weight:bold;">end</span>
&nbsp;
<span style="color:#9966CC; font-weight:bold;">def</span> parsenested<span style="color:#006600; font-weight:bold;">&#40;</span>process, addto<span style="color:#006600; font-weight:bold;">&#41;</span>
  <span style="color:#008000; font-style:italic;">#Try to guess if it's a champion, character in the unit, or item</span>
  process.<span style="color:#9900CC;">elements</span>.<span style="color:#9900CC;">each</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">'entity'</span><span style="color:#006600; font-weight:bold;">&#41;</span> <span style="color:#9966CC; font-weight:bold;">do</span> |d|
    <span style="color:#9966CC; font-weight:bold;">if</span> d.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;statset&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span> =~ <span style="color:#006600; font-weight:bold;">/</span>Normal<span style="color:#006600; font-weight:bold;">/</span>
      <span style="color:#008000; font-style:italic;">#It's a character, crew, or mount.  Figure out which</span>
      <span style="color:#9966CC; font-weight:bold;">if</span> d.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;totalcost&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span> !~ <span style="color:#006600; font-weight:bold;">/</span>^0<span style="color:#006600; font-weight:bold;">/</span>
        <span style="color:#008000; font-style:italic;">#It's a champion or character</span>
        adder = addto.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;champion&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
        <span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;Found champ<span style="color:#000099;">\n</span>&quot;</span>
        parse<span style="color:#006600; font-weight:bold;">&#40;</span>d, adder<span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">else</span>
        <span style="color:#008000; font-style:italic;">#It's crew or the like</span>
        adder = addto.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;crew&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
        <span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;Found crew<span style="color:#000099;">\n</span>&quot;</span>
        parse<span style="color:#006600; font-weight:bold;">&#40;</span>d, adder<span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">end</span>
    <span style="color:#9966CC; font-weight:bold;">else</span>
      <span style="color:#008000; font-style:italic;">#It's an item</span>
      <span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;Found item<span style="color:#000099;">\n</span>&quot;</span>
      <span style="color:#9966CC; font-weight:bold;">if</span> addto.<span style="color:#9900CC;">elements</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;item&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#0000FF; font-weight:bold;">nil</span>?
        <span style="color:#0066ff; font-weight:bold;">@adder</span> = addto.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;item&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">end</span>
      parseitem<span style="color:#006600; font-weight:bold;">&#40;</span>d, <span style="color:#0066ff; font-weight:bold;">@adder</span><span style="color:#006600; font-weight:bold;">&#41;</span>
    <span style="color:#9966CC; font-weight:bold;">end</span>
  <span style="color:#9966CC; font-weight:bold;">end</span>
<span style="color:#9966CC; font-weight:bold;">end</span></pre></div></div>

<div class="feedflare">
<a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=PrY0N"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=PrY0N" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=awgGn"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=awgGn" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=BHc3n"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=BHc3n" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=yMDyN"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=yMDyN" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?a=vSobn"><img src="http://feeds.feedburner.com/~f/TheProdigalBoyfriend?i=vSobn" border="0"></img></a>
</div>]]></content:encoded>
			<wfw:commentRss>http://blog.theprodigalboyfriend.com/2008/10/02/parser-part-deux/feed/</wfw:commentRss>
		</item>
		<item>
		<title>Parser</title>
		<link>http://blog.theprodigalboyfriend.com/2008/10/01/parser/</link>
		<comments>http://blog.theprodigalboyfriend.com/2008/10/01/parser/#comments</comments>
		<pubDate>Thu, 02 Oct 2008 04:09:52 +0000</pubDate>
		<dc:creator>Ryan</dc:creator>
		
		<category><![CDATA[General]]></category>

		<category><![CDATA[Code]]></category>

		<category><![CDATA[Ruby]]></category>

		<category><![CDATA[WTF]]></category>

		<category><![CDATA[xml]]></category>

		<guid isPermaLink="false">http://blog.theprodigalboyfriend.com/?p=62</guid>
		<description><![CDATA[Ok, boring.  I didn&#8217;t spend as much time working on it tonight as I intended to, but at least it parses the Dwarf roster fine.  It does not parse the Dark Elf roster properly (namely, it doesn&#8217;t pull the description out of items or Gifts of Khaine, and it doubles up the &#60;item&#62; [...]]]></description>
			<content:encoded><![CDATA[<p>Ok, boring.  I didn&#8217;t spend as much time working on it tonight as I intended to, but at least it parses the Dwarf roster fine.  It does <em>not</em> parse the Dark Elf roster properly (namely, it doesn&#8217;t pull the description out of items or Gifts of Khaine, and it doubles up the &lt;item&gt; tag for reasons I&#8217;m not sure of), but that&#8217;ll get fixed when I&#8217;m at work tomorrow.</p>
<p>Ruby code:</p>

<div class="wp_syntax"><div class="code"><pre class="ruby ruby" style="font-family:monospace;"><span style="color:#008000; font-style:italic;">#!/usr/bin/ruby</span>
<span style="color:#CC0066; font-weight:bold;">require</span> <span style="color:#996600;">&quot;rexml/document&quot;</span>
<span style="color:#CC0066; font-weight:bold;">require</span> <span style="color:#996600;">&quot;pp&quot;</span>
<span style="color:#CC0066; font-weight:bold;">require</span> <span style="color:#996600;">&quot;rexml/formatters/default&quot;</span>
<span style="color:#9966CC; font-weight:bold;">include</span> REXML
&nbsp;
inputxml = <span style="color:#CC00FF; font-weight:bold;">File</span>.<span style="color:#9900CC;">read</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">'dwarfroster.rst'</span><span style="color:#006600; font-weight:bold;">&#41;</span>
<span style="color:#0066ff; font-weight:bold;">@roster</span> = Document.<span style="color:#9900CC;">new</span> inputxml
&nbsp;
<span style="color:#0066ff; font-weight:bold;">@army</span> = Document.<span style="color:#9900CC;">new</span>.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;army&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
&nbsp;
<span style="color:#9966CC; font-weight:bold;">def</span> parsenested<span style="color:#006600; font-weight:bold;">&#40;</span>process, addto<span style="color:#006600; font-weight:bold;">&#41;</span>
  <span style="color:#008000; font-style:italic;">#Try to guess if it's a champion, character in the unit, or item</span>
  process.<span style="color:#9900CC;">elements</span>.<span style="color:#9900CC;">each</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">'entity'</span><span style="color:#006600; font-weight:bold;">&#41;</span> <span style="color:#9966CC; font-weight:bold;">do</span> |p|
    <span style="color:#008000; font-style:italic;">#puts p</span>
    <span style="color:#9966CC; font-weight:bold;">if</span> <span style="color:#CC0066; font-weight:bold;">p</span>.<span style="color:#9900CC;">elements</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;link&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#9900CC;">has_elements</span>?
      <span style="color:#008000; font-style:italic;">#Recursively run through these to figure out what the hell it is</span>
      <span style="color:#9966CC; font-weight:bold;">if</span> <span style="color:#CC0066; font-weight:bold;">p</span>.<span style="color:#9900CC;">elements</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;link/entity&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;itemsummary&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#9900CC;">any</span>?
         adder = addto.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;item&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
         <span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;Found nested<span style="color:#000099;">\n</span>&quot;</span>
         parsenested<span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#CC0066; font-weight:bold;">p</span>.<span style="color:#9900CC;">elements</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;link&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>, adder<span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">else</span>
        <span style="color:#008000; font-style:italic;">#This is really just stubbed out, since I haven't seen it</span>
      <span style="color:#9966CC; font-weight:bold;">end</span>
    <span style="color:#9966CC; font-weight:bold;">elsif</span> <span style="color:#CC0066; font-weight:bold;">p</span>.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;statset&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span> =~ <span style="color:#006600; font-weight:bold;">/</span>Normal<span style="color:#006600; font-weight:bold;">/</span>
      <span style="color:#008000; font-style:italic;">#It's a character, crew, or mount.  Figure out which</span>
      <span style="color:#9966CC; font-weight:bold;">if</span> <span style="color:#CC0066; font-weight:bold;">p</span>.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;totalcost&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span> !~ <span style="color:#006600; font-weight:bold;">/</span>^0<span style="color:#006600; font-weight:bold;">/</span>
        <span style="color:#008000; font-style:italic;">#It's a champion or character</span>
        adder = addto.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;champion&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
        <span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;Found champ<span style="color:#000099;">\n</span>&quot;</span>
        parse<span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#CC0066; font-weight:bold;">p</span>, adder<span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">else</span>
        <span style="color:#008000; font-style:italic;">#It's crew or the like</span>
        adder = addto.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;crew&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
        <span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;Found crew<span style="color:#000099;">\n</span>&quot;</span>
        parse<span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#CC0066; font-weight:bold;">p</span>, adder<span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">end</span>
    <span style="color:#9966CC; font-weight:bold;">else</span>
      <span style="color:#008000; font-style:italic;">#It's an item</span>
      <span style="color:#CC0066; font-weight:bold;">puts</span> <span style="color:#996600;">&quot;Found item<span style="color:#000099;">\n</span>&quot;</span>
      <span style="color:#9966CC; font-weight:bold;">if</span> addto.<span style="color:#9900CC;">elements</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;item&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#0000FF; font-weight:bold;">nil</span>?
        <span style="color:#0066ff; font-weight:bold;">@adder</span> = addto.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;item&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">end</span>
      added = <span style="color:#0066ff; font-weight:bold;">@adder</span>.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#CC0066; font-weight:bold;">p</span>.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;name&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#CC0066; font-weight:bold;">gsub</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>\s<span style="color:#006600; font-weight:bold;">+/</span>, <span style="color:#996600;">''</span><span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#CC0066; font-weight:bold;">p</span>.<span style="color:#9900CC;">elements</span>.<span style="color:#9900CC;">each</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">'link'</span><span style="color:#006600; font-weight:bold;">&#41;</span> <span style="color:#9966CC; font-weight:bold;">do</span> |ele|
        <span style="color:#9966CC; font-weight:bold;">unless</span> ele.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;name&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span> =~ <span style="color:#006600; font-weight:bold;">/</span><span style="color:#006600; font-weight:bold;">&#40;</span>Worker|Helper|Cost|Left<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">/</span>
          added.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span>ele.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;name&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#CC0066; font-weight:bold;">gsub</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>\s<span style="color:#006600; font-weight:bold;">+/</span>, <span style="color:#996600;">''</span><span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">add_text</span><span style="color:#006600; font-weight:bold;">&#40;</span>ele.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;description&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
        <span style="color:#9966CC; font-weight:bold;">end</span>
      <span style="color:#9966CC; font-weight:bold;">end</span>
    <span style="color:#9966CC; font-weight:bold;">end</span>
  <span style="color:#9966CC; font-weight:bold;">end</span>
<span style="color:#9966CC; font-weight:bold;">end</span>
&nbsp;
<span style="color:#9966CC; font-weight:bold;">def</span> parse<span style="color:#006600; font-weight:bold;">&#40;</span>s, addto<span style="color:#006600; font-weight:bold;">&#41;</span>
    <span style="color:#008000; font-style:italic;">#In some cases, the basename differs (i.e. Supreme Sorc vs. High Sorc)</span>
    <span style="color:#008000; font-style:italic;">#Also, it'll pick up whether there's a champion in the unit by the diff</span>
    <span style="color:#008000; font-style:italic;">#of base and count</span>
    <span style="color:#006600; font-weight:bold;">%</span>w<span style="color:#006600; font-weight:bold;">&#91;</span>basename count base<span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#9900CC;">each</span> <span style="color:#9966CC; font-weight:bold;">do</span> |b|
      <span style="color:#9966CC; font-weight:bold;">if</span> s.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span>b<span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#9900CC;">any</span>?
        addto.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span>b<span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">add_text</span><span style="color:#006600; font-weight:bold;">&#40;</span>s.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span>b<span style="color:#006600; font-weight:bold;">&#93;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">end</span>
    <span style="color:#9966CC; font-weight:bold;">end</span>
    stats = addto.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;stats&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
    s.<span style="color:#9900CC;">elements</span>.<span style="color:#9900CC;">each</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">'unitstat'</span><span style="color:#006600; font-weight:bold;">&#41;</span> <span style="color:#9966CC; font-weight:bold;">do</span> |a|
      <span style="color:#008000; font-style:italic;">#unit.fetch(:stats) { |el| unit[el] = {}}</span>
      <span style="color:#008000; font-style:italic;">#I don't want blank stats</span>
      <span style="color:#9966CC; font-weight:bold;">if</span> a.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;value&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#9900CC;">any</span>? <span style="color:#006600; font-weight:bold;">&amp;&amp;</span> <span style="color:#006600; font-weight:bold;">&#40;</span>a.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;value&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span> !~ <span style="color:#006600; font-weight:bold;">/</span><span style="color:#006600; font-weight:bold;">&#40;</span>0|-<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">/</span><span style="color:#006600; font-weight:bold;">&#41;</span>
         stats.<span style="color:#9900CC;">add_element</span><span style="color:#006600; font-weight:bold;">&#40;</span>a.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;name&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span>.<span style="color:#CC0066; font-weight:bold;">gsub</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span>\s<span style="color:#006600; font-weight:bold;">+/</span>, <span style="color:#996600;">''</span><span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">add_text</span><span style="color:#006600; font-weight:bold;">&#40;</span>a.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;value&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">end</span>
    <span style="color:#9966CC; font-weight:bold;">end</span>
    s.<span style="color:#9900CC;">elements</span>.<span style="color:#9900CC;">each</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">'link'</span><span style="color:#006600; font-weight:bold;">&#41;</span> <span style="color:#9966CC; font-weight:bold;">do</span> |link|
      <span style="color:#9966CC; font-weight:bold;">if</span> link.<span style="color:#9900CC;">has_elements</span>?
        <span style="color:#008000; font-style:italic;">#Figure out what the hell it is</span>
        parsenested<span style="color:#006600; font-weight:bold;">&#40;</span>link, addto<span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">else</span>
        <span style="color:#008000; font-style:italic;">#unitatt = unit.add_element(&quot;attributes&quot;)</span>
        <span style="color:#008000; font-style:italic;">#Rip out the name if it doesn't have &quot;Helper, Worker, Points Left, or Cost&quot;</span>
        <span style="color:#9966CC; font-weight:bold;">unless</span> link.<span style="color:#9900CC;">attributes</span><span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#996600;">&quot;name&quot;</span><span style="color:#006600; font-weight:bold;">&#93;</span> =~ <span style="color:#006600; font-weight:bold;">/</span><span style="color:#006600; font-weight:bold;">&#40;</span>Worker|Helper|Cost|Left<span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">/<