OASIS Mailing List Archives
View the OASIS mailing list archive below
or browse/search using MarkMail.

 


Help: OASIS Mailing Lists Help | MarkMail Help

docbook-apps message

[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [List Home]


Subject: Re: [docbook-apps] character maps


On 7/30/07, Hinrich Aue <hinrich.aue@lci-software.com> wrote:
> We have to replace " (ascii quotes) with unicode quotes.
> The documents are big and already written. Also using ascii for this is a
> lot easier for authoring. You don't want to copy and paste Unicode
> characters in the xml source all the time, while you have the ascii quotes on
> your keyboard.
>
> Also we want to exchange the ascii - (dash) with a longer Unicode dash.

Hinrich,

Here's the XSLT2 we use internally to "fix" straight quotes into
curlies. Please note that transformations like this are both
error-prone and language-specific, so this isn't a "use this blindly"
fix. Additionally, you may want to adjust the elements in which you do
no replacements.

$ cat curlies.xsl
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0">
  <!-- XSLT 2.0 is required: the templates below rely on the XPath 2.0
       replace() function. -->

  <!-- Serialize the result as XML encoded in UTF-8, so the curly quote
       characters can be written out directly. -->
  <xsl:output method="xml" encoding="UTF-8"/>

  <!-- Keep whitespace-only text nodes of every source element. -->
  <xsl:preserve-space elements="*"/>
  <!--
    Default rule (identity transform): copy the current attribute or node
    as-is, then process its attributes followed by its children, so that
    more specific templates can override the handling of individual nodes.
  -->
  <xsl:template match="node() | @*">
    <xsl:copy>
      <xsl:apply-templates select="@*"/>
      <xsl:apply-templates select="node()"/>
    </xsl:copy>
  </xsl:template>

  <!--
    Curl straight quotes in running text.

    Matches every text node whose immediate parent is NOT one of the
    code-like elements listed in the predicate (unprefixed names, i.e.
    elements in no namespace), and pipes the text through a chain of
    XPath 2.0 replace() calls. Each variable holds the result of the previous
    step; the last one is written to the output.

    The rules are heuristics tuned for English prose. replace() is called
    without the "m" flag, so ^ and $ anchor to the start and end of the whole
    text node, not to individual lines.

    NOTE(review): only the parent is tested, so text nested one level deeper
    (for example inside an inline child of programlisting) is still rewritten.
    Adjust the element list to suit your documents.
  -->
  <xsl:template match="text()[not(
                                  parent::command|
                                  parent::computeroutput|
                                  parent::constant|
                                  parent::filename|
                                  parent::function|
                                  parent::literal|
                                  parent::markup|
                                  parent::option|
                                  parent::optional|
                                  parent::programlisting|
                                  parent::prompt|
                                  parent::replaceable|
                                  parent::screen|
                                  parent::sgmltag|
                                  parent::userinput|
                                  parent::varname
                                  )]">
    <xsl:variable name="input" select="."/>

    <!-- ===== double quotes ===== -->

    <!-- 1. Opening double quote at the very start of the text node:
            "Barring the baz... -->
    <xsl:variable name="first"
                  select="replace($input, '^&#x0022;(\S)', '&#x201C;$1')"/>

    <!-- 2. Closing double quote at the very end of the text node, after any
            non-whitespace character: ...arring the baz." -->
    <xsl:variable name="second"
                  select="replace($first, '(\S)&#x0022;$', '$1&#x201D;')"/>

    <!-- 3. Opening double quote preceded by whitespace, an opening paren,
            an em dash (U+2014) or a right arrow (U+2192, which the original
            author calls CharMenuDelim). -->
    <xsl:variable name="third"
                  select="replace($second, '([\s(&#x2014;&#x2192;])&#x0022;(\S)', '$1&#x201C;$2')"/>

    <!-- 4. Closing double quote followed by punctuation, whitespace, a
            closing paren, an em dash or a right arrow: fee," arring the baz -->
    <xsl:variable name="fourth"
                  select="replace($third, '(\S)&#x0022;([,.:;?!&#x20;\t\n\r)&#x2014;&#x2192;])', '$1&#x201D;$2')"/>

    <!-- ===== single quotes ===== -->

    <!-- 5. Opening single quote at the very start of the text node:
            'Ole Bill was a big dog!
            A leading 's is skipped unless a letter follows the s, so the
            possessive in <literal>parent</literal>'s house (where the text
            node starts with the apostrophe) reaches step 6 untouched, while
            a quoted word such as 'some still gets an opening quote. -->
    <xsl:variable name="fifth"
                  select='replace($fourth, "^&#x0027;(s[a-zA-Z]|[.a-rt-zA-Z])", "&#x2018;$1")'/>

    <!-- 6. Possessive or contracted 's: parent's house, it's.
            Only rewritten when no letter follows the s; otherwise the quote
            is more likely the opening mark of a quoted word starting with s
            (a 'simple' test), which step 8 handles. -->
    <xsl:variable name="sixth"
                  select='replace($fifth, "&#x0027;s([^a-zA-Z]|$)", "&#x2019;s$1")'/>

    <!-- 7. Closing single quote at the very end of the text node:
            ...a real thing to be perpetuatin.' -->
    <xsl:variable name="seventh"
                  select='replace($sixth, "([,.!a-zA-Z0-9])&#x0027;$", "$1&#x2019;")'/>

    <!-- 8. Opening single quote preceded by whitespace and followed by a
            letter or a full stop: a 'quoted' word -->
    <xsl:variable name="eighth"
                  select='replace($seventh, "(\s)&#x0027;([.a-zA-Z])", "$1&#x2018;$2")'/>

    <!-- 9. Closing single quote (or trailing apostrophe) followed by
            punctuation or whitespace: ...to be perpetuatin'. -->
    <xsl:variable name="ninth"
                  select='replace($eighth, "([,.!a-zA-Z0-9])&#x0027;([.:;&#x20;\t\n\r])", "$1&#x2019;$2")'/>

    <!-- 10. Apostrophe between two letters: Special cases'll get ya -->
    <xsl:variable name="tenth"
                  select='replace($ninth, "([a-zA-Z])&#x0027;([a-zA-Z])", "$1&#x2019;$2")'/>

    <!-- 11. Apostrophe before a digit: in the '90s -->
    <xsl:variable name="eleventh"
                  select='replace($tenth, "&#x0027;([0-9])", "&#x2019;$1")'/>

    <!-- ===== mixed / nested ===== -->

    <!-- 12. Straight single quote right after an opening curly double quote
             becomes an opening single quote. -->
    <xsl:variable name="twelfth"
                  select='replace($eleventh, "&#x201C;&#x0027;", "&#x201C;&#x2018;")'/>

    <!-- 13. Straight single quote right before a closing curly double quote
             becomes a closing single quote. -->
    <xsl:variable name="thirteenth"
                  select='replace($twelfth, "&#x0027;&#x201D;", "&#x2019;&#x201D;")'/>

    <!-- Point this at the last step whenever another rule is appended. -->
    <xsl:variable name="final" select='$thirteenth'/>

    <xsl:value-of select="$final"/>
  </xsl:template>

</xsl:stylesheet>


[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [List Home]