OASIS Mailing List ArchivesView the OASIS mailing list archive below
or browse/search using MarkMail.

 


Help: OASIS Mailing Lists Help | MarkMail Help

docbook-apps message

[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [List Home]


Subject: Internationalized index in XSL


Hi,

automatic indexing which is available in the DocBook XSL stylesheets is 
working quite well for English, but for other languages it lacks several 
features.

From my own experience, the following functionality is missing:

- grouping accented letters such as e, é, ë into the same group under 
the letter "e"

- treating special letters (e.g. "ch") as a single character and placing 
them in the correct position (e.g. between "h" and "i")

The attached customization layer solves these two issues. If you 
want to try it, just import the original fo/docbook.xsl into your 
customization layer and then include the attached file. It was tested 
under Saxon, but should work in any processor which implements the EXSLT 
functions extension.

The current settings are suitable for the Czech language, but you can 
easily modify them by editing the content of the l:letters element:

<l:letters lang="cs">
   ...
   <l i="1">A</l>
   <l i="1">a</l>
   <l i="1">Á</l>
   <l i="1">á</l>
   <l i="2">B</l>
   <l i="2">b</l>
   ...
   <l i="10">H</l>
   <l i="10">h</l>
   <l i="11">Ch</l>
   <l i="11">ch</l>
   <l i="11">cH</l>
   <l i="11">CH</l>
   <l i="12">I</l>
   <l i="12">i</l>
   ...
</l:letters>

This snippet means that the letters A, a, Á, á will be put into the same 
group in the index (they have the same i attribute) and that the letter 
"a" (i=1) will be sorted before "b", which has i=2. The later elements 
define that "ch" will be placed between "h" and "i".

I would appreciate it if you could:

- test the solution and give me feedback
- send me the <l:letters lang="xx"> table for the languages you are using
- let me know if this solution can't properly handle your language, and 
tell me what must be improved in order to fix it

After gathering your feedback I will add this file to the standard DocBook 
XSL stylesheets.

					Jirka

-- 
-----------------------------------------------------------------
   Jirka Kosek  	
   e-mail: jirka@kosek.cz
   http://www.kosek.cz

<?xml version="1.0"?>
<!DOCTYPE xsl:stylesheet [

<!ENTITY lowercase "'abcdefghijklmnopqrstuvwxyz'">
<!ENTITY uppercase "'ABCDEFGHIJKLMNOPQRSTUVWXYZ'">

<!ENTITY primary   'normalize-space(concat(primary/@sortas, primary[not(@sortas)]))'>
<!ENTITY secondary 'normalize-space(concat(secondary/@sortas, secondary[not(@sortas)]))'>
<!ENTITY tertiary  'normalize-space(concat(tertiary/@sortas, tertiary[not(@sortas)]))'>

<!ENTITY sep '" "'>
<!ENTITY scope 'count(ancestor::node()|$scope) = count(ancestor::node())'>
]>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:fo="http://www.w3.org/1999/XSL/Format"
                xmlns:rx="http://www.renderx.com/XSL/Extensions"
                xmlns:axf="http://www.antennahouse.com/names/XSL/Extensions"
                version="1.0"
                xmlns:func="http://exslt.org/functions"
                extension-element-prefixes="func"
                exclude-result-prefixes="func i l"
                xmlns:i="urn:cz-kosek:functions:index"
                xmlns:l="urn:cz-kosek:data:index">

<!-- ********************************************************************
     $Id: autoidx-ng.xsl,v 1.25 2003/10/04 17:11:10 kosek Exp $
     ********************************************************************

     This file is part of the DocBook XSL Stylesheet distribution.
     See ../README or http://docbook.sf.net/ for copyright
     and other information.

     ******************************************************************** -->

<!-- jkj: Temporary attempt at better internationalized indexing -->

<!-- Letter table used by i:group-index and i:group-letter.

     The i attribute defines the sort order of index groups and assigns
     different letters to the same index group. This table lets you treat
     accented letters as their non-accented base letter where desired, and
     it can handle two-character letters such as "ch".

     How the table is read by the functions in this file:
       * i:group-index compares the first two characters of a term against
         the table before the first single character, so two-character
         entries ("Ch") take precedence over their first letter ("C").
       * i:group-letter returns the FIRST l element carrying a given i value,
         so the first entry of each group is the one returned as its label.
       * Groups are sorted numerically by i; only the relative order of the
         values matters, so unused numbers (6, 18, 27) are harmless.

     NOTE(review): the original comment ended mid-sentence ("If it accepted
     to normal stylesheets this table would g"); the intended destination of
     this table in the standard stylesheets is not stated here. -->
<l:letters lang="cs">
  <!-- Empty entry: equals the (empty) leading substring of an empty term,
       so such terms get group code -1. -->
  <l i="-1"/>
  <!-- Group 0 is the fallback code i:group-index returns when no entry
       matches; "Symboly" is its label (presumably Czech for "Symbols"). -->
  <l i="0">Symboly</l>
  <l i="1">A</l>
  <l i="1">a</l>
  <l i="1">Á</l>
  <l i="1">á</l>
  <l i="2">B</l>
  <l i="2">b</l>
  <l i="3">C</l>
  <l i="3">c</l>
  <l i="4">Č</l>
  <l i="4">č</l>
  <l i="5">D</l>
  <l i="5">d</l>
  <l i="5">Ď</l>
  <l i="5">ď</l>
  <l i="7">E</l>
  <l i="7">e</l>
  <l i="7">É</l>
  <l i="7">é</l>
  <l i="7">Ě</l>
  <l i="7">ě</l>
  <l i="7">Ë</l>
  <l i="7">ë</l>
  <l i="8">F</l>
  <l i="8">f</l>
  <l i="9">G</l>
  <l i="9">g</l>
  <l i="10">H</l>
  <l i="10">h</l>
  <!-- Two-character letter "ch": its own group, placed between H and I.
       All four case combinations are listed because matching is exact. -->
  <l i="11">Ch</l>
  <l i="11">ch</l>
  <l i="11">cH</l>
  <l i="11">CH</l>
  <l i="12">I</l>
  <l i="12">i</l>
  <l i="12">Í</l>
  <l i="12">í</l>
  <l i="13">J</l>
  <l i="13">j</l>
  <l i="14">K</l>
  <l i="14">k</l>
  <l i="15">L</l>
  <l i="15">l</l>
  <l i="16">M</l>
  <l i="16">m</l>
  <l i="17">N</l>
  <l i="17">n</l>
  <l i="17">Ň</l>
  <l i="17">ň</l>
  <l i="19">O</l>
  <l i="19">o</l>
  <l i="19">Ó</l>
  <l i="19">ó</l>
  <l i="19">Ö</l>
  <l i="19">ö</l>
  <l i="20">P</l>
  <l i="20">p</l>
  <l i="21">Q</l>
  <l i="21">q</l>
  <l i="22">R</l>
  <l i="22">r</l>
  <l i="23">Ř</l>
  <l i="23">ř</l>
  <l i="24">S</l>
  <l i="24">s</l>
  <l i="25">Š</l>
  <l i="25">š</l>
  <l i="26">T</l>
  <l i="26">t</l>
  <l i="26">Ť</l>
  <l i="26">ť</l>
  <l i="28">U</l>
  <l i="28">u</l>
  <l i="28">Ú</l>
  <l i="28">ú</l>
  <l i="28">Ů</l>
  <l i="28">ů</l>
  <l i="28">Ü</l>
  <l i="28">ü</l>
  <l i="29">V</l>
  <l i="29">v</l>
  <l i="30">W</l>
  <l i="30">w</l>
  <l i="31">X</l>
  <l i="31">x</l>
  <l i="32">Y</l>
  <l i="32">y</l>
  <l i="32">Ý</l>
  <l i="32">ý</l>
  <l i="33">Z</l>
  <l i="33">z</l>
  <l i="34">Ž</l>
  <l i="34">ž</l>
</l:letters>

<!-- i:group-index(term): numeric index-group code for a term.

     The term's leading characters are looked up in the l:letters table of
     this stylesheet (read through document('')):
       1. if the first two characters equal a table entry, that entry's
          i value is returned (this is how letters such as "ch" win over "c");
       2. otherwise, if the first character equals a table entry, that
          entry's i value is returned;
       3. otherwise 0 is returned.
     The result is always a number. -->
<func:function name="i:group-index">
  <xsl:param name="term"/>
  <xsl:variable name="table" select="document('')/*/l:letters/l"/>
  <xsl:variable name="digraph-code" select="$table[. = substring($term, 1, 2)]/@i"/>
  <xsl:variable name="letter-code" select="$table[. = substring($term, 1, 1)]/@i"/>
  <xsl:choose>
    <!-- Two-character match has priority over the single-character match -->
    <xsl:when test="$digraph-code">
      <func:result select="number($digraph-code)"/>
    </xsl:when>
    <xsl:when test="$letter-code">
      <func:result select="number($letter-code)"/>
    </xsl:when>
    <!-- No table entry matched: fall back to group 0 -->
    <xsl:otherwise>
      <func:result select="0"/>
    </xsl:otherwise>
  </xsl:choose>
</func:function>

<!-- i:group-letter(index): the table entry that labels an index group.

     Returns the first l element of the l:letters table (read through
     document('')) whose i attribute equals the given group code; the
     result is empty when no entry carries that code. -->
<func:function name="i:group-letter">
  <xsl:param name="index"/>
  <xsl:variable name="letters-table" select="document('')/*/l:letters"/>
  <func:result select="$letters-table/l[@i = $index][1]"/>
</func:function>

<!-- Key "group-code": indexes every indexterm by the numeric group code
     that i:group-index yields for its primary term (sortas value if
     present, element content otherwise; see the primary entity). -->
<xsl:key match="indexterm" name="group-code" use="i:group-index(&primary;)"/>

<!-- generate-index: emits the index, one division per index group.

     Variant of the original template that groups by the code returned by
     i:group-index instead of by the first letter of the primary term.

     Parameter:
       scope - node the index is restricted to; defaults to the nearest
               ancestor book, or the document root when there is none.

     One representative indexterm is picked per group: the first in-scope
     member of the "group-code" key for its group, skipping indexterms with
     class="endofrange". The representatives are processed in mode
     "index-div", ordered numerically by group code. -->
<xsl:template name="generate-index">
  <xsl:param name="scope" select="(ancestor::book|/)[last()]"/>

  <xsl:apply-templates mode="index-div"
                       select="//indexterm[not(@class = 'endofrange')
                                           and count(.|key('group-code',
                                                           i:group-index(&primary;))[&scope;][1]) = 1]">
    <xsl:sort data-type="number" select="i:group-index(&primary;)"/>
    <xsl:with-param name="scope" select="$scope"/>
  </xsl:apply-templates>
</xsl:template>

<!-- indexterm in mode "index-div": renders one index group.

     Parameter:
       scope - node the index is restricted to; defaults to the current
               indexterm.

     The group is identified by the group code of the current indexterm's
     primary term. Its entries are the in-scope members of that group that
     are the first in-scope indexterm for their primary term (per the
     "primary" key, which is not defined in this file). When there is at
     least one entry, a block is written holding the group title (the
     letter returned by i:group-letter) followed by the entries, processed
     in mode "index-primary" and sorted by upper-cased primary term. -->
<xsl:template match="indexterm" mode="index-div">
  <xsl:param name="scope" select="."/>

  <xsl:variable name="group" select="i:group-index(&primary;)"/>

  <!-- Computed once; serves as both the emptiness test and the node-set
       handed to apply-templates. -->
  <xsl:variable name="entries"
                select="key('group-code', $group)[&scope;]
                        [count(.|key('primary', &primary;)[&scope;][1]) = 1]"/>

  <xsl:if test="$entries">
    <fo:block>
      <xsl:call-template name="indexdiv.title">
        <xsl:with-param name="titlecontent">
          <xsl:value-of select="i:group-letter($group)"/>
        </xsl:with-param>
      </xsl:call-template>
      <fo:block>
        <xsl:apply-templates select="$entries" mode="index-primary">
          <xsl:with-param name="scope" select="$scope"/>
          <xsl:sort select="translate(&primary;, &lowercase;, &uppercase;)"/>
        </xsl:apply-templates>
      </fo:block>
    </fo:block>
  </xsl:if>
</xsl:template>

</xsl:stylesheet>

S/MIME Cryptographic Signature



[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [List Home]