View Javadoc

1   package org.apache.turbine.services.mimetype.util;
2   
3   /*
4    * Copyright 2001-2005 The Apache Software Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License")
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.File;
20  import java.io.FileInputStream;
21  import java.io.IOException;
22  import java.io.InputStream;
23  
24  import java.util.HashMap;
25  import java.util.Hashtable;
26  import java.util.Locale;
27  import java.util.Map;
28  import java.util.Properties;
29  
30  /***
31   * This class maintains a set of mappers defining mappings
32   * between locales and the corresponding charsets. The mappings
33   * are defined as properties between locale and charset names.
34   * The definitions can be listed in property files located in user's
35   * home directory, Java home directory or the current class jar.
36   * In addition, this class maintains static default mappings
37   * and constructors support application specific mappings.
38   *
39   * @author <a href="mailto:ilkka.priha@simsoft.fi">Ilkka Priha</a>
40   * @version $Id: CharSetMap.java 264148 2005-08-29 14:21:04Z henning $
41   */
public class CharSetMap
{
    /**
     * The default charset when nothing else is applicable.
     */
    public static final String DEFAULT_CHARSET = "ISO-8859-1";

    /**
     * The name for charset mapper resources.
     */
    public static final String CHARSET_RESOURCE = "charset.properties";

    /**
     * Priorities of available mappers. The values are indexes into
     * the mapper array; a lookup walks the array from index 0 upwards
     * and returns the first match, so a lower value wins.
     */
    private static final int MAP_CACHE = 0;
    private static final int MAP_PROG = 1;
    private static final int MAP_HOME = 2;
    private static final int MAP_SYS = 3;
    private static final int MAP_JAR = 4;
    private static final int MAP_COM = 5;

    /**
     * A common charset mapper for languages. It is shared by all
     * instances and never modified after class initialization;
     * {@link #setCommonCharSet(String, String)} works on a per-instance copy.
     */
    private static final HashMap commonMapper = new HashMap();

    static
    {
        commonMapper.put("ar", "ISO-8859-6");
        commonMapper.put("be", "ISO-8859-5");
        commonMapper.put("bg", "ISO-8859-5");
        commonMapper.put("ca", "ISO-8859-1");
        commonMapper.put("cs", "ISO-8859-2");
        commonMapper.put("da", "ISO-8859-1");
        commonMapper.put("de", "ISO-8859-1");
        commonMapper.put("el", "ISO-8859-7");
        commonMapper.put("en", "ISO-8859-1");
        commonMapper.put("es", "ISO-8859-1");
        commonMapper.put("et", "ISO-8859-1");
        commonMapper.put("fi", "ISO-8859-1");
        commonMapper.put("fr", "ISO-8859-1");
        // "he" is the current ISO 639 code for Hebrew. Locale historically
        // rewrote it to the legacy "iw" (listed below), but newer Java
        // releases report "he", so both spellings must be mapped.
        commonMapper.put("he", "ISO-8859-8");
        commonMapper.put("hr", "ISO-8859-2");
        commonMapper.put("hu", "ISO-8859-2");
        commonMapper.put("is", "ISO-8859-1");
        commonMapper.put("it", "ISO-8859-1");
        commonMapper.put("iw", "ISO-8859-8");
        commonMapper.put("ja", "Shift_JIS");
        commonMapper.put("ko", "EUC-KR");
        commonMapper.put("lt", "ISO-8859-2");
        commonMapper.put("lv", "ISO-8859-2");
        commonMapper.put("mk", "ISO-8859-5");
        commonMapper.put("nl", "ISO-8859-1");
        commonMapper.put("no", "ISO-8859-1");
        commonMapper.put("pl", "ISO-8859-2");
        commonMapper.put("pt", "ISO-8859-1");
        commonMapper.put("ro", "ISO-8859-2");
        commonMapper.put("ru", "ISO-8859-5");
        commonMapper.put("sh", "ISO-8859-5");
        commonMapper.put("sk", "ISO-8859-2");
        commonMapper.put("sl", "ISO-8859-2");
        commonMapper.put("sq", "ISO-8859-2");
        commonMapper.put("sr", "ISO-8859-5");
        commonMapper.put("sv", "ISO-8859-1");
        commonMapper.put("tr", "ISO-8859-9");
        commonMapper.put("uk", "ISO-8859-5");
        commonMapper.put("zh", "GB2312");
        commonMapper.put("zh_TW", "Big5");
    }

    /**
     * An array of available charset mappers, indexed by the
     * <code>MAP_*</code> priorities. Slots without a mapper are null.
     */
    private final Map[] mappers = new Map[6];

    /**
     * Loads mappings from a stream. The stream is not closed by this
     * method; closing it remains the caller's responsibility.
     *
     * @param input an input stream.
     * @return the mappings.
     * @throws IOException for an incorrect stream.
     */
    protected static Map loadStream(InputStream input)
            throws IOException
    {
        Properties props = new Properties();
        props.load(input);
        return new HashMap(props);
    }

    /**
     * Loads mappings from a file. The file is opened and closed
     * by this method.
     *
     * @param file a file.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadFile(File file)
            throws IOException
    {
        InputStream input = new FileInputStream(file);
        try
        {
            return loadStream(input);
        }
        finally
        {
            // Properties.load leaves the stream open, so release
            // the file handle here on success and on failure.
            closeQuietly(input);
        }
    }

    /**
     * Loads mappings from a file path.
     *
     * @param path a file path.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadPath(String path)
            throws IOException
    {
        return loadFile(new File(path));
    }

    /**
     * Loads mappings from a resource resolved relative to this class.
     *
     * @param name a resource name.
     * @return the mappings, or null if the resource does not exist
     *         or cannot be read.
     */
    protected static Map loadResource(String name)
    {
        InputStream input = CharSetMap.class.getResourceAsStream(name);
        if (input == null)
        {
            return null;
        }

        try
        {
            return loadStream(input);
        }
        catch (IOException x)
        {
            // An unreadable resource is treated like a missing one.
            return null;
        }
        finally
        {
            closeQuietly(input);
        }
    }

    /**
     * Closes a stream that was only read from, ignoring a failure
     * to close so that it can neither mask an earlier exception nor
     * discard mappings that were already loaded.
     *
     * @param input the stream to close.
     */
    private static void closeQuietly(InputStream input)
    {
        try
        {
            input.close();
        }
        catch (IOException ignored)
        {
            // Nothing useful can be done about a failed close here.
        }
    }

    /**
     * Constructs a new charset map with default mappers. Mapping files
     * in the user's home directory and in the Java home "lib" directory
     * as well as the "/META-INF/" resource are optional; a location
     * that is missing or unreadable is silently skipped.
     */
    public CharSetMap()
    {
        String path;
        try
        {
            // Check whether the user directory contains mappings.
            path = System.getProperty("user.home");
            if (path != null)
            {
                path = path + File.separator + CHARSET_RESOURCE;
                mappers[MAP_HOME] = loadPath(path);
            }
        }
        catch (Exception ignored)
        {
            // Best effort: the file is optional, so any failure to
            // locate or read it just leaves this mapper unset.
        }

        try
        {
            // Check whether the system directory contains mappings.
            path = System.getProperty("java.home") +
                    File.separator + "lib" + File.separator + CHARSET_RESOURCE;
            mappers[MAP_SYS] = loadPath(path);
        }
        catch (Exception ignored)
        {
            // Best effort: the file is optional, so any failure to
            // locate or read it just leaves this mapper unset.
        }

        // Check whether the current class jar contains mappings.
        mappers[MAP_JAR] = loadResource("/META-INF/" + CHARSET_RESOURCE);

        // Set the common mapper to have the lowest priority.
        mappers[MAP_COM] = commonMapper;

        // Set the cache mapper to have the highest priority.
        mappers[MAP_CACHE] = new Hashtable();
    }

    /**
     * Constructs a charset map from properties.
     *
     * @param props charset mapping properties.
     */
    public CharSetMap(Properties props)
    {
        this();
        mappers[MAP_PROG] = new HashMap(props);
    }

    /**
     * Constructs a charset map read from a stream. The stream is
     * not closed by this constructor.
     *
     * @param input an input stream.
     * @throws IOException for an incorrect stream.
     */
    public CharSetMap(InputStream input)
            throws IOException
    {
        this();
        mappers[MAP_PROG] = loadStream(input);
    }

    /**
     * Constructs a charset map read from a property file.
     *
     * @param file a property file.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(File file)
            throws IOException
    {
        this();
        mappers[MAP_PROG] = loadFile(file);
    }

    /**
     * Constructs a charset map read from a property file path.
     *
     * @param path a property file path.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(String path)
            throws IOException
    {
        this();
        mappers[MAP_PROG] = loadPath(path);
    }

    /**
     * Sets a locale-charset mapping. The application specific mapper
     * is replaced by an updated copy and the cache is cleared.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    public synchronized void setCharSet(String key,
                                        String charset)
    {
        Map current = mappers[MAP_PROG];
        // Work on a copy so the map that is currently installed
        // is never modified in place.
        HashMap mapper = current != null ?
                new HashMap(current) : new HashMap();
        mapper.put(key, charset);
        mappers[MAP_PROG] = mapper;
        mappers[MAP_CACHE].clear();
    }

    /**
     * Gets the charset for a locale. First a locale specific charset
     * is searched for, then a country specific one and lastly a language
     * specific one. If none is found, the default charset is returned.
     *
     * @param locale the locale.
     * @return the charset.
     */
    public String getCharSet(Locale locale)
    {
        // Check the cache first.
        String key = locale.toString();
        if (key.length() == 0)
        {
            // No textual form: fall back to a variant-only key.
            key = "__" + locale.getVariant();
            if (key.length() == 2)
            {
                // Nothing to search with at all.
                return DEFAULT_CHARSET;
            }
        }
        String charset = searchCharSet(key);
        if (charset.length() == 0)
        {
            // Not found, perform a full search and update the cache.
            String[] items = new String[3];
            items[2] = locale.getVariant();
            items[1] = locale.getCountry();
            items[0] = locale.getLanguage();
            charset = searchCharSet(items);
            if (charset.length() == 0)
            {
                charset = DEFAULT_CHARSET;
            }
            mappers[MAP_CACHE].put(key, charset);
        }
        return charset;
    }

    /**
     * Gets the charset for a locale with a variant. The search
     * is performed in the following order:
     * "lang"_"country"_"variant"="charset",
     * _"country"_"variant"="charset",
     * "lang"__"variant"="charset",
     * __"variant"="charset",
     * "lang"_"country"="charset",
     * _"country"="charset",
     * "lang"="charset".
     * If nothing of the above is found, the default charset is returned.
     * A null or empty variant makes this method behave like
     * {@link #getCharSet(Locale)}.
     *
     * @param locale the locale.
     * @param variant a variant field.
     * @return the charset.
     */
    public String getCharSet(Locale locale,
                             String variant)
    {
        if ((variant == null) ||
                (variant.length() == 0))
        {
            return getCharSet(locale);
        }

        // Build the full key and check the cache first.
        String key = locale.toString();
        if (key.length() == 0)
        {
            key = "__" + locale.getVariant();
            if (key.length() > 2)
            {
                key += '_' + variant;
            }
            else
            {
                key += variant;
            }
        }
        else if (locale.getCountry().length() == 0)
        {
            // The empty country slot is kept as a double underscore.
            key += "__" + variant;
        }
        else
        {
            key += '_' + variant;
        }

        String charset = searchCharSet(key);
        if (charset.length() == 0)
        {
            // Not found, perform a full search and update the cache.
            String[] items = new String[4];
            items[3] = variant;
            items[2] = locale.getVariant();
            items[1] = locale.getCountry();
            items[0] = locale.getLanguage();
            charset = searchCharSet(items);
            if (charset.length() == 0)
            {
                charset = DEFAULT_CHARSET;
            }
            mappers[MAP_CACHE].put(key, charset);
        }
        return charset;
    }

    /**
     * Gets the charset for a specified key.
     *
     * @param key the key for the charset.
     * @return the found charset or the default one.
     */
    public String getCharSet(String key)
    {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : DEFAULT_CHARSET;
    }

    /**
     * Gets the charset for a specified key.
     *
     * @param key the key for the charset.
     * @param def the default charset if none is found.
     * @return the found charset or the given default.
     */
    public String getCharSet(String key,
                             String def)
    {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : def;
    }

    /**
     * Searches for a charset for a specified locale. The search starts
     * from the last item and drops one trailing item per round, so the
     * most specific combinations are tried first.
     *
     * @param items an array of locale items.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items)
    {
        String charset;
        StringBuffer sb = new StringBuffer();
        for (int i = items.length; i > 0; i--)
        {
            charset = searchCharSet(items, sb, i);
            if (charset.length() > 0)
            {
                return charset;
            }
            // Start the next, shorter round with an empty key.
            sb.setLength(0);
        }
        return "";
    }

    /**
     * Searches recursively for a charset for a specified locale.
     * The item at index <code>count - 1</code> is prepended to the key
     * under construction; keys extended with the preceding items are
     * tried first and the key without them last.
     *
     * @param items an array of locale items.
     * @param base a buffer of base items.
     * @param count the number of items to go through.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items,
                                 StringBuffer base,
                                 int count)
    {
        if ((--count >= 0) &&
                (items[count] != null) &&
                (items[count].length() > 0))
        {
            String charset;
            base.insert(0, items[count]);
            // The part of the buffer that belongs to this level; anything
            // a nested call prepends is cut off again after it returns.
            int length = base.length();
            for (int i = count; i > 0; i--)
            {
                // A separator is added before descending to the directly
                // preceding item and again before the first (language)
                // item, which is how a skipped country ends up as a
                // double underscore in the key.
                if ((i == count) ||
                        (i <= 1))
                {
                    base.insert(0, '_');
                    length++;
                }
                charset = searchCharSet(items, base, i);
                if (charset.length() > 0)
                {
                    return charset;
                }
                base.delete(0, base.length() - length);
            }
            return searchCharSet(base.toString());
        }
        else
        {
            return "";
        }
    }

    /**
     * Searches for a charset for a specified key. The mappers are
     * consulted in priority order and the outcome, including a miss,
     * is stored in the cache mapper.
     *
     * @param key the key for the charset.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String key)
    {
        if ((key != null) &&
                (key.length() > 0))
        {
            // Go through mappers.
            Map mapper;
            String charset;
            for (int i = 0; i < mappers.length; i++)
            {
                mapper = mappers[i];
                if (mapper != null)
                {
                    charset = (String) mapper.get(key);
                    if (charset != null)
                    {
                        // Update the cache.
                        if (i > MAP_CACHE)
                        {
                            mappers[MAP_CACHE].put(key, charset);
                        }
                        return charset;
                    }
                }
            }

            // Not found, add an empty string to the cache.
            mappers[MAP_CACHE].put(key, "");
        }
        return "";
    }

    /**
     * Sets a common locale-charset mapping. The change is made to a
     * copy installed in this instance only; the shared static defaults
     * are left untouched. The cache is cleared.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    protected synchronized void setCommonCharSet(String key,
                                                 String charset)
    {
        HashMap mapper = new HashMap(mappers[MAP_COM]);
        mapper.put(key, charset);
        mappers[MAP_COM] = mapper;
        mappers[MAP_CACHE].clear();
    }
}