diff --git a/bibtexbrowser-documentation.wiki b/bibtexbrowser-documentation.wiki index 4a805b0..285e447 100755 --- a/bibtexbrowser-documentation.wiki +++ b/bibtexbrowser-documentation.wiki @@ -41,7 +41,7 @@ This documentation is collaborative, you can improve it using a [[https://github * bibtexbrowser can display all entries with a bib keyword e.g. "?keywords=components". It matches against the "keywords" bibtex field. [[http://bibtexbrowser.sourceforge.net/bibtexbrowser.php?bib=metrics.bib&keywords=components|demo]] * bibtexbrowser outputs valid XHTML 1.0 Transitional * bibtexbrowser can display all entries for an author [[http://www.monperrus.net/martin/bibtexbrowser.php?bib=metrics.bib&author=Barbara+A.+Kitchenham|demo]] -* bibtexbrowser can be used with different encodings (change the default iso-8859-1 encoding if your bib file is in UTF-8 ''define('ENCODING','UTF-8')'' ) +* bibtexbrowser can be used with different encodings (the default is UTF-8; if your bib file uses another encoding, e.g. ISO-8859-1, set ''define('BIBTEX_INPUT_ENCODING','ISO-8859-1')'' ) =====Download===== @@ -202,10 +202,8 @@ You can use your personalized function to add support for new fields in bibtex ( ====How to specify the encoding of bibtex files (UTF-8/ISO-8859-1/etc.)? ==== - - By default, bibtexbrowser assumes that the bibtex file is UTF-8 encoded. If you want to change it to e.g. ISO-8859-1, add into ''bibtexbrowser.local.php'': -
define('ENCODING','ISO-8859-1');
+
define('BIBTEX_INPUT_ENCODING','ISO-8859-1');
Note that if the bibtex only contains latex-encoded diacritics (e.g. ''\'e''), it does not matter. The encoding of the bibtex file and the one of the generated HTML is identical. @@ -410,6 +408,15 @@ class PgmDispatcher extends Dispatcher { ?> +====How to remove fields from the bibtex text?==== + +If you want to remove some fields (e.g. abbrv and comment) from the resulting bibtex, add in ''bibtexbrowser.local.php'': +
+define('BIBTEXBROWSER_BIBTEX_VIEW','reconstructed');
+define('BIBTEXBROWSER_BIBTEX_VIEW_FILTEREDOUT','abbrv|comment');
+
+ + =====Related tools===== Old-fashioned: diff --git a/bibtexbrowser.php b/bibtexbrowser.php index 9c48240..6d027e7 100755 --- a/bibtexbrowser.php +++ b/bibtexbrowser.php @@ -23,7 +23,7 @@ define('BIBTEXBROWSER','v__GITHUB__'); // support for configuration // set with bibtexbrowser_configure, get with config_value -// you may have bibtexbrowser_configure('ENCODING', 'latin1') in bibtexbrowser.local.php +// you may have bibtexbrowser_configure('foo', 'bar') in bibtexbrowser.local.php global $CONFIGURATION; $CONFIGURATION = array(); function bibtexbrowser_configure($key, $value) { @@ -37,10 +37,10 @@ function bibtexbrowser_configure($key, $value) { // the changes that require existing bibtexbrowser symbols should be in bibtexbrowser.after.php (included at the end of this file) @include(preg_replace('/\.php$/','.local.php',__FILE__)); -// there is no encoding transformation from the bibtex file to the html file -// if your bibtex file contains 8 bits characters in utf-8 -// change the following parameter -@define('ENCODING','UTF-8');//@define('ENCODING','iso-8859-1');//define('ENCODING','windows-1252'); +// the encoding of your bibtex file +@define('BIBTEX_INPUT_ENCODING','UTF-8');//@define('BIBTEX_INPUT_ENCODING','iso-8859-1');//define('BIBTEX_INPUT_ENCODING','windows-1252'); +// the encoding of the HTML output +@define('OUTPUT_ENCODING','UTF-8'); // number of bib items per page // we use the same parameter 'num' as Google @define('PAGE_SIZE',isset($_GET['num'])?(preg_match('/^\d+$/',$_GET['num'])?$_GET['num']:10000):14); @@ -126,6 +126,14 @@ function bibtexbrowser_configure($key, $value) { // for ordered_list, the index is given by HTML directly (in increasing order) @define('BIBTEXBROWSER_LAYOUT','table'); +// should the original bibtex be displayed or a reconstructed one with filtering +// values: original/reconstructed +// warning, with reconstructed, the latex markup for accents/diacritics is lost +@define('BIBTEXBROWSER_BIBTEX_VIEW','original'); +// a list of 
fields that will not be shown in the bibtex view if BIBTEXBROWSER_BIBTEX_VIEW=reconstructed +@define('BIBTEXBROWSER_BIBTEX_VIEW_FILTEREDOUT','comment|note|file'); + + // Which is the first html level that should be used in embedded mode? @define('BIBTEXBROWSER_HTMLHEADINGLEVEL', 2); @@ -655,7 +663,7 @@ see snippet of [[#StateBasedBibParser]] class XMLPrettyPrinter { function beginFile() { header('Content-type: text/xml;'); - print ''; + print ''; print ''; } @@ -987,7 +995,7 @@ function latex2html($line) { /** encodes strings for Z3988 URLs. Note that & are encoded as %26 and not as &. */ function s3988($s) { // first remove the HTML entities (e.g. é) then urlencode them - return urlencode(html_entity_decode($s, ENT_NOQUOTES, ENCODING)); + return urlencode($s); } /** @@ -1091,8 +1099,20 @@ class BibEntry { // but instead could contain HTML code (with links using the character "~" for example) // so "comment" is not transformed too if ($name!='url' && $name!='comment') { + // 1. trim space $value = xtrim($value); + + // 2. transform Latex markup to HTML entities (easier than a one to one mapping to each character) + // HTML entity is an intermediate format $value = latex2html($value); + + // 3. transform existing encoded character in the new format + if (function_exists('mb_convert_encoding') && OUTPUT_ENCODING != BIBTEX_INPUT_ENCODING) { + $value = mb_convert_encoding($value, OUTPUT_ENCODING, BIBTEX_INPUT_ENCODING); + } + + // 4. transform to the target output encoding + $value = html_entity_decode($value, ENT_QUOTES|ENT_XHTML, OUTPUT_ENCODING); } else { //echo "xx".$value."xx\n"; } @@ -1492,9 +1512,25 @@ class BibEntry { return $this; } - function getText() { + /** Returns the verbatim text of this bib entry. 
*/ - return $this->text; + function getText() { + if (BIBTEXBROWSER_BIBTEX_VIEW == 'original') { + return $this->text; + } + if (BIBTEXBROWSER_BIBTEX_VIEW == 'reconstructed') { + $result = '@'.$this->getType().'{'.$this->getKey().",\n"; + foreach ($this->fields as $k=>$v) { + if ( !preg_match('/^('.BIBTEXBROWSER_BIBTEX_VIEW_FILTEREDOUT.')$/i', $k) + && !preg_match('/^(key|'.Q_INNER_AUTHOR.'|'.Q_INNER_TYPE.')$/i', $k) ) + { + $result .= ' '.$k.' = {'.$v.'},'."\n"; + } + } + $result .= "}\n"; + return $result; + } + throw new Exception('incorrect value of BIBTEXBROWSER_BIBTEX_VIEW: '.BIBTEXBROWSER_BIBTEX_VIEW); } /** Returns true if this bib entry contains the given phrase (PREG regexp) @@ -1502,15 +1538,10 @@ class BibEntry { * Note that this method is NOT case sensitive */ function hasPhrase($phrase, $field = null) { - // 2010-01-25 - // bug found by jacob kellner - // we have to search in the formatted fileds and not in the raw entry + // we have to search in the formatted fields and not in the raw entry // i.e. all latex markups are not considered for searches - // i.e. added join(" ",$this->getFields()) - // and html_entity_decode if (!$field) { - // warning html_entity_decode supports encoding since PHP5 - return preg_match('/'.$phrase.'/i',$this->getConstants().' '.@html_entity_decode(join(" ",$this->getFields()),ENT_NOQUOTES,ENCODING)); + return preg_match('/'.$phrase.'/i',$this->getConstants().' '.join(" ",$this->getFields())); //return stripos($this->getText(), $phrase) !== false; } if ($this->hasField($field) && (preg_match('/'.$phrase.'/i',$this->getField($field)) ) ) { @@ -1659,7 +1690,7 @@ class BibEntry { function toEntryUnformatted() { $result = ""; $result .= '
'; // pre is nice when it is embedded with no CSS available
-    $entry = htmlspecialchars($this->getFullText());
+    $entry = htmlspecialchars($this->getFullText(),ENT_NOQUOTES|ENT_XHTML, OUTPUT_ENCODING);
 
     // Fields that should be hyperlinks
     // the order matters
@@ -2717,7 +2748,7 @@ function query2title(&$query) {
 	}
 	$v = join($v, ',');
       }
-      $headers[$k] = __(ucwords($k)).': '.ucwords(htmlspecialchars($v));
+      $headers[$k] = __(ucwords($k)).': '.ucwords(htmlspecialchars($v,ENT_NOQUOTES|ENT_XHTML, OUTPUT_ENCODING));
   }
   return join(' & ',$headers);
 }
@@ -3796,13 +3827,13 @@ function HTMLTemplate(&$content) {
 
 // when we load a page with AJAX
 // the HTTP header is taken into account, not the 
-header('Content-type: text/html; charset='.ENCODING);
+header('Content-type: text/html; charset='.OUTPUT_ENCODING);
 echo ''."\n";
 
 ?>
 
 
-
+
 
 getRSS()!='') echo '';
@@ -3900,10 +3931,10 @@ class BibtexDisplay {
   function setWrapper($x) { $x->wrapper = 'NoWrapper'; }
 
   function display() {
-    header('Content-type: text/plain; charset='.ENCODING);
+    header('Content-type: text/plain; charset='.OUTPUT_ENCODING);
     echo '% generated by bibtexbrowser '."\n";
     echo '% '.@$this->title."\n";
-    echo '% Encoding: '.ENCODING."\n";
+    echo '% Encoding: '.OUTPUT_ENCODING."\n";
     foreach($this->entries as $bibentry) { echo $bibentry->getText()."\n"; }
     exit;
   }
@@ -4022,15 +4053,12 @@ class RSSDisplay {
   function setTitle($title) { $this->title = $title; return $this; }
 
   /** tries to always output a valid XML/RSS string
-    * based on ENCODING, HTML tags, and the transformations
+    * based on OUTPUT_ENCODING, HTML tags, and the transformations
     * that happened in latex2html */
   function text2rss($desc) {
     // first strip HTML tags
     $desc = strip_tags($desc);
 
-    // then decode characters encoded by latex2html, preserve ENCODING
-    $desc = html_entity_decode($desc, ENT_COMPAT, ENCODING);
-
     // some entities may still be here, we remove them
     // we replace html entities e.g. é by nothing
     // however XML entities are kept (e.g. 5)
@@ -4044,8 +4072,8 @@ class RSSDisplay {
 
     // final test with encoding:
     if (function_exists('mb_check_encoding')) { // (PHP 4 >= 4.4.3, PHP 5 >= 5.1.3)
-      if (!mb_check_encoding($desc,ENCODING)) {
-        return 'encoding error: please check the content of ENCODING';
+      if (!mb_check_encoding($desc,OUTPUT_ENCODING)) { // mb_check_encoding() accepts (value, encoding) only
+        return 'encoding error: please check the content of OUTPUT_ENCODING';
       }
     }
 
@@ -4061,7 +4089,7 @@ class RSSDisplay {
 
   function display() {
     header('Content-type: application/rss+xml');
-    echo '';
+    echo '';
 //
 
 ?>
@@ -4082,7 +4110,6 @@ class RSSDisplay {
          
           text2rss(bib2html($bibentry)."\n".$bibentry->getAbstract());
           ?>
           
@@ -4259,7 +4286,7 @@ class Dispatcher {
   }
 
   function search() {
-    if (preg_match('/utf-?8/i',ENCODING)) {
+    if (preg_match('/utf-?8/i',OUTPUT_ENCODING)) {
       $_GET[Q_SEARCH] = urldecode($_GET[Q_SEARCH]);
     }
     $this->query[Q_SEARCH]=$_GET[Q_SEARCH];
@@ -4437,7 +4464,7 @@ class Dispatcher {
     
     
     
-    
+    
     You are browsing <?php echo $_GET[Q_FILE]; ?> with bibtexbrowser