@ -23,7 +23,7 @@ define('BIBTEXBROWSER','v__GITHUB__');
// support for configuration
// set with bibtexbrowser_configure, get with config_value
// you may have bibtexbrowser_configure('ENCODING', 'latin1 ') in bibtexbrowser.local.php
// you may have bibtexbrowser_configure('foo', 'bar ') in bibtexbrowser.local.php
global $CONFIGURATION ;
$CONFIGURATION = array ();
function bibtexbrowser_configure ( $key , $value ) {
@ -37,10 +37,10 @@ function bibtexbrowser_configure($key, $value) {
// the changes that require existing bibtexbrowser symbols should be in bibtexbrowser.after.php (included at the end of this file)
@ include ( preg_replace ( '/\.php$/' , '.local.php' , __FILE__ ));
// there is no encoding transformation from the bibtex file to the html file
// if your bibtex file contains 8 bits characters in utf-8
// change the following parameter
@ define ( 'ENCODING' , 'UTF-8' ); //@define('ENCODING','iso-8859-1');//define('ENCODING','windows-1252');
// the encoding of your bibtex file
@ define ( 'BIBTEX_INPUT_ENCODING' , 'UTF-8' ); //@define('BIBTEX_INPUT_ENCODING','iso-8859-1');//define('BIBTEX_INPUT_ENCODING','windows-1252');
// the encoding of the HTML output
@ define ( 'OUTPUT_ ENCODING' , 'UTF-8' );
// number of bib items per page
// we use the same parameter 'num' as Google
@ define ( 'PAGE_SIZE' , isset ( $_GET [ 'num' ]) ? ( preg_match ( '/^\d+$/' , $_GET [ 'num' ]) ? $_GET [ 'num' ] : 10000 ) : 14 );
@ -126,6 +126,14 @@ function bibtexbrowser_configure($key, $value) {
// for ordered_list, the index is given by HTML directly (in increasing order)
@ define ( 'BIBTEXBROWSER_LAYOUT' , 'table' );
// should the original bibtex be displayed or a reconstructed one with filtering
// values: original/reconstructed
// warning, with reconstructed, the latex markup for accents/diacritics is lost
@ define ( 'BIBTEXBROWSER_BIBTEX_VIEW' , 'original' );
// a list of fields that will not be shown in the bibtex view if BIBTEXBROWSER_BIBTEX_VIEW=reconstructed
@ define ( 'BIBTEXBROWSER_BIBTEX_VIEW_FILTEREDOUT' , 'comment|note|file' );
// Which is the first html <hN> level that should be used in embedded mode?
@ define ( 'BIBTEXBROWSER_HTMLHEADINGLEVEL' , 2 );
@ -655,7 +663,7 @@ see snippet of [[#StateBasedBibParser]]
class XMLPrettyPrinter {
function beginFile () {
header ( 'Content-type: text/xml;' );
print '<?xml version="1.0" encoding="' . ENCODING . '"?>' ;
print '<?xml version="1.0" encoding="' . OUTPUT_ ENCODING. '"?>' ;
print '<bibfile>' ;
}
@ -987,7 +995,7 @@ function latex2html($line) {
/** encodes strings for Z3988 URLs. Note that & are encoded as %26 and not as &. */
function s3988 ( $s ) {
// first remove the HTML entities (e.g. é) then urlencode them
return urlencode ( html_entity_decode ( $s , ENT_NOQUOTES , ENCODING ) );
return urlencode ( $s );
}
/**
@ -1091,8 +1099,20 @@ class BibEntry {
// but instead could contain HTML code (with links using the character "~" for example)
// so "comment" is not transformed too
if ( $name != 'url' && $name != 'comment' ) {
// 1. trim space
$value = xtrim ( $value );
// 2. transform Latex markup to HTML entities (easier than a one to one mapping to each character)
// HTML entity is an intermediate format
$value = latex2html ( $value );
// 3. transform existing encoded character in the new format
if ( function_exists ( 'mb_convert_encoding' ) && OUTPUT_ENCODING != BIBTEX_INPUT_ENCODING ) {
$vaue = mb_convert_encoding ( $value , OUTPUT_ENCODING , BIBTEX_INPUT_ENCODING );
}
// 4. transform to the target output encoding
$value = html_entity_decode ( $value , ENT_QUOTES | ENT_XHTML , OUTPUT_ENCODING );
} else {
//echo "xx".$value."xx\n";
}
@ -1492,9 +1512,25 @@ class BibEntry {
return $this ;
}
function getText () {
/** Returns the verbatim text of this bib entry. */
return $this -> text ;
function getText () {
if ( BIBTEXBROWSER_BIBTEX_VIEW == 'original' ) {
return $this -> text ;
}
if ( BIBTEXBROWSER_BIBTEX_VIEW == 'reconstructed' ) {
$result = '@' . $this -> getType () . '{' . $this -> getKey () . " , \n " ;
foreach ( $this -> fields as $k => $v ) {
if ( ! preg_match ( '/^(' . BIBTEXBROWSER_BIBTEX_VIEW_FILTEREDOUT . ')$/i' , $k )
&& ! preg_match ( '/^(key|' . Q_INNER_AUTHOR . '|' . Q_INNER_TYPE . ')$/i' , $k ) )
{
$result .= ' ' . $k . ' = {' . $v . '},' . " \n " ;
}
}
$result .= " } \n " ;
return $result ;
}
throw new Exception ( 'incorrect value of BIBTEXBROWSER_BIBTEX_VIEW: ' + BIBTEXBROWSER_BIBTEX_VIEW );
}
/** Returns true if this bib entry contains the given phrase ( PREG regexp )
@ -1502,15 +1538,10 @@ class BibEntry {
* Note that this method is NOT case sensitive */
function hasPhrase ( $phrase , $field = null ) {
// 2010-01-25
// bug found by jacob kellner
// we have to search in the formatted fileds and not in the raw entry
// we have to search in the formatted fields and not in the raw entry
// i.e. all latex markups are not considered for searches
// i.e. added join(" ",$this->getFields())
// and html_entity_decode
if ( ! $field ) {
// warning html_entity_decode supports encoding since PHP5
return preg_match ( '/' . $phrase . '/i' , $this -> getConstants () . ' ' .@ html_entity_decode ( join ( " " , $this -> getFields ()), ENT_NOQUOTES , ENCODING ));
return preg_match ( '/' . $phrase . '/i' , $this -> getConstants () . ' ' . join ( " " , $this -> getFields ()));
//return stripos($this->getText(), $phrase) !== false;
}
if ( $this -> hasField ( $field ) && ( preg_match ( '/' . $phrase . '/i' , $this -> getField ( $field )) ) ) {
@ -1659,7 +1690,7 @@ class BibEntry {
function toEntryUnformatted () {
$result = " " ;
$result .= '<pre class="purebibtex">' ; // pre is nice when it is embedded with no CSS available
$entry = htmlspecialchars ( $this -> getFullText ());
$entry = htmlspecialchars ( $this -> getFullText (), ENT_NOQUOTES | ENT_XHTML , OUTPUT_ENCODING );
// Fields that should be hyperlinks
// the order matters
@ -2717,7 +2748,7 @@ function query2title(&$query) {
}
$v = join ( $v , ',' );
}
$headers [ $k ] = __ ( ucwords ( $k )) . ': ' . ucwords ( htmlspecialchars ( $v ));
$headers [ $k ] = __ ( ucwords ( $k )) . ': ' . ucwords ( htmlspecialchars ( $v , ENT_NOQUOTES | ENT_XHTML , OUTPUT_ENCODING ));
}
return join ( ' & ' , $headers );
}
@ -3796,13 +3827,13 @@ function HTMLTemplate(&$content) {
// when we load a page with AJAX
// the HTTP header is taken into account, not the <meta http-equiv>
header ( 'Content-type: text/html; charset=' . ENCODING );
header ( 'Content-type: text/html; charset=' . OUTPUT_ ENCODING);
echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' . " \n " ;
?>
< html xmlns = " http://www.w3.org/1999/xhtml " >
< head >
< meta http - equiv = " Content-Type " content = " text/html; charset=<?php echo ENCODING ?> " />
< meta http - equiv = " Content-Type " content = " text/html; charset=<?php echo OUTPUT_ ENCODING ?> " />
< meta name = " generator " content = " bibtexbrowser v__GITHUB__ " />
< ? php
// if ($content->getRSS()!='') echo '<link rel="alternate" type="application/rss+xml" title="RSS" href="'.$content->getRSS().'&rss" />';
@ -3900,10 +3931,10 @@ class BibtexDisplay {
function setWrapper ( $x ) { $x -> wrapper = 'NoWrapper' ; }
function display () {
header ( 'Content-type: text/plain; charset=' . ENCODING );
header ( 'Content-type: text/plain; charset=' . OUTPUT_ ENCODING);
echo '% generated by bibtexbrowser <http://www.monperrus.net/martin/bibtexbrowser/>' . " \n " ;
echo '% ' .@ $this -> title . " \n " ;
echo '% Encoding: ' . ENCODING . " \n " ;
echo '% Encoding: ' . OUTPUT_ ENCODING. " \n " ;
foreach ( $this -> entries as $bibentry ) { echo $bibentry -> getText () . " \n " ; }
exit ;
}
@ -4022,15 +4053,12 @@ class RSSDisplay {
function setTitle ( $title ) { $this -> title = $title ; return $this ; }
/** tries to always output a valid XML / RSS string
* based on ENCODING , HTML tags , and the transformations
* based on OUTPUT_ ENCODING, HTML tags , and the transformations
* that happened in latex2html */
function text2rss ( $desc ) {
// first strip HTML tags
$desc = strip_tags ( $desc );
// then decode characters encoded by latex2html, preserve ENCODING
$desc = html_entity_decode ( $desc , ENT_COMPAT , ENCODING );
// some entities may still be here, we remove them
// we replace html entities e.g. é by nothing
// however XML entities are kept (e.g. 5)
@ -4044,8 +4072,8 @@ class RSSDisplay {
// final test with encoding:
if ( function_exists ( 'mb_check_encoding' )) { // (PHP 4 >= 4.4.3, PHP 5 >= 5.1.3)
if ( ! mb_check_encoding ( $desc , ENCODING )) {
return 'encoding error: please check the content of ENCODING' ;
if ( ! mb_check_encoding ( $desc , OUTPUT_ENCODING , BIBTEX_INPUT_ ENCODING)) {
return 'encoding error: please check the content of OUTPUT_ ENCODING' ;
}
}
@ -4061,7 +4089,7 @@ class RSSDisplay {
function display () {
header ( 'Content-type: application/rss+xml' );
echo '<?xml version="1.0" encoding="' . ENCODING . '"?>' ;
echo '<?xml version="1.0" encoding="' . OUTPUT_ ENCODING. '"?>' ;
//
?>
@ -4082,7 +4110,6 @@ class RSSDisplay {
< description >
< ? php
// we are in XML, so we cannot have HTML entitites
// however the encoding is specified in preamble
echo $this -> text2rss ( bib2html ( $bibentry ) . " \n " . $bibentry -> getAbstract ());
?>
</ description >
@ -4259,7 +4286,7 @@ class Dispatcher {
}
function search () {
if ( preg_match ( '/utf-?8/i' , ENCODING )) {
if ( preg_match ( '/utf-?8/i' , OUTPUT_ ENCODING)) {
$_GET [ Q_SEARCH ] = urldecode ( $_GET [ Q_SEARCH ]);
}
$this -> query [ Q_SEARCH ] = $_GET [ Q_SEARCH ];
@ -4437,7 +4464,7 @@ class Dispatcher {
< html xmlns = " http://www.w3.org/1999/xhtml " >
< head >
< meta name = " generator " content = " bibtexbrowser v__GITHUB__ " />
< meta http - equiv = " Content-Type " content = " text/html; charset=<?php echo ENCODING ?> " />
< meta http - equiv = " Content-Type " content = " text/html; charset=<?php echo OUTPUT_ ENCODING ?> " />
< title > You are browsing < ? php echo $_GET [ Q_FILE ]; ?> with bibtexbrowser</title>
</ head >
< frameset cols = " 15%,* " >