Browse Source

adds support for filtering the bibtex

pull/48/head
Martin Monperrus 10 years ago
parent
commit
a535ef30bc
  1. 15
      bibtexbrowser-documentation.wiki
  2. 89
      bibtexbrowser.php

15
bibtexbrowser-documentation.wiki

@ -41,7 +41,7 @@ This documentation is collaborative, you can improve it using a [[https://github
* bibtexbrowser can display all entries with a bib keyword e.g. "?keywords=components". It matches against the "keywords" bibtex field. [[http://bibtexbrowser.sourceforge.net/bibtexbrowser.php?bib=metrics.bib&keywords=components|demo]] * bibtexbrowser can display all entries with a bib keyword e.g. "?keywords=components". It matches against the "keywords" bibtex field. [[http://bibtexbrowser.sourceforge.net/bibtexbrowser.php?bib=metrics.bib&keywords=components|demo]]
* bibtexbrowser outputs valid XHTML 1.0 Transitional * bibtexbrowser outputs valid XHTML 1.0 Transitional
* bibtexbrowser can display all entries for an author [[http://www.monperrus.net/martin/bibtexbrowser.php?bib=metrics.bib&author=Barbara+A.+Kitchenham|demo]] * bibtexbrowser can display all entries for an author [[http://www.monperrus.net/martin/bibtexbrowser.php?bib=metrics.bib&author=Barbara+A.+Kitchenham|demo]]
* bibtexbrowser can be used with different encodings (change the default iso-8859-1 encoding if your bib file is in UTF-8 ''define('ENCODING','UTF-8')'' )
* bibtexbrowser can be used with different encodings (the default input encoding is UTF-8; if your bib file uses another encoding, set it with e.g. ''define('BIBTEX_INPUT_ENCODING','ISO-8859-1')'' )
=====Download===== =====Download=====
@ -202,10 +202,8 @@ You can use your personalized function to add support for new fields in bibtex (
====How to specify the encoding of bibtex files (UTF-8/ISO-8859-1/etc.)? ==== ====How to specify the encoding of bibtex files (UTF-8/ISO-8859-1/etc.)? ====
By default, bibtexbrowser assumes that the bibtex file is UTF-8 encoded. If you want to change it to e.g. ISO-8859-1, add into ''bibtexbrowser.local.php'': By default, bibtexbrowser assumes that the bibtex file is UTF-8 encoded. If you want to change it to e.g. ISO-8859-1, add into ''bibtexbrowser.local.php'':
<pre>define('ENCODING','ISO-8859-1');</pre>
<pre>define('BIBTEX_INPUT_ENCODING','ISO-8859-1');</pre>
Note that if the bibtex only contains latex-encoded diacritics (e.g. ''\'e''), it does not matter. The encoding of the bibtex file and the one of the generated HTML is identical. Note that if the bibtex only contains latex-encoded diacritics (e.g. ''\'e''), it does not matter. The encoding of the bibtex file and the one of the generated HTML is identical.
@ -410,6 +408,15 @@ class PgmDispatcher extends Dispatcher {
?> ?>
</pre> </pre>
====How to remove fields from the bibtex text?====
If you want to remove some fields (e.g. abbrv and comment) from the resulting bibtex, add in ''bibtexbrowser.local.php'':
<pre>
define('BIBTEXBROWSER_BIBTEX_VIEW','reconstructed');
define('BIBTEXBROWSER_BIBTEX_VIEW_FILTEREDOUT','abbrv|comment');
</pre>
=====Related tools===== =====Related tools=====
Old-fashioned: Old-fashioned:

89
bibtexbrowser.php

@ -23,7 +23,7 @@ define('BIBTEXBROWSER','v__GITHUB__');
// support for configuration // support for configuration
// set with bibtexbrowser_configure, get with config_value // set with bibtexbrowser_configure, get with config_value
// you may have bibtexbrowser_configure('ENCODING', 'latin1') in bibtexbrowser.local.php
// you may have bibtexbrowser_configure('foo', 'bar') in bibtexbrowser.local.php
global $CONFIGURATION; global $CONFIGURATION;
$CONFIGURATION = array(); $CONFIGURATION = array();
function bibtexbrowser_configure($key, $value) { function bibtexbrowser_configure($key, $value) {
@ -37,10 +37,10 @@ function bibtexbrowser_configure($key, $value) {
// the changes that require existing bibtexbrowser symbols should be in bibtexbrowser.after.php (included at the end of this file) // the changes that require existing bibtexbrowser symbols should be in bibtexbrowser.after.php (included at the end of this file)
@include(preg_replace('/\.php$/','.local.php',__FILE__)); @include(preg_replace('/\.php$/','.local.php',__FILE__));
// there is no encoding transformation from the bibtex file to the html file
// if your bibtex file contains 8 bits characters in utf-8
// change the following parameter
@define('ENCODING','UTF-8');//@define('ENCODING','iso-8859-1');//define('ENCODING','windows-1252');
// the encoding of your bibtex file
@define('BIBTEX_INPUT_ENCODING','UTF-8');//@define('BIBTEX_INPUT_ENCODING','iso-8859-1');//define('BIBTEX_INPUT_ENCODING','windows-1252');
// the encoding of the HTML output
@define('OUTPUT_ENCODING','UTF-8');
// number of bib items per page // number of bib items per page
// we use the same parameter 'num' as Google // we use the same parameter 'num' as Google
@define('PAGE_SIZE',isset($_GET['num'])?(preg_match('/^\d+$/',$_GET['num'])?$_GET['num']:10000):14); @define('PAGE_SIZE',isset($_GET['num'])?(preg_match('/^\d+$/',$_GET['num'])?$_GET['num']:10000):14);
@ -126,6 +126,14 @@ function bibtexbrowser_configure($key, $value) {
// for ordered_list, the index is given by HTML directly (in increasing order) // for ordered_list, the index is given by HTML directly (in increasing order)
@define('BIBTEXBROWSER_LAYOUT','table'); @define('BIBTEXBROWSER_LAYOUT','table');
// should the original bibtex be displayed or a reconstructed one with filtering
// values: original/reconstructed
// warning, with reconstructed, the latex markup for accents/diacritics is lost
@define('BIBTEXBROWSER_BIBTEX_VIEW','original');
// a '|'-separated list of field names (used as a regular expression alternation) that are not shown in the bibtex view when BIBTEXBROWSER_BIBTEX_VIEW is 'reconstructed'
@define('BIBTEXBROWSER_BIBTEX_VIEW_FILTEREDOUT','comment|note|file');
// Which is the first html <hN> level that should be used in embedded mode? // Which is the first html <hN> level that should be used in embedded mode?
@define('BIBTEXBROWSER_HTMLHEADINGLEVEL', 2); @define('BIBTEXBROWSER_HTMLHEADINGLEVEL', 2);
@ -655,7 +663,7 @@ see snippet of [[#StateBasedBibParser]]
class XMLPrettyPrinter { class XMLPrettyPrinter {
function beginFile() { function beginFile() {
header('Content-type: text/xml;'); header('Content-type: text/xml;');
print '<?xml version="1.0" encoding="'.ENCODING.'"?>';
print '<?xml version="1.0" encoding="'.OUTPUT_ENCODING.'"?>';
print '<bibfile>'; print '<bibfile>';
} }
@ -987,7 +995,7 @@ function latex2html($line) {
/** encodes strings for Z3988 URLs. Note that & are encoded as %26 and not as &amp. */ /** encodes strings for Z3988 URLs. Note that & are encoded as %26 and not as &amp. */
function s3988($s) { function s3988($s) {
// first remove the HTML entities (e.g. &eacute;) then urlencode them // first remove the HTML entities (e.g. &eacute;) then urlencode them
return urlencode(html_entity_decode($s, ENT_NOQUOTES, ENCODING));
return urlencode($s);
} }
/** /**
@ -1091,8 +1099,20 @@ class BibEntry {
// but instead could contain HTML code (with links using the character "~" for example) // but instead could contain HTML code (with links using the character "~" for example)
// so "comment" is not transformed too // so "comment" is not transformed too
if ($name!='url' && $name!='comment') { if ($name!='url' && $name!='comment') {
// 1. trim space
$value = xtrim($value); $value = xtrim($value);
// 2. transform Latex markup to HTML entities (easier than a one to one mapping to each character)
// HTML entity is an intermediate format
$value = latex2html($value); $value = latex2html($value);
// 3. convert the already-encoded characters from the input encoding to the output encoding
if (function_exists('mb_convert_encoding') && OUTPUT_ENCODING != BIBTEX_INPUT_ENCODING) {
$vaue = mb_convert_encoding($value, OUTPUT_ENCODING, BIBTEX_INPUT_ENCODING);
}
// 4. transform to the target output encoding
$value = html_entity_decode($value, ENT_QUOTES|ENT_XHTML, OUTPUT_ENCODING);
} else { } else {
//echo "xx".$value."xx\n"; //echo "xx".$value."xx\n";
} }
@ -1492,25 +1512,36 @@ class BibEntry {
return $this; return $this;
} }
/**
 * Returns the bibtex text of this bib entry.
 *
 * The result depends on the BIBTEXBROWSER_BIBTEX_VIEW setting:
 *  - 'original': the stored text ($this->text) is returned unchanged;
 *  - 'reconstructed': the entry is rebuilt from $this->fields, one
 *    "name = {value}," line per field, leaving out the fields whose name
 *    matches BIBTEXBROWSER_BIBTEX_VIEW_FILTEREDOUT (a '|'-separated regular
 *    expression alternation) and the fields named 'key', Q_INNER_AUTHOR
 *    or Q_INNER_TYPE.
 *
 * @return string the bibtex text
 * @throws Exception if BIBTEXBROWSER_BIBTEX_VIEW has any other value
 */
function getText() {
  if (BIBTEXBROWSER_BIBTEX_VIEW == 'original') {
    return $this->text;
  }
  if (BIBTEXBROWSER_BIBTEX_VIEW == 'reconstructed') {
    $result = '@'.$this->getType().'{'.$this->getKey().",\n";
    foreach ($this->fields as $k=>$v) {
      // field names are matched case-insensitively against both exclusion lists
      if ( !preg_match('/^('.BIBTEXBROWSER_BIBTEX_VIEW_FILTEREDOUT.')$/i', $k)
        && !preg_match('/^(key|'.Q_INNER_AUTHOR.'|'.Q_INNER_TYPE.')$/i', $k) )
      {
        $result .= ' '.$k.' = {'.$v.'},'."\n";
      }
    }
    $result .= "}\n";
    return $result;
  }
  // '.' is the string concatenation operator in PHP ('+' is arithmetic addition
  // and would not produce the intended message)
  throw new Exception('incorrect value of BIBTEXBROWSER_BIBTEX_VIEW: '.BIBTEXBROWSER_BIBTEX_VIEW);
}
/** Returns true if this bib entry contains the given phrase (PREG regexp) /** Returns true if this bib entry contains the given phrase (PREG regexp)
* in the given field. if $field is null, all fields are considered. * in the given field. if $field is null, all fields are considered.
* Note that this method is NOT case sensitive */ * Note that this method is NOT case sensitive */
function hasPhrase($phrase, $field = null) { function hasPhrase($phrase, $field = null) {
// 2010-01-25
// bug found by jacob kellner
// we have to search in the formatted fileds and not in the raw entry
// we have to search in the formatted fields and not in the raw entry
// i.e. all latex markups are not considered for searches // i.e. all latex markups are not considered for searches
// i.e. added join(" ",$this->getFields())
// and html_entity_decode
if (!$field) { if (!$field) {
// warning html_entity_decode supports encoding since PHP5
return preg_match('/'.$phrase.'/i',$this->getConstants().' '.@html_entity_decode(join(" ",$this->getFields()),ENT_NOQUOTES,ENCODING));
return preg_match('/'.$phrase.'/i',$this->getConstants().' '.join(" ",$this->getFields()));
//return stripos($this->getText(), $phrase) !== false; //return stripos($this->getText(), $phrase) !== false;
} }
if ($this->hasField($field) && (preg_match('/'.$phrase.'/i',$this->getField($field)) ) ) { if ($this->hasField($field) && (preg_match('/'.$phrase.'/i',$this->getField($field)) ) ) {
@ -1659,7 +1690,7 @@ class BibEntry {
function toEntryUnformatted() { function toEntryUnformatted() {
$result = ""; $result = "";
$result .= '<pre class="purebibtex">'; // pre is nice when it is embedded with no CSS available $result .= '<pre class="purebibtex">'; // pre is nice when it is embedded with no CSS available
$entry = htmlspecialchars($this->getFullText());
$entry = htmlspecialchars($this->getFullText(),ENT_NOQUOTES|ENT_XHTML, OUTPUT_ENCODING);
// Fields that should be hyperlinks // Fields that should be hyperlinks
// the order matters // the order matters
@ -2717,7 +2748,7 @@ function query2title(&$query) {
} }
$v = join($v, ','); $v = join($v, ',');
} }
$headers[$k] = __(ucwords($k)).': '.ucwords(htmlspecialchars($v));
$headers[$k] = __(ucwords($k)).': '.ucwords(htmlspecialchars($v,ENT_NOQUOTES|ENT_XHTML, OUTPUT_ENCODING));
} }
return join(' &amp; ',$headers); return join(' &amp; ',$headers);
} }
@ -3796,13 +3827,13 @@ function HTMLTemplate(&$content) {
// when we load a page with AJAX // when we load a page with AJAX
// the HTTP header is taken into account, not the <meta http-equiv> // the HTTP header is taken into account, not the <meta http-equiv>
header('Content-type: text/html; charset='.ENCODING);
header('Content-type: text/html; charset='.OUTPUT_ENCODING);
echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'."\n"; echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'."\n";
?> ?>
<html xmlns="http://www.w3.org/1999/xhtml"> <html xmlns="http://www.w3.org/1999/xhtml">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=<?php echo ENCODING ?>"/>
<meta http-equiv="Content-Type" content="text/html; charset=<?php echo OUTPUT_ENCODING ?>"/>
<meta name="generator" content="bibtexbrowser v__GITHUB__" /> <meta name="generator" content="bibtexbrowser v__GITHUB__" />
<?php <?php
// if ($content->getRSS()!='') echo '<link rel="alternate" type="application/rss+xml" title="RSS" href="'.$content->getRSS().'&amp;rss" />'; // if ($content->getRSS()!='') echo '<link rel="alternate" type="application/rss+xml" title="RSS" href="'.$content->getRSS().'&amp;rss" />';
@ -3900,10 +3931,10 @@ class BibtexDisplay {
function setWrapper($x) { $x->wrapper = 'NoWrapper'; } function setWrapper($x) { $x->wrapper = 'NoWrapper'; }
function display() { function display() {
header('Content-type: text/plain; charset='.ENCODING);
header('Content-type: text/plain; charset='.OUTPUT_ENCODING);
echo '% generated by bibtexbrowser <http://www.monperrus.net/martin/bibtexbrowser/>'."\n"; echo '% generated by bibtexbrowser <http://www.monperrus.net/martin/bibtexbrowser/>'."\n";
echo '% '.@$this->title."\n"; echo '% '.@$this->title."\n";
echo '% Encoding: '.ENCODING."\n";
echo '% Encoding: '.OUTPUT_ENCODING."\n";
foreach($this->entries as $bibentry) { echo $bibentry->getText()."\n"; } foreach($this->entries as $bibentry) { echo $bibentry->getText()."\n"; }
exit; exit;
} }
@ -4022,15 +4053,12 @@ class RSSDisplay {
function setTitle($title) { $this->title = $title; return $this; } function setTitle($title) { $this->title = $title; return $this; }
/** tries to always output a valid XML/RSS string /** tries to always output a valid XML/RSS string
* based on ENCODING, HTML tags, and the transformations
* based on OUTPUT_ENCODING, HTML tags, and the transformations
* that happened in latex2html */ * that happened in latex2html */
function text2rss($desc) { function text2rss($desc) {
// first strip HTML tags // first strip HTML tags
$desc = strip_tags($desc); $desc = strip_tags($desc);
// then decode characters encoded by latex2html, preserve ENCODING
$desc = html_entity_decode($desc, ENT_COMPAT, ENCODING);
// some entities may still be here, we remove them // some entities may still be here, we remove them
// we replace html entities e.g. &eacute; by nothing // we replace html entities e.g. &eacute; by nothing
// however XML entities are kept (e.g. &#53;) // however XML entities are kept (e.g. &#53;)
@ -4044,8 +4072,8 @@ class RSSDisplay {
// final test with encoding: // final test with encoding:
if (function_exists('mb_check_encoding')) { // (PHP 4 >= 4.4.3, PHP 5 >= 5.1.3) if (function_exists('mb_check_encoding')) { // (PHP 4 >= 4.4.3, PHP 5 >= 5.1.3)
if (!mb_check_encoding($desc,ENCODING)) {
return 'encoding error: please check the content of ENCODING';
if (!mb_check_encoding($desc,OUTPUT_ENCODING,BIBTEX_INPUT_ENCODING)) {
return 'encoding error: please check the content of OUTPUT_ENCODING';
} }
} }
@ -4061,7 +4089,7 @@ class RSSDisplay {
function display() { function display() {
header('Content-type: application/rss+xml'); header('Content-type: application/rss+xml');
echo '<?xml version="1.0" encoding="'.ENCODING.'"?>';
echo '<?xml version="1.0" encoding="'.OUTPUT_ENCODING.'"?>';
// //
?> ?>
@ -4082,7 +4110,6 @@ class RSSDisplay {
<description> <description>
<?php <?php
// we are in XML, so we cannot have HTML entities // we are in XML, so we cannot have HTML entities
// however the encoding is specified in preamble
echo $this->text2rss(bib2html($bibentry)."\n".$bibentry->getAbstract()); echo $this->text2rss(bib2html($bibentry)."\n".$bibentry->getAbstract());
?> ?>
</description> </description>
@ -4259,7 +4286,7 @@ class Dispatcher {
} }
function search() { function search() {
if (preg_match('/utf-?8/i',ENCODING)) {
if (preg_match('/utf-?8/i',OUTPUT_ENCODING)) {
$_GET[Q_SEARCH] = urldecode($_GET[Q_SEARCH]); $_GET[Q_SEARCH] = urldecode($_GET[Q_SEARCH]);
} }
$this->query[Q_SEARCH]=$_GET[Q_SEARCH]; $this->query[Q_SEARCH]=$_GET[Q_SEARCH];
@ -4437,7 +4464,7 @@ class Dispatcher {
<html xmlns="http://www.w3.org/1999/xhtml"> <html xmlns="http://www.w3.org/1999/xhtml">
<head> <head>
<meta name="generator" content="bibtexbrowser v__GITHUB__" /> <meta name="generator" content="bibtexbrowser v__GITHUB__" />
<meta http-equiv="Content-Type" content="text/html; charset=<?php echo ENCODING ?>"/>
<meta http-equiv="Content-Type" content="text/html; charset=<?php echo OUTPUT_ENCODING ?>"/>
<title>You are browsing <?php echo $_GET[Q_FILE]; ?> with bibtexbrowser</title> <title>You are browsing <?php echo $_GET[Q_FILE]; ?> with bibtexbrowser</title>
</head> </head>
<frameset cols="15%,*"> <frameset cols="15%,*">

Loading…
Cancel
Save