Source code for omfit_classes.omfit_bibtex

try:
    # framework is running
    from .startup_choice import *
except ImportError as _excp:
    # class is imported by itself
    if (
        'attempted relative import with no known parent package' in str(_excp)
        or 'No module named \'omfit_classes\'' in str(_excp)
        or "No module named '__main__.startup_choice'" in str(_excp)
    ):
        from startup_choice import *
    else:
        raise

from omfit_classes.omfit_ascii import *
from omfit_classes.omfit_weblink import OMFITwebLink
import numpy as np

__all__ = ['OMFITbibtex', 'searchdoi']

# bibtexparser is treated as an optional dependency: if importing it fails for
# any reason, the module still imports, `bibtexparser` is bound to None, and a
# warning is printed through `printe`.  Code further down that dereferences
# `bibtexparser` will fail when it is None.
try:
    import bibtexparser
except Exception as _excp:
    bibtexparser = None
    printe('WARNING! Compromised omfit_bibtex support due to failure to import bibtexparser!')


class OMFITbibtex(OMFITascii, SortedDict):
    r"""
    Class used to parse bibtex files

    The content is stored as a dictionary of dictionaries (one dictionary for each bibtex entry)
    Each bibtex entry must have defined the keys: `ENTRYTYPE` and `ID`

    :param filename: filename of the .bib file to parse

    :param \**kw: keyword dictionary passed to OMFITascii class

    To generate list of own publications:

    1. Export all of your citations from https://scholar.google.com to a `citation.bib` bibtex file
    2. OMFIT['bib']=OMFITbibtex('citation.bib')  # load citations as OMFITbibtex
    3. OMFIT['bib'].doi(deleteNoDOI=True)  # remove entries which do not have a DOI (ie. conferences)
    4. OMFIT['bib'].sanitize()  # fix entries where needed
    5. OMFIT['bib'].update_ID(as_author='Meneghini')  # Sort entries and distinguish between first author or contributed
    6. print('\n\n'.join(OMFIT['bib'].write_format()))  # write to whatever format desired
    """

    def __init__(self, filename, **kw):
        OMFITascii.__init__(self, filename, **kw)
        SortedDict.__init__(self, caseInsensitive=True)
        if filename is None:
            from datetime import date

            # USER is not defined on every platform (e.g. Windows, some containers):
            # fall back gracefully instead of raising KeyError
            user = os.environ.get('USER') or os.environ.get('USERNAME') or 'unknown'
            # no file to parse: seed the database with a single placeholder entry
            self[0] = {'ID': user + str(date.today().year), 'ENTRYTYPE': 'article'}
        else:
            self.dynaLoad = True

    @dynaLoad
    def load(self):
        """
        Parse the bibtex file `self.filename` and store each entry under its `ID`

        Entries sharing the same `ID` are kept: later ones get a `_1`, `_2`, ... suffix

        :raise ImportError: if the `bibtexparser` package could not be imported
        """
        if bibtexparser is None:
            raise ImportError('OMFITbibtex.load requires the `bibtexparser` package, which failed to import')
        with open(self.filename, 'r') as f:
            bibtex_str = f.read()
        bib_database = bibtexparser.loads(bibtex_str)
        for entry in bib_database.get_entry_list():
            label0 = label = entry['ID']
            k = 0
            # make the label unique without dropping duplicated IDs
            while label in self:
                k += 1
                label = label0 + '_%d' % k
            self[label] = entry

    def write_format(self, form='\\item[]{{{author}; {title}; {journal} {volume} {year}: \\href{{http://dx.doi.org/{doi}}}{{doi:{doi}}}}}'):
        """
        returns list with entries formatted according to `form` string

        :param form: `str.format` template applied to each entry; the entry fields
            (eg. `author`, `title`, `journal`, `volume`, `year`, `doi`) are available as keywords

        :return: list of strings

        :raise KeyError: if an entry has no `author` or lacks a field used by `form`
        """
        txt = []
        for item in list(self.keys()):
            # work on a shallow copy so that the stored entry is not modified
            tmp = self[item].copy()
            # 'Last, First and Last, First and others' --> 'Last First, Last First, et al.'
            names = [re.sub(', *', ' ', x) for x in tmp['author'].split(' and ')]
            tmp['author'] = re.sub('others', 'et al.', ', '.join(names))
            tmp['volume'] = tmp.get('volume', '1')
            txt.append(form.format(**tmp))
        return txt

    @dynaSave
    def save(self):
        """
        Write the entries in bibtex format (non-ASCII characters are dropped)

        :return: the bibtex string if `self.filename` is None, otherwise None (the file is written)

        :raise ImportError: if the `bibtexparser` package could not be imported
        """
        if bibtexparser is None:
            raise ImportError('OMFITbibtex.save requires the `bibtexparser` package, which failed to import')
        database = bibtexparser.bibdatabase.BibDatabase()
        database.entries = list(self.values())
        bibtex_str = encode_ascii_ignore(bibtexparser.dumps(database))
        if self.filename is None:
            return bibtex_str
        with open(self.filename, 'w') as f:
            f.write(bibtex_str)

    def doi(self, deleteNoDOI=False):
        """
        method for adding DOI information to bibtex

        Only `article` entries that have `author`, `title` and `journal`, that do not
        already have a `doi`, and whose journal is not listed in `_noDOIjournals`
        are searched on crossref.org

        :param deleteNoDOI: delete entries without DOI
        """
        # loop invariants
        no_doi_journals = [x.lower() for x in _noDOIjournals]
        needed = ['ENTRYTYPE', 'author', 'title', 'journal']
        from_crossref = ['container-title', 'issue', 'page', 'publisher']

        for item in list(self.keys()):
            entry = self[item]
            # only articles
            if (
                'doi' in entry
                or any(k not in entry for k in needed)
                or entry['journal'].lower() in no_doi_journals
                or entry['ENTRYTYPE'].lower() != 'article'
            ):
                printi('%s: %s' % (encode_ascii_ignore(item), entry.get('doi', '')))
                continue

            # search for this paper
            matches = searchdoi(entry['title'], entry['author'])
            if not matches:
                # crossref.org returned nothing: leave the entry without DOI
                printw('%s: no match found on crossref.org' % encode_ascii_ignore(item))
                continue
            best = matches[0]
            entry['__doi__'] = matches
            entry['doi'] = best['DOI']
            entry['doi_score'] = best['score']

            # crossref.org records do not always carry a title
            best_title = (best.get('title') or [''])[0]
            if entry['title'].lower() != best_title.lower():
                # if the title does not match exactly this is a bad sign
                printw('%s: %s' % (encode_ascii_ignore(item), entry['doi']))
            else:
                if all(k in best for k in from_crossref):
                    # trust the info from crossref.org
                    entry['journal'] = best['container-title'][0]
                    entry['number'] = best['issue']
                    entry['pages'] = best['page']
                    entry['publisher'] = best['publisher']
                    if 'author' in best:
                        entry['author'] = ' and '.join(
                            [', '.join([_f for _f in [x.get('family', 'others'), x.get('given', '')] if _f]) for x in best['author']]
                        )
                printi('%s: %s' % (encode_ascii_ignore(item), entry['doi']))

        # create URL entry based on DOI
        for item in list(self.keys()):
            if 'doi' in self[item]:
                self[item]['url'] = OMFITwebLink('doi.org/' + self[item]['doi'])
            elif deleteNoDOI:
                del self[item]

        # final cleanup of the temporary search results
        for item in list(self.keys()):
            self[item].pop('__doi__', None)
            self[item].pop('doi_score', None)

    def sanitize(self):
        """
        Sanitizes the database entries:

        1. Fix all-caps author names
        2. Fix unicodes
        """
        for item in self:
            entry = self[item]

            # Fix all-caps author names
            if 'author' in entry:
                tokens = entry['author'].split()
                for k, token in enumerate(tokens):
                    if token.lower() in ('and', 'others'):
                        # bibtex keywords must stay lowercase: `write_format` splits
                        # on ' and ' and substitutes 'others'
                        tokens[k] = token.lower()
                    elif '.' in token or len(token) > 2:
                        # short tokens without dots are taken to be initials and left as they are
                        tokens[k] = token.title()
                entry['author'] = ' '.join(tokens)

            # Fix unicodes
            for sub in list(entry.keys()):
                entry[sub] = encode_ascii_ignore(entry[sub])

    def filter(self, conditions):
        """
        filter database given a set of conditions

        An entry is kept only if all conditions evaluate to True for it.
        Entries for which a condition raises an exception (eg. missing field) are dropped.

        :param conditions: list of strings (eg. ['int(year)>2012']); each string is
            evaluated as a Python expression with the entry fields as local variables

        :return: filtered OMFITbibtex object (a deep copy: the original is left untouched)
        """
        conditions = tolist(conditions)
        filtered = copy.deepcopy(self)
        for condition in conditions:
            for item in list(filtered.keys()):
                try:
                    # SECURITY: `eval` executes arbitrary code, conditions must come from a trusted source
                    keep = eval(condition, {}, filtered[item])
                except Exception:
                    keep = False
                if not keep:
                    del filtered[item]
        return filtered

    def update_ID(self, fmt=['year_1stAuthor_jrnl', 'lower1stAuthor_year'][1], separator=':', as_author=False):
        """
        set bibtex ID

        :param fmt: string with format 'year_1stAuthor_jrnl'; the `_` separated tokens are entry
            fields or one of the derived `1stAuthor`, `lower1stAuthor`, `jrnl`, `lowerjrnl`

        :param separator: string with separator for fmt

        :param as_author: only keep entries that have `as_author` as author
            (entries where `as_author` is first author get a leading `_` in their ID)

        :return: self, with entries stored under their new ID and sorted

        :raise KeyError: if an entry lacks a field requested by `fmt`
        """
        for item in list(self.keys()):
            entry = self[item]

            # the derived fields live in a scratch copy so that they never end up in the stored entry
            fields = dict(entry)
            try:
                fields['1stAuthor'] = fields['author'].split()[0].strip(',')
                fields['lower1stAuthor'] = fields['1stAuthor'].lower()
            except Exception:
                # best effort: only matters if `fmt` asks for these fields
                pass
            try:
                fields['jrnl'] = ''.join([x[0] for x in fields['journal'].split()])
                fields['lowerjrnl'] = fields['jrnl'].lower()
            except Exception:
                # best effort: only matters if `fmt` asks for these fields
                pass

            # build the label before removing the entry, so that a missing field does not lose the entry
            label0 = label = separator.join([str(fields[k]) for k in fmt.split('_')])
            del self[item]

            if as_author:
                author = entry.get('author', '')
                if as_author.lower() not in author.lower():
                    # not an author of this entry: drop it
                    continue
                if author.split()[0].strip(',').lower() == as_author.lower():
                    label = label0 = '_' + label

            # make the label unique: duplicates get a 'b', 'c', ... suffix
            k = 0
            while label in self:
                k += 1
                label = label0 + '%s' % chr(k + ord('a'))

            self[label] = entry
            self[label]['ID'] = label

        self.sort()
        return self
def searchdoi(title, author):
    """
    Query the crossref.org website for the papers that best match a title and its authors

    :param title: string with the title

    :param author: string with the authors

    :return: list of dictionaries with info about the papers found

    An exception is raised by `requests` if the HTTP request fails.
    """
    query = {"query.author": author.encode('utf-8'), "query.title": title.encode('utf-8')}
    endpoint = "http://api.crossref.org/works?"

    import requests
    import json

    # NOTE(review): the endpoint is plain http and certificate verification is
    # switched off -- confirm whether this is still intended
    reply = requests.get(endpoint, query, verify=False)
    # any failure simply propagates to the caller
    reply.raise_for_status()
    payload = json.loads(reply.text)
    return payload['message']['items']
# Journals for which `OMFITbibtex.doi` does not attempt a DOI lookup
# (the comparison done there is case-insensitive)
_noDOIjournals = ['Bulletin of the American Physical Society', 'Bull. Am. Phys. Soc.']

############################################
if __name__ == '__main__':
    test_classes_main_header()

    import requests

    bibtex_text = ''' @article{meneghini2013integrated, title={Integrated modeling of tokamak experiments with OMFIT}, author={Meneghini, Orso and Lao, Lang}, journal={Plasma and Fusion Research}, volume={8}, pages={2403009--2403009}, year={2013}, publisher={The Japan Society of Plasma Science and Nuclear Fusion Research} } @article{meneghini2015integrated, title={Integrated modeling applications for tokamak experiments with OMFIT}, author={Meneghini, O and Smith, SP and Lao, LL and Izacard, O and Ren, Q and Park, JM and Candy, J and Wang, Z and Luna, CJ and Izzo, VA and others}, journal={Nuclear Fusion}, volume={55}, number={8}, pages={083008}, year={2015}, publisher={IOP Publishing} } '''

    bib = OMFITbibtex('OMFIT_bibtex', fromString=bibtex_text)

    # filter
    filtered_bib = bib.filter(['int(year)>2014'])

    # sanitize
    filtered_bib.sanitize()

    # get the DOI (can take some time, and is thus best done after filtering)
    try:
        filtered_bib.doi()
    except requests.exceptions.RequestException:
        # the lookup needs network access: any request failure (no connection,
        # timeout, HTTP error status, ...) must not fail this self-test
        pass

    pprint(filtered_bib)