Skip to content

Commit 36128f4

Browse files
Provide __str__ where possible for different views
* #306 Fix of AbstractRetrieval-META * #306 Fix of AbstractRetrieval-REF * #306 Fix AuthorRetrieval - Light. The error in the __str__ was a result of the inability of many properties to retrieve the information of the LIGHT view. The reason is that the LIGHT view response has a different structure. Instead of changing the __str__ function, some fixes to the properties were made. * #306 Fix of AuthorRetrieval - Metrics. The __str__ was changed to return a string version conditional on the view. * #306 Adjustment to tests and minor changes * #306 AbstractRetrieval corrections * #306 AuthorRetrieval requested changes * #306 test AuthorRetrieval requested changes * #306 Parse content requested changes * #306 Minor review changes * #306 Minor requested changes II
1 parent f2ff94c commit 36128f4

File tree

4 files changed

+152
-72
lines changed

4 files changed

+152
-72
lines changed

pybliometrics/scopus/abstract_retrieval.py

Lines changed: 49 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def affiliation(self) -> Optional[List[NamedTuple]]:
2424
aff = namedtuple('Affiliation', 'id name city country')
2525
affs = listify(self._json.get('affiliation', []))
2626
for item in affs:
27-
new = aff(id=int(item['@id']), name=item.get('affilname'),
27+
new = aff(id=make_int_if_possible(item.get('@id')), name=item.get('affilname'),
2828
city=item.get('affiliation-city'),
2929
country=item.get('affiliation-country'))
3030
out.append(new)
@@ -747,29 +747,55 @@ def __str__(self):
747747
Assumes the document is a journal article and was loaded with
748748
view="META_ABS" or view="FULL".
749749
"""
750-
date = self.get_cache_file_mdate().split()[0]
751-
# Authors
752-
if self.authors:
753-
if len(self.authors) > 1:
754-
authors = _list_authors(self.authors)
750+
def convert_citedbycount(entry):
751+
try:
752+
return float(entry.citedbycount) or 0
753+
except (ValueError, TypeError):
754+
return 0
755+
756+
def get_date(coverDate):
757+
try:
758+
return coverDate[:4]
759+
except TypeError:
760+
return None
761+
762+
if self._view in ('FULL', 'META_ABS', 'META'):
763+
date = self.get_cache_file_mdate().split()[0]
764+
# Authors
765+
if self.authors:
766+
if len(self.authors) > 1:
767+
authors = _list_authors(self.authors)
768+
else:
769+
a = self.authors[0]
770+
authors = str(a.given_name) + ' ' + str(a.surname)
755771
else:
756-
a = self.authors[0]
757-
authors = str(a.given_name) + ' ' + str(a.surname)
758-
else:
759-
authors = "(No author found)"
760-
# All other information
761-
s = f'{authors}: "{self.title}", {self.publicationName}, {self.volume}'
762-
if self.issueIdentifier:
763-
s += f'({self.issueIdentifier})'
764-
s += ', '
765-
s += _parse_pages(self)
766-
s += f'({self.coverDate[:4]}).'
767-
if self.doi:
768-
s += f' https://doi.org/{self.doi}.\n'
769-
s += f'{self.citedby_count} citation(s) as of {date}'
770-
if self.affiliation:
771-
s += "\n Affiliation(s):\n "
772-
s += '\n '.join([aff.name for aff in self.affiliation])
772+
authors = "(No author found)"
773+
# All other information
774+
s = f'{authors}: "{self.title}", {self.publicationName}, {self.volume}'
775+
if self.issueIdentifier:
776+
s += f'({self.issueIdentifier})'
777+
s += ', '
778+
s += _parse_pages(self)
779+
s += f'({self.coverDate[:4]}).'
780+
if self.doi:
781+
s += f' https://doi.org/{self.doi}.\n'
782+
s += f'{self.citedby_count} citation(s) as of {date}'
783+
if self.affiliation:
784+
s += "\n Affiliation(s):\n "
785+
s += '\n '.join([aff.name for aff in self.affiliation])
786+
787+
elif self._view in ('REF'):
788+
# Sort reference list by citationcount
789+
top_n = 5
790+
references = sorted(self.references, key=convert_citedbycount, reverse=True)
791+
792+
top_references = [f'{reference.title} ({get_date(reference.coverDate)}). '+
793+
f'EID: {reference.id}' for reference in references[:top_n]]
794+
795+
s = f'A total of {self.refcount} references were found. '
796+
s += f'Top {top_n} references:\n\t'
797+
s += '\n\t'.join(top_references)
798+
773799
return s
774800

775801
def get_bibtex(self) -> str:

pybliometrics/scopus/author_retrieval.py

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,13 @@ def affiliation_current(self) -> Optional[List[NamedTuple]]:
2323
when it looks correct in the web view. In this case please request
2424
a correction.
2525
"""
26-
affs = chained_get(self._profile, ["affiliation-current", "affiliation"])
27-
return parse_affiliation(affs)
26+
if self._view in ('STANDARD', 'ENHANCED'):
27+
affs = chained_get(self._profile, ["affiliation-current", "affiliation"])
28+
elif self._view == 'LIGHT':
29+
affs = self._json.get('affiliation-current')
30+
else:
31+
return None
32+
return parse_affiliation(affs, self._view)
2833

2934
@property
3035
def affiliation_history(self) -> Optional[List[NamedTuple]]:
@@ -128,7 +133,21 @@ def identifier(self) -> int:
128133
@property
129134
def indexed_name(self) -> Optional[str]:
130135
"""Author's name as indexed by Scopus."""
131-
return html_unescape(chained_get(self._profile, ['preferred-name', 'indexed-name']))
136+
if self._view in ('STANDARD', 'ENHANCED'):
137+
indexed_name = html_unescape(chained_get(self._profile, ['preferred-name', 'indexed-name']))
138+
elif self._view == 'LIGHT':
139+
# Try to get indexed name from name-variants
140+
name_variants = chained_get(self._json, ['name-variants', 'name-variant'])
141+
if name_variants:
142+
indexed_name = chained_get(name_variants[0], ['name-variant', 'indexed-name'])
143+
else:
144+
# In case of no name-variants get name from preferred-name
145+
preferred_name = self._json.get('preferred-name')
146+
indexed_name = ' '.join([preferred_name.get('initials', ''), preferred_name.get('surname', '')])
147+
else:
148+
indexed_name = None
149+
150+
return indexed_name
132151

133152
@property
134153
def initials(self) -> Optional[str]:
@@ -155,12 +174,23 @@ def orcid(self) -> Optional[str]:
155174

156175
@property
157176
def publication_range(self) -> Optional[Tuple[int, int]]:
158-
"""Tuple containing years of first and last publication."""
159-
r = self._profile.get('publication-range')
160-
try:
161-
return int(r['@start']), int(r['@end'])
162-
except TypeError:
163-
return None
177+
"""Tuple containing years of first and last publication."""
178+
if self._view in ('STANDARD', 'ENHANCED', 'LIGHT'):
179+
if self._view in ('STANDARD', 'ENHANCED'):
180+
r = self._profile.get('publication-range')
181+
start = '@start'
182+
end = '@end'
183+
elif self._view == 'LIGHT':
184+
r = self._json.get('publication-range')
185+
start = 'start'
186+
end = 'end'
187+
188+
try:
189+
return int(r.get(start)), int(r.get(end))
190+
except TypeError:
191+
return None
192+
193+
return None
164194

165195
@property
166196
def scopus_author_link(self) -> Optional[str]:
@@ -269,13 +299,19 @@ def __init__(self,
269299

270300
def __str__(self):
271301
"""Return a summary string."""
272-
date = self.get_cache_file_mdate().split()[0]
273-
main_aff = self.affiliation_current[0]
274-
s = f"{self.indexed_name} from {main_aff.preferred_name} in "\
275-
f"{main_aff.country},\npublished {int(self.document_count):,} "\
276-
f"document(s) since {self.publication_range[0]} "\
277-
f"\nwhich were cited by {int(self.cited_by_count):,} author(s) in "\
278-
f"{int(self.citation_count):,} document(s) as of {date}"
302+
if self._view in ('STANDARD', 'ENHANCED', 'LIGHT'):
303+
date = self.get_cache_file_mdate().split()[0]
304+
main_aff = self.affiliation_current[0]
305+
s = f"{self.indexed_name} from {main_aff.preferred_name} in "\
306+
f"{main_aff.country},\npublished {int(self.document_count):,} "\
307+
f"document(s) since {self.publication_range[0]} "\
308+
f"\nwhich were cited by {int(self.cited_by_count):,} author(s) in "\
309+
f"{int(self.citation_count):,} document(s) as of {date}"
310+
elif self._view == 'METRICS':
311+
s = f'Author with ID {self._id}\n'\
312+
f'published {int(self.document_count):,} document(s)\n'\
313+
f'which were cited by {int(self.cited_by_count):,} author(s) '\
314+
f'in {int(self.citation_count):,} document(s)'
279315
return s
280316

281317
def get_coauthors(self) -> Optional[List[NamedTuple]]:

pybliometrics/scopus/tests/test_AuthorRetrieval.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,34 @@
1818

1919
def test_affiliation_current():
2020
assert_equal(metrics.affiliation_current, None)
21-
assert_equal(light.affiliation_current, None)
21+
2222
order = 'id parent type relationship afdispname preferred_name '\
2323
'parent_preferred_name country_code country address_part city '\
2424
'state postal_code org_domain org_URL'
25-
aff = namedtuple('Affiliation', order)
26-
expected = aff(id=110785688, parent=60027950, type='dept',
27-
relationship='author', afdispname=None, country='United States',
28-
preferred_name='Department of Chemical Engineering',
29-
parent_preferred_name='Carnegie Mellon University', country_code='usa',
25+
aff = namedtuple('Affiliation', order, defaults=(None,) * len(order.split()))
26+
27+
expected_std_enh = aff(id=60027950, parent=None, type='parent',
28+
relationship='author', afdispname=None, preferred_name='Carnegie Mellon University',
29+
parent_preferred_name=None, country_code='usa', country='United States',
3030
address_part='5000 Forbes Avenue', city='Pittsburgh', state='PA',
31-
postal_code='15213-3890', org_domain='cmu.edu',
32-
org_URL='https://www.cmu.edu/')
33-
for a in (standard, enhanced):
31+
postal_code='15213-3890', org_domain='cmu.edu', org_URL='https://www.cmu.edu/')
32+
33+
expected_lgh = aff(id=None, parent=None, type=None,
34+
relationship=None, afdispname=None, preferred_name='Carnegie Mellon University',
35+
parent_preferred_name=None, country_code=None, country='United States',
36+
address_part=None, city='Pittsburgh', state=None,
37+
postal_code=None, org_domain=None, org_URL=None)
38+
39+
for a in (standard, enhanced, light):
3440
received = a.affiliation_current
3541
assert_true(isinstance(received, list))
3642
assert_true(len(received) >= 1)
37-
assert_true(expected in received)
43+
44+
for a in (standard, enhanced):
45+
received = a.affiliation_current
46+
assert_true(expected_std_enh in received)
47+
48+
assert_true(expected_lgh in light.affiliation_current)
3849

3950

4051
def test_affiliation_history():
@@ -199,7 +210,7 @@ def test_identifier():
199210

200211
def test_indexed_name():
201212
assert_equal(metrics.indexed_name, None)
202-
assert_equal(light.indexed_name, None)
213+
assert_equal(light.indexed_name, 'Kitchin J.')
203214
assert_equal(standard.indexed_name, 'Kitchin J.')
204215
assert_equal(enhanced.indexed_name, 'Kitchin J.')
205216

@@ -231,8 +242,7 @@ def test_orcid():
231242

232243
def test_publication_range():
233244
assert_equal(metrics.publication_range, None)
234-
assert_equal(light.publication_range, None)
235-
for a in (standard, enhanced):
245+
for a in (standard, enhanced, light):
236246
assert_equal(a.publication_range[0], 1995)
237247
assert_true(a.publication_range[1] >= 2021)
238248

pybliometrics/scopus/utils/parse_content.py

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -140,32 +140,40 @@ def make_search_summary(self, keyword, results, joiner="\n "):
140140
return s
141141

142142

143-
def parse_affiliation(affs):
143+
def parse_affiliation(affs, view):
144144
"""Helper function to parse list of affiliation-related information."""
145145
order = 'id parent type relationship afdispname preferred_name '\
146146
'parent_preferred_name country_code country address_part city '\
147147
'state postal_code org_domain org_URL'
148-
aff = namedtuple('Affiliation', order)
148+
aff = namedtuple('Affiliation', order, defaults=(None,) * len(order.split()))
149149
out = []
150-
for item in listify(affs):
151-
if not item:
152-
continue
153-
doc = item.get('ip-doc', {}) or {}
154-
address = doc.get('address', {}) or {}
155-
try:
156-
parent = int(item['@parent'])
157-
except KeyError:
158-
parent = None
159-
new = aff(id=int(item['@affiliation-id']), parent=parent,
160-
type=doc.get('@type'), relationship=doc.get('@relationship'),
161-
afdispname=doc.get('@afdispname'),
162-
preferred_name=doc.get('preferred-name', {}).get('$'),
163-
parent_preferred_name=doc.get('parent-preferred-name', {}).get('$'),
164-
country_code=address.get('@country'), country=address.get('country'),
165-
address_part=address.get("address-part"), city=address.get('city'),
166-
postal_code=address.get('postal-code'), state=address.get('state'),
167-
org_domain=doc.get('org-domain'), org_URL=doc.get('org-URL'))
150+
151+
if view in ('STANDARD', 'ENHANCED'):
152+
for item in listify(affs):
153+
if not item:
154+
continue
155+
doc = item.get('ip-doc', {}) or {}
156+
address = doc.get('address', {}) or {}
157+
try:
158+
parent = int(item['@parent'])
159+
except KeyError:
160+
parent = None
161+
new = aff(id=int(item['@affiliation-id']), parent=parent,
162+
type=doc.get('@type'), relationship=doc.get('@relationship'),
163+
afdispname=doc.get('@afdispname'),
164+
preferred_name=doc.get('preferred-name', {}).get('$'),
165+
parent_preferred_name=doc.get('parent-preferred-name', {}).get('$'),
166+
country_code=address.get('@country'), country=address.get('country'),
167+
address_part=address.get("address-part"), city=address.get('city'),
168+
postal_code=address.get('postal-code'), state=address.get('state'),
169+
org_domain=doc.get('org-domain'), org_URL=doc.get('org-URL'))
170+
out.append(new)
171+
elif view == 'LIGHT':
172+
new = aff(preferred_name=affs.get('affiliation-name'),
173+
city=affs.get('affiliation-city'),
174+
country=affs.get('affiliation-country'))
168175
out.append(new)
176+
169177
return out or None
170178

171179

0 commit comments

Comments
 (0)