From 5a4791e0b1be259f5ba1bc4aa0ee4fc9bc7651a3 Mon Sep 17 00:00:00 2001 From: radiangle Date: Wed, 9 Oct 2024 16:54:32 -0700 Subject: [PATCH] remame example, remove eval related components --- examples/FinanceBench-AMD/.env.template | 2 + examples/FinanceBench-AMD/.gitignore | 15 + examples/FinanceBench-AMD/Makefile | 33 + examples/FinanceBench-AMD/README.md | 57 + examples/FinanceBench-AMD/dana.py | 147 + .../FinanceBench-AMD/data_and_knowledge.py | 332 ++ examples/FinanceBench-AMD/ground-truths.yml | 4608 +++++++++++++++++ examples/FinanceBench-AMD/knowledge-store.txt | 45 + examples/FinanceBench-AMD/log.py | 39 + examples/FinanceBench-AMD/program-store.yml | 36 + .../FinanceBench-AMD/rag-ground-truths.yml | 914 ++++ examples/FinanceBench-AMD/util.py | 44 + 12 files changed, 6272 insertions(+) create mode 100644 examples/FinanceBench-AMD/.env.template create mode 100644 examples/FinanceBench-AMD/.gitignore create mode 100644 examples/FinanceBench-AMD/Makefile create mode 100644 examples/FinanceBench-AMD/README.md create mode 100644 examples/FinanceBench-AMD/dana.py create mode 100644 examples/FinanceBench-AMD/data_and_knowledge.py create mode 100644 examples/FinanceBench-AMD/ground-truths.yml create mode 100644 examples/FinanceBench-AMD/knowledge-store.txt create mode 100644 examples/FinanceBench-AMD/log.py create mode 100644 examples/FinanceBench-AMD/program-store.yml create mode 100644 examples/FinanceBench-AMD/rag-ground-truths.yml create mode 100644 examples/FinanceBench-AMD/util.py diff --git a/examples/FinanceBench-AMD/.env.template b/examples/FinanceBench-AMD/.env.template new file mode 100644 index 000000000..9c9789785 --- /dev/null +++ b/examples/FinanceBench-AMD/.env.template @@ -0,0 +1,2 @@ +HF_API_KEY=[... HuggingFace API key if running HuggingFace-hosted models ...] +OPENAI_API_KEY=[... OpenAI API key if running on OpenAI services ...] 
diff --git a/examples/FinanceBench-AMD/.gitignore b/examples/FinanceBench-AMD/.gitignore new file mode 100644 index 000000000..1b80d89fc --- /dev/null +++ b/examples/FinanceBench-AMD/.gitignore @@ -0,0 +1,15 @@ +# data files +.data/ + +# environment variables +.env + +# iPython/Jupyter notebooks +*.ipynb + +# log files +.log/ +*.log + +# Streamlit secrets +.streamlit/secrets.toml diff --git a/examples/FinanceBench-AMD/Makefile b/examples/FinanceBench-AMD/Makefile new file mode 100644 index 000000000..dc5045571 --- /dev/null +++ b/examples/FinanceBench-AMD/Makefile @@ -0,0 +1,33 @@ +dana-solve: + @poetry run python dana.py ${id} + +dana-solve-w-knowledge: + @poetry run python dana.py ${id} --knowledge + +dana-solve-w-prog-store: + @poetry run python dana.py ${id} --prog-store + +dana-solve-w-knowledge-and-prog-store: + @poetry run python dana.py ${id} --knowledge --prog-store + +dana-solve-w-llama3: + @poetry run python dana.py ${id} --llama3 + +dana-solve-w-knowledge-w-llama3: + @poetry run python dana.py ${id} --knowledge --llama3 + +dana-solve-w-prog-store-w-llama3: + @poetry run python dana.py ${id} --prog-store --llama3 + +dana-solve-w-knowledge-and-prog-store-w-llama3: + @poetry run python dana.py ${id} --knowledge --prog-store --llama3 + +dana-solve-all-combos: + @poetry run python dana.py ${id} + @poetry run python dana.py ${id} --knowledge + @poetry run python dana.py ${id} --prog-store + @poetry run python dana.py ${id} --knowledge --prog-store + @poetry run python dana.py ${id} --llama3 + @poetry run python dana.py ${id} --knowledge --llama3 + @poetry run python dana.py ${id} --prog-store --llama3 + @poetry run python dana.py ${id} --knowledge --prog-store --llama3 diff --git a/examples/FinanceBench-AMD/README.md b/examples/FinanceBench-AMD/README.md new file mode 100644 index 000000000..638517d2f --- /dev/null +++ b/examples/FinanceBench-AMD/README.md @@ -0,0 +1,57 @@ + + +# OpenSSA-FinanceBench Lite benchmarking + +This is a lite version of the 
benchmarking of `OpenSSA` performance +on the `FinanceBench` dataset. We will use 1 question from the dataset to demonstrate the use of `OpenSSA` with `DANA` architecture. + +## [`FinanceBench` Dataset](https://github.com/patronus-ai/financebench/blob/main/financebench_sample_150.csv) + +## Getting Started with DANA Agent + +Have Python 3.12 installed. + +__Install__ project, and update its dependencies from time to time: +__`make install`__. + +Create a `.env` file following `.env.template` and fill in the necessary credentials. + +__Solve__ the problem corresponding to `financebench_id` `00807`: +__`make dana-solve id=00807`__. + + +**Question**: + +`Does 3M have a reasonably healthy liquidity profile based on its quick ratio for Q2 of FY2023? If the quick ratio is not relevant to measure liquidity, please state that and explain why.` + +**Knowledge** + +To solve this question, you can add knowledge related to `liquidity`. See the example below: + +- Liquidity Metric Formulas + - `(Net) Working Capital` = `(Total) Current Assets` - `(Total) Current Liabilities` + - `Working Capital Ratio` = `(Total) Current Assets` / `(Total) Current Liabilities` + +Go to `knowledge-store.txt` to add relevant knowledge yourself and see how it helps the agent to solve this question. + +**Program** + +With the above-provided knowledge, the program we can provide to the agent could be as below: +- Goal: To assess liquidity health of a company, calculate `quick ratio` + - Task: To calculate `quick ratio`, use this formula + `Quick Ratio` = ( + (`Cash & Cash Equivalents` + + `Short-Term Investments or (Current) Marketable Securities` + + `(Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables`) + / `(Total) Current Liabilities` + ) + - Sub-task 1: What are values in dollars of `Cash & Cash Equivalents`? + - Sub-task 2: What are values in dollars of `Short-Term Investments or (Current) Marketable Securities`?
+ - Sub-task 3: What are values in dollars of `(Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables`? + - Sub-task 4: What are values in dollars of `(Total) Current Liabilities`? + +Go to `program-store.yml` to see details of the program yourself! You can also experiment with different plans to see how they help the agent solve the problem. + +## Advancing DANA Agent with Domain Knowledge and Program Store +- To solve the question with added domain knowledge, run `make dana-solve-w-knowledge id=00807` +- To solve the question with added domain knowledge and program store, run `make dana-solve-w-knowledge-and-prog-store id=00807` diff --git a/examples/FinanceBench-AMD/dana.py b/examples/FinanceBench-AMD/dana.py new file mode 100644 index 000000000..8d7e1ca22 --- /dev/null +++ b/examples/FinanceBench-AMD/dana.py @@ -0,0 +1,147 @@ +from argparse import ArgumentParser +from functools import cache + +from openssa import DANA, ProgramStore, HTP, HTPlanner, FileResource, LMConfig +from openssa.core.util.lm.huggingface import HuggingFaceLM +from openssa.core.util.lm.openai import OpenAILM, default_llama_index_openai_lm + +# pylint: disable=wrong-import-order,wrong-import-position +from data_and_knowledge import (DocName, FbId, Answer, Doc, FB_ID_COL_NAME, DOC_NAMES_BY_FB_ID, QS_BY_FB_ID, + EXPERT_KNOWLEDGE, EXPERT_PROGRAMS, EXPERT_HTP_COMPANY_KEY, EXPERT_HTP_PERIOD_KEY) +from util import QAFunc, log_qa_and_update_output_file + + +@cache +def get_main_lm(use_llama3: bool = False): + return (HuggingFaceLM if use_llama3 else OpenAILM).from_defaults() + + +@cache +def get_or_create_expert_program_store(use_llama3: bool = False) -> ProgramStore: + program_store = ProgramStore(lm=get_main_lm(use_llama3=use_llama3)) + + for program_name, htp_dict in EXPERT_PROGRAMS.items(): + htp = HTP.from_dict(htp_dict) + program_store.add_or_update_program(name=program_name, description=htp.task.ask, program=htp) + + return program_store + + +@cache +def
get_or_create_agent(doc_name: DocName, expert_knowledge: bool = False, expert_programs: bool = False, + max_depth=3, max_subtasks_per_decomp=6, + use_llama3: bool = False, + llama_index_openai_lm_name: str = LMConfig.OPENAI_DEFAULT_MODEL) -> DANA: + # pylint: disable=too-many-arguments + return DANA(knowledge={EXPERT_KNOWLEDGE} if expert_knowledge else None, + + program_store=(get_or_create_expert_program_store(use_llama3=use_llama3) + if expert_programs + else ProgramStore()), + + programmer=HTPlanner(lm=get_main_lm(use_llama3=use_llama3), + max_depth=max_depth, max_subtasks_per_decomp=max_subtasks_per_decomp), + + resources={FileResource(path=Doc(name=doc_name).dir_path, + lm=default_llama_index_openai_lm(llama_index_openai_lm_name))}) + + +@cache +def get_or_create_adaptations(doc_name: DocName) -> dict[str, str]: + return {EXPERT_HTP_COMPANY_KEY: (doc := Doc(name=doc_name)).company, EXPERT_HTP_PERIOD_KEY: doc.period} + + +@log_qa_and_update_output_file(output_name='DANA') +def solve(fb_id: FbId) -> Answer: + return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id]).solve( + problem=QS_BY_FB_ID[fb_id], + adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id])) + + +@log_qa_and_update_output_file(output_name='DANA-wKnowledge') +def solve_with_knowledge(fb_id: FbId) -> Answer: + return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True).solve( + problem=QS_BY_FB_ID[fb_id], + adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id])) + + +@log_qa_and_update_output_file(output_name='DANA-wProgStore') +def solve_with_program_store(fb_id: FbId) -> Answer: + return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_programs=True).solve( + problem=QS_BY_FB_ID[fb_id], + adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id])) + + +@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wProgStore') +def 
solve_with_knowledge_and_program_store(fb_id: FbId) -> Answer: + return get_or_create_agent(DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, expert_programs=True).solve( + problem=QS_BY_FB_ID[fb_id], + adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id])) + + +@log_qa_and_update_output_file(output_name='DANA-wLlama3') +def solve_with_llama3(fb_id: FbId) -> Answer: + return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], use_llama3=True).solve( + problem=QS_BY_FB_ID[fb_id], + adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id])) + + +@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wLlama3') +def solve_with_knowledge_with_llama3(fb_id: FbId) -> Answer: + return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, use_llama3=True).solve( + problem=QS_BY_FB_ID[fb_id], + adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id])) + + +@log_qa_and_update_output_file(output_name='DANA-wProgStore-wLlama3') +def solve_with_program_store_with_llama3(fb_id: FbId) -> Answer: + return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_programs=True, use_llama3=True).solve( + problem=QS_BY_FB_ID[fb_id], + adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id])) + + +@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wProgStore-wLlama3') +def solve_with_knowledge_and_program_store_with_llama3(fb_id: FbId) -> Answer: + return get_or_create_agent(DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, expert_programs=True, use_llama3=True).solve( # noqa: E501 + problem=QS_BY_FB_ID[fb_id], + adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id])) + + +if __name__ == '__main__': + arg_parser = ArgumentParser() + arg_parser.add_argument('fb_id') + arg_parser.add_argument('--from-id', action='store_true') + 
arg_parser.add_argument('--knowledge', action='store_true') + arg_parser.add_argument('--prog-store', action='store_true') + arg_parser.add_argument('--llama3', action='store_true') + args = arg_parser.parse_args() + + match (args.knowledge, args.prog_store, args.llama3): + case (False, False, False): + solve_func: QAFunc = solve + + case (True, False, False): + solve_func: QAFunc = solve_with_knowledge + + case (False, True, False): + solve_func: QAFunc = solve_with_program_store + + case (True, True, False): + solve_func: QAFunc = solve_with_knowledge_and_program_store + + case (False, False, True): + solve_func: QAFunc = solve_with_llama3 + + case (True, False, True): + solve_func: QAFunc = solve_with_knowledge_with_llama3 + + case (False, True, True): + solve_func: QAFunc = solve_with_program_store_with_llama3 + + case (True, True, True): + solve_func: QAFunc = solve_with_knowledge_and_program_store_with_llama3 + + if not (fb_id := args.fb_id).startswith(FB_ID_COL_NAME): + fb_id: FbId = f'{FB_ID_COL_NAME}_{fb_id}' + + solve_func(f'from:{fb_id}' if args.from_id else fb_id) diff --git a/examples/FinanceBench-AMD/data_and_knowledge.py b/examples/FinanceBench-AMD/data_and_knowledge.py new file mode 100644 index 000000000..7dbf1e41e --- /dev/null +++ b/examples/FinanceBench-AMD/data_and_knowledge.py @@ -0,0 +1,332 @@ +from __future__ import annotations + +from collections import Counter +from dataclasses import dataclass, field +import base64 +from enum import StrEnum +from functools import cached_property +from pathlib import Path +from typing import TypedDict, Required, NotRequired, Literal, TYPE_CHECKING + +from dotenv import load_dotenv +from pandas import DataFrame, read_json, read_csv +import requests +import yaml + +if TYPE_CHECKING: + from openssa.core.planning.hierarchical.plan import HTPDict + + +load_dotenv() + + +type DocName = str +type FbId = str +type Question = str +type Answer = str +type ExpertPlanId = str + + +class Category(StrEnum): + RETRIEVE: 
str = '0-RETRIEVE' + COMPARE: str = '1-COMPARE' + CALC_CHANGE: str = '2-CALC-CHANGE' + CALC_COMPLEX: str = '3-CALC-COMPLEX' + CALC_AND_JUDGE: str = '4-CALC-AND-JUDGE' + EXPLAIN_FACTORS: str = '5-EXPLAIN-FACTORS' + OTHER_ADVANCED: str = '6-OTHER-ADVANCED' + + +type GroundTruth = TypedDict('GroundTruth', {'sector': Required[str], + + 'company': Required[str], + 'period': Required[int], + 'doc-type': Required[str], + 'doc': Required[DocName], + + 'question-type': Required[str], + 'question-reasoning': Required[str], + 'domain-question-num': Required[str | None], + 'question': Required[Question], + + 'answer': Required[Answer], + 'justification': Required[str], + 'page(s)-0based': Required[int], + 'page(s)': Required[str], + + 'category': Required[Category], + 'correctness': Required[str], + 'answer-inadequate': NotRequired[Literal[True]], + 'evaluator-unreliable': NotRequired[Literal[True]]}, + total=False) + + +type RAGGroundTruths = TypedDict('RAGGroundTruths', {'defs': Required[dict[str, str]], + 'ground-truths': Required[dict[str, # doc + dict[str, # statement + dict[str, # line item + dict[int | str, # period + str # ground truth + ]]]]]}) + + +NON_BOT_REQUEST_HEADERS: dict[str, str] = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" +} + + +REPO_RAW_CONTENT_URL_PREFIX: str = 'https://raw.githubusercontent.com/patronus-ai/financebench' +DOC_INFO_URL: str = f'{REPO_RAW_CONTENT_URL_PREFIX}/main/data/financebench_document_information.jsonl' +METADATA_JSONL_URL: str = f'{REPO_RAW_CONTENT_URL_PREFIX}/main/data/financebench_open_source.jsonl' +METADATA_CSV_URL: str = f'{REPO_RAW_CONTENT_URL_PREFIX}/641ae9ece2cae93c671cf59c2d53742b51c7f1aa/financebench_sample_150.csv' + +FB_ID_COL_NAME: str = 'financebench_id' + +META_DF: DataFrame = (read_json(METADATA_JSONL_URL, + orient='records', typ='frame', + dtype=True, convert_axes=True, + convert_dates=True, keep_default_dates=True, + 
precise_float=False, date_unit=None, + encoding='utf-8', encoding_errors='strict', + lines=True, chunksize=None, + compression=None, nrows=None, + storage_options=None, + dtype_backend='pyarrow', engine='ujson') + + .merge(right=read_json( + DOC_INFO_URL, + orient='records', typ='frame', + dtype=True, convert_axes=True, + convert_dates=True, keep_default_dates=True, + precise_float=False, date_unit=None, + encoding='utf-8', encoding_errors='strict', + lines=True, chunksize=None, + compression=None, nrows=None, + storage_options=None, + dtype_backend='pyarrow', engine='ujson'), + + how='left', on='doc_name', # left_on='doc_name', right_on='doc_name', + left_index=False, right_index=False, + sort=False, + suffixes=('', '_'), + copy=False, + indicator=False, + validate=None # TODO: 'many_to_one' after Patronus AI fixes FOOTLOCKER_2022_annualreport + ) + + .set_index(keys=FB_ID_COL_NAME, + drop=True, append=False, + inplace=False, + verify_integrity=True)) + +META_DF.fillna(value='', method=None, axis=None, inplace=True, limit=None) # replace PyArrow NAs + +LEGACY_META_DF: DataFrame = read_csv(METADATA_CSV_URL, + sep=',', # delimiter=',', + header='infer', names=None, index_col=FB_ID_COL_NAME, usecols=None, + dtype=None, engine='pyarrow', converters=None, true_values=None, false_values=None, + skipinitialspace=False, skiprows=None, skipfooter=0, nrows=None, + na_values=None, na_filter=None, keep_default_na=True, + skip_blank_lines=True, + parse_dates=False, date_format=None, dayfirst=False, cache_dates=True, + iterator=False, chunksize=None, compression=None, + thousands=None, decimal='.', + lineterminator=None, + quotechar=None, quoting=0, doublequote=True, + escapechar=None, comment=None, + encoding='utf-8', encoding_errors='strict', + dialect=None, + on_bad_lines='error', + low_memory=True, memory_map=False, + float_precision=None, + storage_options=None, + dtype_backend='pyarrow') + +assert (META_DF.index == LEGACY_META_DF.index).all() +# assert (META_DF.doc_name 
== LEGACY_META_DF.doc_name).all() # J&J docs have been fixed +assert (META_DF.doc_period == LEGACY_META_DF.doc_period).all() +assert (META_DF.doc_link == LEGACY_META_DF.doc_link).all() +assert (META_DF.question_type == LEGACY_META_DF.question_type).all() +assert (META_DF.question == LEGACY_META_DF.question).all() +# assert (META_DF.answer == LEGACY_META_DF.answer).all() # 01107 answer has been fixed + +DOC_NAMES: list[DocName] = sorted(META_DF.doc_name.unique()) +DOC_LINKS_BY_NAME: dict[DocName, str] = dict(zip(META_DF.doc_name, META_DF.doc_link)) +DOC_NAMES_BY_FB_ID: dict[FbId, DocName] = META_DF.doc_name.to_dict() + +FB_IDS: list[FbId] = META_DF.index.to_list() +FB_IDS_BY_DOC_NAME: dict[DocName, list[FbId]] = META_DF.groupby('doc_name').apply(lambda _: _.index.to_list()) + +QS_BY_FB_ID: dict[FbId, Question] = META_DF.question.to_dict() + + +LOCAL_CACHE_DIR_PATH: Path = Path(__file__).parent / '.data' +LOCAL_CACHE_DOCS_DIR_PATH: Path = LOCAL_CACHE_DIR_PATH / 'docs' +OUTPUT_FILE_PATH: Path = LOCAL_CACHE_DIR_PATH / 'output.csv' + + +GROUND_TRUTHS_FILE_PATH = Path(__file__).parent / 'ground-truths.yml' +with open(file=GROUND_TRUTHS_FILE_PATH, + buffering=-1, + encoding='utf-8', + errors='strict', + newline=None, + closefd=True, + opener=None) as f: + GROUND_TRUTHS: dict[FbId, GroundTruth] = yaml.safe_load(stream=f) + +N_CASES: int = len(GROUND_TRUTHS) +CAT_DISTRIB: Counter[Category] = Counter(ground_truth['category'] for ground_truth in GROUND_TRUTHS.values()) + + +EXPERT_KNOWLEDGE_FILE_PATH: Path = Path(__file__).parent / 'knowledge-store.txt' +with open(file=EXPERT_KNOWLEDGE_FILE_PATH, + buffering=-1, + encoding='utf-8', + errors='strict', + newline=None, + closefd=True, + opener=None) as f: + EXPERT_KNOWLEDGE: str = f.read() + + +EXPERT_PROGRAMS_FILE_PATH: Path = Path(__file__).parent / 'program-store.yml' +with open(file=EXPERT_PROGRAMS_FILE_PATH, + buffering=-1, + encoding='utf-8', + errors='strict', + newline=None, + closefd=True, + opener=None) as f: + 
EXPERT_PROGRAMS: dict[ExpertPlanId, HTPDict] = yaml.safe_load(stream=f) + +EXPERT_HTP_COMPANY_KEY: str = 'COMPANY' +EXPERT_HTP_PERIOD_KEY: str = 'PERIOD' + + +RAG_GROUND_TRUTHS_FILE_PATH: Path = Path(__file__).parent / 'rag-ground-truths.yml' +with open(file=RAG_GROUND_TRUTHS_FILE_PATH, + buffering=-1, + encoding='utf-8', + errors='strict', + newline=None, + closefd=True, + opener=None) as f: + RAG_GROUND_TRUTHS: RAGGroundTruths = yaml.safe_load(stream=f) + + +@dataclass +class Doc: + name: DocName + company: str = field(init=False, repr=False) + period: str = field(init=False, repr=False) + type: str = field(init=False, repr=False) + + def __post_init__(self): + self.company, self.period, self.type = self.name.split(sep='_', maxsplit=2) + + def request(self) -> requests.Response: + try: + response: requests.Response = requests.get( + url=(url := ((base64.b64decode(doc_link.split(sep=q, maxsplit=-1)[-1], altchars=None) + .decode(encoding='utf-8', errors='strict')) + if (q := '?pdfTarget=') in (doc_link := DOC_LINKS_BY_NAME[self.name]) + else doc_link)), + timeout=60, + stream=True) + + except requests.exceptions.ConnectionError: + response: requests.Response = requests.get( + url=(url := f'{REPO_RAW_CONTENT_URL_PREFIX}/main/pdfs/{self.name}.pdf'), + timeout=60, + stream=True) + + if response.headers.get('Content-Type') != 'application/pdf': + response: requests.Response = requests.get(url=url, + headers=NON_BOT_REQUEST_HEADERS, + timeout=60, + stream=True) + + return response + + @cached_property + def dir_path(self) -> Path: + dir_path: Path = LOCAL_CACHE_DOCS_DIR_PATH / self.name + + if not (file_path := dir_path / f'{self.name}.pdf').is_file(): + dir_path.mkdir(parents=True, exist_ok=True) + + response: requests.Response = self.request() + + with open(file=file_path, mode='wb', buffering=-1, encoding=None, newline=None, closefd=True, opener=None) as f: + f.write(response.content) + + return dir_path + + @cached_property + def file_path(self) -> Path: + return 
self.dir_path / f'{self.name}.pdf' + + +def create_or_update_ground_truths() -> dict[FbId, GroundTruth]: + ground_truths: dict[FbId, GroundTruth] = {fb_id: {'sector': row.gics_sector, + 'company': row.company, 'period': row.doc_period, 'doc-type': row.doc_type, + 'doc': row.doc_name, + 'question-type': row.question_type, + 'question-reasoning': row.question_reasoning, + 'domain-question-num': row.domain_question_num, + 'question': row.question, + 'answer': row.answer, 'justification': row.justification, + 'page(s)-0based': row.evidence[0]['evidence_page_num']} + for fb_id, row in META_DF.iterrows()} + + if GROUND_TRUTHS_FILE_PATH.is_file(): + with open(file=GROUND_TRUTHS_FILE_PATH, + buffering=-1, + encoding='utf-8', + errors='strict', + newline=None, + closefd=True, + opener=None) as f: + existing_ground_truths: dict[FbId, GroundTruth] = yaml.safe_load(stream=f) + + for fb_id, ground_truth in ground_truths.items(): + if (existing_ground_truth := existing_ground_truths.get(fb_id)): + for existing_key in set(existing_ground_truth).difference(ground_truth): + ground_truth[existing_key] = existing_ground_truth[existing_key] + + with open(file=GROUND_TRUTHS_FILE_PATH, + mode='w', + buffering=-1, + encoding='utf-8', + errors='strict', + newline=None, + closefd=True, + opener=None) as f: + yaml.safe_dump(data=ground_truths, + stream=f, + default_style=None, + default_flow_style=False, + canonical=None, + indent=2, + width=80, + allow_unicode=True, + line_break=None, + encoding='utf-8', + explicit_start=None, + explicit_end=None, + version=None, + tags=None, + sort_keys=False) + + return ground_truths + + +def get_or_create_output_df() -> DataFrame: + output_df: DataFrame = (read_csv(OUTPUT_FILE_PATH, index_col=FB_ID_COL_NAME) + if OUTPUT_FILE_PATH.is_file() + else META_DF[['doc_name', 'question', 'answer']]) + + output_df.loc[:, 'category'] = [GROUND_TRUTHS[fb_id]['category'] for fb_id in output_df.index] + + return output_df diff --git 
a/examples/FinanceBench-AMD/ground-truths.yml b/examples/FinanceBench-AMD/ground-truths.yml new file mode 100644 index 000000000..7cc0d1fc3 --- /dev/null +++ b/examples/FinanceBench-AMD/ground-truths.yml @@ -0,0 +1,4608 @@ +financebench_id_03029: + sector: Industrials + + company: 3M + period: 2018 + doc-type: 10k + doc: 3M_2018_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: What is the FY2018 capital expenditure amount (in USD millions) for 3M? + Give a response to the question by relying on the details shown in the cash flow + statement. + + answer: $1577.00 + justification: 'The metric capital expenditures was directly extracted from the + company 10K. The line item name, as seen in the 10K, was: Purchases of property, + plant and equipment (PP&E).' + page(s)-0based: 59 + page(s): '60' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity equivalent to or approximately equal to + 1577, 1577 million, 1.577 billion, + 1600, 1600 million or 1.6 billion + + +financebench_id_04672: + sector: Industrials + + company: 3M + period: 2018 + doc-type: 10k + doc: 3M_2018_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: 'Assume that you are a public equities analyst. Answer the following question + by primarily using information that is shown in the balance sheet: what is the + year end FY2018 net PPNE for 3M? Answer in USD billions.' + + answer: $8.70 + justification: "The metric ppne, net was directly extracted from the company 10K.\ + \ The line item name, as seen in the 10K, was: Property, plant and equipment รข\x80\ + \x94 net." 
+ page(s)-0based: 57 + page(s): '58' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity equivalent to or approximately equal to + 8.738, 8.738 billion, 8738 million, + 8.7, 8.7 billion or 8700 million + + evaluator-unreliable: true + + +financebench_id_00499: + sector: Industrials + + company: 3M + period: 2022 + doc-type: 10k + doc: 3M_2022_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) + domain-question-num: dg06 + question: Is 3M a capital-intensive business based on FY2022 data? + + answer: 'No, the company is managing its CAPEX and Fixed Assets pretty efficiently, + which is evident from below key metrics: + + CAPEX/Revenue Ratio: 5.1% + + Fixed assets/Total Assets: 20% + + Return on Assets= 12.4%' + justification: 'CAPEX/Revenue + + Fixed Assets/Total Assets + + ROA=Net Income/Total Assets' + page(s)-0based: 47 + page(s): 48,50,52 + + category: 6-OTHER-ADVANCED + correctness: |- + the answer opines that 3M is actually managing capital assets efficiently, and justifies such opinion + by certain calculated financial ratio metric value(s) showing at least one of the following: + - Fixed Assets is not large as proportion of Total Assets; + - Capital Expenditure (CapEx) is not high relative to Revenue; and/or + - Return on (Total) Assets (RoA or RoTA) is quite good + + evaluator-unreliable: true + + +financebench_id_01226: + sector: Industrials + + company: 3M + period: 2022 + doc-type: 10k + doc: 3M_2022_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) OR Numerical + reasoning OR Logical reasoning + domain-question-num: dg17 + question: What drove operating margin change as of FY2022 for 3M? If operating margin + is not a useful metric for a company like this, then please state that and explain + why. 
+ + answer: "Operating Margin for 3M in FY2022 has decreased by 1.7% primarily due to:\ + \ \n-Decrease in gross Margin\n-mostly one-off charges including Combat Arms Earplugs\ + \ litigation, impairment related to exiting PFAS manufacturing, costs related\ + \ to exiting Russia and divestiture-related restructuring\ncharges" + justification: '' + page(s)-0based: 26 + page(s): '27' + + category: 0-RETRIEVE + correctness: |- + the answer mentions at least 1 salient change among those discussed below: + + COST OF SALES: + Cost of sales, measured as a percent of sales, increased in 2022 when compared to the same period last year. + Increases were primarily due to 2022 special item costs for significant litigation from additional commitments + to address PFAS-related matters at 3M's Zwijndrecht, Belgium site, higher raw materials and logistics costs, + manufacturing productivity headwinds which were further magnified by the shutdown of certain operations in Belgium + and progress on restarting previously-idled operations, and investments in growth, productivity and sustainability. + On a percent of sales basis, these increases were partially offset by increases in selling prices. + + SELLING, GENERAL AND ADMINISTRATIVE EXPENSES: + SG&A, measured as a percent of sales, increased in 2022 when compared to the same period last year. + SG&A was impacted by increased special item costs for significant litigation primarily related to steps toward + resolving Combat Arms Earplugs litigation resulting in a 2022 second quarter pre-tax charge of approximately $1.2 billion, + certain impairment costs related to exiting PFAS manufacturing, costs related to exiting Russia, + divestiture-related restructuring charges, and continued investment in key growth initiatives. + These increases were partially offset by restructuring benefits and ongoing general 3M cost management. 
+ + RESEARCH, DEVELOPMENT AND RELATED EXPENSES: + R&D, measured as a percent of sales, decreased in 2022 when compared to the same period last year. + 3M continues to invest in a range of R&D activities from application development, product and manufacturing support, + product development and technology development aimed at disruptive innovations. + + GAIN ON BUSINESS DIVESTITURES: + In the third quarter of 2022, 3M recorded a pre-tax gain of $2.7 billion ($2.7 billion after tax) + related to the split-off and combination of its Food Safety business with Neogen Corporation. + + GOODWILL IMPAIRMENT EXPENSE: + As a result of 3M's commitment to exit per- and polyfluoroalkyl substance (PFAS) manufacturing, + 3M recorded a goodwill impairment charge related to the Advanced Materials reporting unit + (within the Transportation and Electronics business). + + +financebench_id_01865: # tricky: Total Sales Change contains zero Acquisitions but non-zero Divestitures + sector: Industrials + + company: 3M + period: 2022 + doc-type: 10k + doc: 3M_2022_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: If we exclude the impact of M&A, which segment has dragged down 3M's overall + growth in 2022? + + answer: The consumer segment shrunk by 0.9% organically. + justification: '' + page(s)-0based: 24 + page(s): '25' + + category: 1-COMPARE + correctness: >- + the answer identifies Consumer segment as negative contributor + + +financebench_id_00807: + sector: Industrials + + company: 3M + period: 2023 + doc-type: 10q + doc: 3M_2023Q2_10Q + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) OR Logical + reasoning + domain-question-num: dg01 + question: Does 3M have a reasonably healthy liquidity profile based on its quick + ratio for Q2 of FY2023? If the quick ratio is not relevant to measure liquidity, + please state that and explain why. + + answer: No. 
The quick ratio for 3M was 0.96 by Jun'23 close, which needs a bit of + an improvement to touch the 1x mark + justification: 'Quick Ratio= (Total current assets-Total inventories)/Total current + liabilities + + (15,754-5,280)/10,936' + page(s)-0based: 4 + page(s): '5' + + category: 4-CALC-AND-JUDGE + correctness: >- + the answer contains a calculated Quick Ratio decimal value that is over 0.75 but less than 1.00, + or, alternatively, a calculated percentage value that is over 75% but less than 100% + + +financebench_id_00941: + sector: Industrials + + company: 3M + period: 2023 + doc-type: 10q + doc: 3M_2023Q2_10Q + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg04 + question: Which debt securities are registered to trade on a national securities + exchange under 3M's name as of Q2 of 2023? + + answer: 'Following debt securities registered under 3M''s name are listed to trade + on the New York Stock Exchange: + + -1.500% Notes due 2026 (Trading Symbol: MMM26) + + -1.750% Notes due 2030 (Trading Symbol: MMM30) + + -1.500% Notes due 2031 (Trading Symbol: MMM31)' + justification: '' + page(s)-0based: 0 + page(s): '1' + + category: 0-RETRIEVE + correctness: >- + the answer mentions notes/securities due 2026, 2030 and 2031 + + evaluator-unreliable: true + + +financebench_id_01858: + sector: Industrials + + company: 3M + period: 2023 + doc-type: 10q + doc: 3M_2023Q2_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Does 3M maintain a stable trend of dividend distribution? 
+ + answer: Yes, not only they distribute the dividends on a routine basis, 3M has also + been increasing the per share dividend for consecutive 65 years + justification: '' + page(s)-0based: 61 + page(s): '62' + + category: 0-RETRIEVE + correctness: >- + the answer affirms that dividends have been stable, and/or mentions "65 years", "65th year" or something similar + + evaluator-unreliable: true + + +financebench_id_02987: + sector: Communication Services + + company: Activision Blizzard + period: 2019 + doc-type: 10k + doc: ACTIVISIONBLIZZARD_2019_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'What is the FY2019 fixed asset turnover ratio for Activision Blizzard? + Fixed asset turnover ratio is defined as: FY2019 revenue / (average PP&E between + FY2018 and FY2019). Round your answer to two decimal places. Base your judgments + on the information provided primarily in the statement of income and the statement + of financial position.' + + answer: '24.26' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Total revenue. This metric was located in the 10K as a single line item + named: Total net revenues. + + + Metric 2: Ppne, net. This metric was located in the 10K as a single line item + named: Property and equipment, net.' 
+ page(s)-0based: 68 + page(s): 69,70 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Fixed Asset Turnover Ratio decimal value that is in the range from 23.00 to 25.00 + (if the answer is a single number, assume that it is that calculated Fixed Asset Turnover Ratio decimal value) + + evaluator-unreliable: true + + +financebench_id_07966: + sector: Communication Services + + company: Activision Blizzard + period: 2019 + doc-type: 10k + doc: ACTIVISIONBLIZZARD_2019_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: What is the FY2017 - FY2019 3 year average of capex as a % of revenue + for Activision Blizzard? Answer in units of percents and round to one decimal + place. Calculate (or extract) the answer from the statement of income and the + cash flow statement. + + answer: 1.9% + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Capital expenditures. This metric was located in the 10K as a single + line item named: Capital expenditures. + + + Metric 2: Total revenue. This metric was located in the 10K as a single line item + named: Total net revenues.' 
+ page(s)-0based: 69 + page(s): 70,73 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated percentage value that is in the range from 1.70% to 2.10%, + or, alternatively, a calculated decimal value that is in the range from 0.0170 to 0.0210 + (if the answer is a single number, assume that it is that calculated metric value) + + evaluator-unreliable: true + + +financebench_id_04735: + sector: Information Technology + + company: Adobe + period: 2015 + doc-type: 10k + doc: ADOBE_2015_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'You are an investment banker and your only resource(s) to answer the + following question is (are): the statement of financial position and the cash + flow statement. Here''s the question: what is the FY2015 operating cash flow ratio + for Adobe? Operating cash flow ratio is defined as: cash from operations / total + current liabilities. Round your answer to two decimal places.' + + answer: '0.66' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Cash from operations. This metric was located in the 10K as a single + line item named: Net cash provided by operating activities. + + + Metric 2: Total current liabilities. This metric was located in the 10K as a single + line item named: Total current liabilities.' 
+ page(s)-0based: 58 + page(s): 59,63 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Operating Cash Flow Ratio decimal value that is in the range from 0.6000 to 0.7000, + or, alternatively, a calculated percentage value that is in the range from 60.00% to 70.00% + (if the answer is a single number, assume that it is that calculated Operating Cash Flow Ratio metric value) + + +financebench_id_07507: + sector: Information Technology + + company: Adobe + period: 2016 + doc-type: 10k + doc: ADOBE_2016_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: What is Adobe's year-over-year change in unadjusted operating income from + FY2015 to FY2016 (in units of percents and round to one decimal place)? Give a + solution to the question by using the income statement. + + answer: 65.4% + justification: 'The metric unadjusted operating income was directly extracted from + the company 10K. The line item name, as seen in the 10K, was: Operating income. + The final step was to execute the desired percent change calculation on unadjusted + operating income.' + page(s)-0based: 61 + page(s): '62' + + category: 2-CALC-CHANGE + correctness: >- + the answer contains a calculated Operating Income change percentage value that is in the range from 60.0% to 70.0% + (if the answer is a single number, assume that it is that calculated Operating Income change percentage value) + + +financebench_id_03856: + sector: Information Technology + + company: Adobe + period: 2017 + doc-type: 10k + doc: ADOBE_2017_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'What is the FY2017 operating cash flow ratio for Adobe? Operating cash + flow ratio is defined as: cash from operations / total current liabilities. Round + your answer to two decimal places.
Please utilize information provided primarily + within the balance sheet and the cash flow statement.' + + answer: '0.83' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Cash from operations. This metric was located in the 10K as a single + line item named: Net cash provided by operating activities. + + + Metric 2: Total current liabilities. This metric was located in the 10K as a single + line item named: Total current liabilities.' + page(s)-0based: 56 + page(s): 57,61 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Operating Cash Flow Ratio decimal value that is in the range from 0.8000 to 0.8500, + or, alternatively, a calculated percentage value that is in the range from 80.00% to 85.00% + (if the answer is a single number, assume that it is that calculated Operating Cash Flow Ratio metric value) + + +financebench_id_00438: + sector: Information Technology + + company: Adobe + period: 2022 + doc-type: 10k + doc: ADOBE_2022_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning OR information extraction + domain-question-num: dg14 + question: Does Adobe have an improving operating margin profile as of FY2022? If + operating margin is not a useful metric for a company like this, then state that + and explain why. + + answer: No the operating margins of Adobe have recently declined from 36.8% in FY + 2021 to 34.6% in FY2022. A drop by 2.2% in a year. 
+ justification: '6098/16388 + + 5802/14573' + page(s)-0based: 53 + page(s): '54' + + category: 4-CALC-AND-JUDGE + correctness: >- + the answer contains calculated Operating Margin percentage or decimal values for 2021 and 2022, + and concludes that such metric decreased + + evaluator-unreliable: true + + +financebench_id_00591: + sector: Information Technology + + company: Adobe + period: 2022 + doc-type: 10k + doc: ADOBE_2022_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Does Adobe have an improving Free cashflow conversion as of FY2022? + + answer: Yes, the FCF conversion (using net income as the denominator) for Adobe + has improved by ~13% from 143% in 2021 to 156% in 2022 + justification: 'FCF Conversion: (Net cash provided by operating activities - Purchases + of property and equipment)/Net income + + (7838-442)/4756 + + (7230-348)/4822' + page(s)-0based: 56 + page(s): '57' + + category: 4-CALC-AND-JUDGE + correctness: >- + the answer contains calculated Free Cash Flow Conversion Ratio percentage or decimal values for 2021 and 2022, + and concludes that such metric increased + + evaluator-unreliable: true + + +financebench_id_01319: + sector: Utilities + + company: AES Corporation + period: 2022 + doc-type: 10k + doc: AES_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg21 + question: What is the quantity of restructuring costs directly outlined in AES Corporation's + income statements for FY2022? If restructuring costs are not explicitly outlined + then state 0. 
+ + answer: '0' + justification: '' + page(s)-0based: 131 + page(s): '132' + + category: 0-RETRIEVE + correctness: >- + the answer states 0, zero, and/or that restructuring costs are not explicitly mentioned/reported + + evaluator-unreliable: true + + +financebench_id_00540: + sector: Utilities + + company: AES Corporation + period: 2022 + doc-type: 10k + doc: AES_2022_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning OR Logical reasoning + domain-question-num: dg25 + question: Roughly how many times has AES Corporation sold its inventory in FY2022? + Calculate inventory turnover ratio for the FY2022; if conventional inventory management + is not meaningful for the company then state that and explain why. + + answer: AES has converted inventory 9.5 times in FY 2022. + justification: 'Cost of sales/Inventory + + 10069/1055' + page(s)-0based: 129 + page(s): 130,132 + + category: 3-CALC-COMPLEX + correctness: |- + the answer contains a calculated Inventory Turnover Ratio (or Inventory Conversion Ratio) decimal value that is either: + - in the range from 9.0 to 10.0 times (implicitly using ending Inventory as denominator), or + - approximately 12.0 times (implicitly using average Inventory as denominator) + (if the answer is a single number, assume that it is that calculated Inventory Turnover Ratio decimal value) + + +financebench_id_10420: + sector: Utilities + + company: AES Corporation + period: 2022 + doc-type: 10k + doc: AES_2022_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'Based on the information provided primarily in the statement of financial + position and the statement of income, what is AES''s FY2022 return on assets (ROA)? + ROA is defined as: FY2022 net income / (average total assets between FY2021 and + FY2022). Round your answer to two decimal places.' 
+ answer: '-0.02' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Net income. This metric was located in the 10K as a single line item + named: NET INCOME (LOSS) ATTRIBUTABLE TO THE AES CORPORATION. + + + Metric 2: Total assets. This metric was located in the 10K as a single line item + named: TOTAL ASSETS.' + page(s)-0based: 129 + page(s): 130,132 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Return on Assets (RoA) + percentage value that is NEGATIVE and in the range from -2.00% to -1.40%, + or, alternatively, a calculated decimal value that is NEGATIVE and in the range from -0.0200 to -0.0140 + (if the answer is a single number, assume that it is that calculated Return on Assets (RoA) metric value) + + evaluator-unreliable: true + + +financebench_id_06655: + sector: Consumer Discretionary + + company: Amazon + period: 2017 + doc-type: 10k + doc: AMAZON_2017_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'What is Amazon''s FY2017 days payable outstanding (DPO)? DPO is defined + as: 365 * (average accounts payable between FY2016 and FY2017) / (FY2017 COGS + + change in inventory between FY2016 and FY2017). Round your answer to two decimal + places. Address the question by using the line items and information shown within + the balance sheet and the P&L statement.' + + answer: '93.86' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Accounts payable. This metric was located in the 10K as a single line + item named: Accounts payable. + + + Metric 2: Inventories. This metric was located in the 10K as a single line item + named: Inventories. 
+ + + Metric 3: Cost of goods sold. This metric was located in the 10K as a single line + item named: Cost of sales.' + page(s)-0based: 37 + page(s): 38,40 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Days Payable Outstanding (DPO) decimal value that is in the range from 90.00 to 100.00 + (if the answer is a single number, assume that it is that calculated Days Payable Outstanding (DPO) metric value) + + +financebench_id_08135: + sector: Consumer Discretionary + + company: Amazon + period: 2017 + doc-type: 10k + doc: AMAZON_2017_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: What is Amazon's year-over-year change in revenue from FY2016 to FY2017 + (in units of percents and round to one decimal place)? Calculate what was asked + by utilizing the line items clearly shown in the statement of income. + + answer: 30.8% + justification: 'The metric total revenue was directly extracted from the company + 10K. The line item name, as seen in the 10K, was: Total net sales. The final step + was to execute the desired percent change calculation on total revenue.' + page(s)-0based: 37 + page(s): '38' + + category: 2-CALC-CHANGE + correctness: >- + the answer contains a calculated Revenue change percentage value that is in the range from 30.0% to 31.0% + (if the answer is a single number, assume that it is that calculated Revenue change percentage value) + + +financebench_id_08286: + sector: Consumer Discretionary + + company: Amazon + period: 2019 + doc-type: 10k + doc: AMAZON_2019_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: By drawing conclusions from the information stated only in the income + statement, what is Amazon's FY2019 net income attributable to shareholders (in + USD millions)? 
+ + answer: $11588.00 + justification: 'The metric net income was directly extracted from the company 10K. + The line item name, as seen in the 10K, was: Net income.' + page(s)-0based: 37 + page(s): '38' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity equivalent to or approximately equal to + 11588, 11588 million, 11.588 billion, + 11600, 11600 million or 11.6 billion + + +financebench_id_03882: + sector: Materials + + company: Amcor + period: 2020 + doc-type: 10k + doc: AMCOR_2020_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: What is Amcor's year end FY2020 net AR (in USD millions)? Address the + question by adopting the perspective of a financial analyst who can only use the + details shown within the balance sheet. + + answer: $1616.00 + justification: 'The metric accounts receivable, net was directly extracted from + the company 10K. The line item name, as seen in the 10K, was: Trade receivables, + net.' + page(s)-0based: 49 + page(s): '50' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity equivalent to or approximately equal to + 1615.9, 1615.9 million, + 1616, 1616 million, 1.616 billion, + 1600, 1600 million or 1.6 billion + + evaluator-unreliable: true + + +financebench_id_01935: + sector: Materials + + company: Amcor + period: 2022 + doc-type: 8k + doc: AMCOR_2022_8K_dated-2022-07-01 + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: What was the key agenda of the AMCOR's 8k filing dated 1st July 2022? + + answer: Amcor Finance (USA), Inc. and Amcor Flexibles North America, Inc., entered + into supplemental indentures relating to Guaranteed Senior Notes due 2026 and + 2028. This involved the substitution of the Substitute Issuer (Amcor Flexibles + North America) for the Former Issuer (Amcor Finance) and the assumption of covenants + under the indentures. 
(In essence a novation agreement) + justification: '' + page(s)-0based: 1 + page(s): '2' + + category: 0-RETRIEVE + correctness: >- + the answer mentions one of the terms "supplemental", "indenture(s)", "substitute" or "substitution" + + evaluator-unreliable: true + + +financebench_id_00799: + sector: Materials + + company: Amcor + period: 2023 + doc-type: 10k + doc: AMCOR_2023_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning OR Logical reasoning + domain-question-num: dg02 + question: Has AMCOR's quick ratio improved or declined between FY2023 and FY2022? + If the quick ratio is not something that a financial analyst would ask about a + company like this, then state that and explain why. + + answer: The quick ratio has slightly improved from 0.67 times to 0.69 times between + FY 2023 and FY 2022.(3.4% jump) + justification: 'Quick Ratio= (Total current assets-(Raw materials and supplies+Work + in process and finished goods))/Total current liabilities + + (5308-992-1221)/4476 + + (5853-1114-1325)/5103' + page(s)-0based: 51 + page(s): '52' + + category: 4-CALC-AND-JUDGE + correctness: >- + the answer contains calculated Quick Ratio decimal or percentage values for 2022 and 2023, + both over 0.50 but less than 0.75 (if decimal), or, alternatively, over 50% but less than 75% (if percentage); + the answer then concludes that such metric increased + + +financebench_id_01079: + sector: Materials + + company: Amcor + period: 2023 + doc-type: 10k + doc: AMCOR_2023_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg10 + question: What are major acquisitions that AMCOR has done in FY2023, FY2022 and + FY2021? + + answer: 'Amcor completed these acquisitions during FY2023: + + -100% equity interest of a flexibles manufacturing company in the Czech Republic + + - 100% equity interest in a medical device packaging manufacturing site in + + Shanghai, China.
+ + -acquisition of a New Zealand-based leading manufacturer of state-of-the-art, + automated protein + + packaging machines.' + justification: '' + page(s)-0based: 63 + page(s): '64' + + category: 0-RETRIEVE + correctness: |- + the answer mentions acquisitions in at least 2 of the following: + - Czech Republic; + - New Zealand; and + - Shanghai, China (or, alternatively, just "Shanghai" or just "China") + + +financebench_id_01148: + sector: Materials + + company: Amcor + period: 2023 + doc-type: 10k + doc: AMCOR_2023_10K + + question-type: domain-relevant + question-reasoning: Information extraction OR Logical reasoning OR + domain-question-num: dg12 + question: What industry does AMCOR primarily operate in? + + answer: Amcor is a global leader in packaging production for various use cases. + justification: '' + page(s)-0based: 4 + page(s): '5' + + category: 0-RETRIEVE + correctness: >- + the answer mentions "packaging" + + +financebench_id_00684: + sector: Materials + + company: Amcor + period: 2023 + doc-type: 10k + doc: AMCOR_2023_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning OR information extraction + domain-question-num: dg13 + question: Does AMCOR have an improving gross margin profile as of FY2023? If gross + margin is not a useful metric for a company like this, then state that and explain + why. + + answer: No. For AMCOR there has been a slight decline in gross margins by 0.8%. 
+ justification: 'Gross Profit/Net Sales + + 2725/14694 + + 2820/14544' + page(s)-0based: 49 + page(s): '50' + + category: 4-CALC-AND-JUDGE + correctness: >- + the answer contains calculated Gross Margin percentage or decimal values for 2022 and 2023, + and concludes that such metric decreased + answer-inadequate: true + + +financebench_id_01936: + sector: Materials + + company: Amcor + period: 2023 + doc-type: 10q + doc: AMCOR_2023Q2_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: What is the nature & purpose of AMCOR's restructuring liability as oF + Q2 of FY2023 close? + + answer: 87% of the total restructuring liability is related Employee liabilities. + justification: '' + page(s)-0based: 14 + page(s): '15' + + category: 0-RETRIEVE + correctness: |- + the answer mentions Employee costs or Employee liabilities + + +financebench_id_01928: + sector: Materials + + company: Amcor + period: 2023 + doc-type: Earnings + doc: AMCOR_2023Q4_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: What Was AMCOR's Adjusted Non GAAP EBITDA for FY 2023 + + answer: AMCOR's Adj. EBITDA was $2,018mn in FY 2023 + justification: '' + page(s)-0based: 11 + page(s): '12' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity equivalent to or approximately equal to + 2018 million, 2.018 billion, + 2000 million or 2 billion + + evaluator-unreliable: true + + +financebench_id_01930: + sector: Materials + + company: Amcor + period: 2023 + doc-type: Earnings + doc: AMCOR_2023Q4_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: How much was the Real change in Sales for AMCOR in FY 2023 vs FY 2022, + if we exclude the impact of FX movement, passthrough costs and one-off items? + + answer: The Real Growth was flat in FY 2023 vs FY 2022. 
+ justification: '' + page(s)-0based: 9 + page(s): '10' + + category: 2-CALC-CHANGE + correctness: >- + the answer concludes that the percentage change was approximately 1%, + or, alternatively, concludes that the growth was flat / small + + evaluator-unreliable: true + + +financebench_id_03069: + sector: Information Technology + + company: AMD + period: 2015 + doc-type: 10k + doc: AMD_2015_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: Answer the following question as if you are an equity research analyst + and have lost internet connection so you do not have access to financial metric + providers. According to the details clearly outlined within the P&L statement + and the statement of cash flows, what is the FY2015 depreciation and amortization + (D&A from cash flow statement) % margin for AMD? + + answer: 4.2% + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Depreciation and amortization. This metric was located in the 10K as + a single line item named: Depreciation and amortization. + + + Metric 2: Total revenue. This metric was located in the 10K as a single line item + named: Net revenue.' 
+ page(s)-0based: 55 + page(s): 56,60 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Depreciation & Amortization (D&A) Margin (using Net Revenue as denominator) + percentage value that is in the range from 4.00% to 4.50%, + or, alternatively, a calculated decimal value that is in the range from 0.0400 to 0.0450 + (if the answer is a single number, assume that it is that calculated Depreciation & Amortization (D&A) Margin metric value) + + +financebench_id_00222: + sector: Information Technology + + company: AMD + period: 2022 + doc-type: 10k + doc: AMD_2022_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) OR Logical + reasoning + domain-question-num: dg01 + question: Does AMD have a reasonably healthy liquidity profile based on its quick + ratio for FY22? If the quick ratio is not relevant to measure liquidity, please + state that and explain why. + + answer: Yes. The quick ratio is 1.57, calculated as (cash and cash equivalents+Short + term investments+Accounts receivable, net+receivables from related parties)/ (current + liabilities). + justification: '' + page(s)-0based: 55 + page(s): '56' + + category: 4-CALC-AND-JUDGE + correctness: >- + the answer contains a calculated Quick Ratio decimal value that is in the range from 1.40 to 1.90, + or, alternatively, a calculated percentage value that is in the range from 140% to 190% + + +financebench_id_00995: + sector: Information Technology + + company: AMD + period: 2022 + doc-type: 10k + doc: AMD_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg07 + question: What are the major products and services that AMD sells as of FY22? 
+ + answer: AMD sells server microprocessors (CPUs) and graphics processing units (GPUs), + data processing units (DPUs), Field Programmable Gate Arrays (FPGAs), and Adaptive + System-on-Chip (SoC) products for data centers; CPUs, accelerated processing units + (APUs) that integrate CPUs and GPUs, and chipsets for desktop and notebook personal + computers; discrete GPUs, and semi-custom SoC products and development services; + and embedded CPUs, GPUs, APUs, FPGAs, and Adaptive SoC products. + justification: '' + page(s)-0based: 3 + page(s): '4' + + category: 0-RETRIEVE + correctness: >- + the answer mentions at least graphics (i.e., GPU) and FPGA products + + evaluator-unreliable: true + + +financebench_id_01198: + sector: Information Technology + + company: AMD + period: 2022 + doc-type: 10k + doc: AMD_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg15 + question: What drove revenue change as of the FY22 for AMD? + + answer: In 2022, AMD reported Higher sales of their EPYC server processors, higher + semi-custom product sales, and the inclusion of Xilinx embedded product sales + justification: '' + page(s)-0based: 42 + page(s): '43' + + category: 0-RETRIEVE + correctness: |- + the answer mentions at least 2 of the following: + - "Data Center" and/or "EPYC"; + - "Gaming" and/or "semi-custom"; and + - "Embedded" and/or "Xilinx" + + evaluator-unreliable: true + + +financebench_id_00917: + sector: Information Technology + + company: AMD + period: 2022 + doc-type: 10k + doc: AMD_2022_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) OR Numerical + reasoning OR Logical reasoning + domain-question-num: dg17 + question: What drove operating margin change as of the FY22 for AMD? If operating + margin is not a useful metric for a company like this, then please state that + and explain why. 
+ + answer: The decrease in AMD's operating income was primarily driven by amortization + of intangible assets associated with the Xilinx acquisition + justification: '' + page(s)-0based: 42 + page(s): '43' + + category: 0-RETRIEVE + correctness: >- + the answer mentions Xilinx + + +financebench_id_01279: + sector: Information Technology + + company: AMD + period: 2022 + doc-type: 10k + doc: AMD_2022_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning + domain-question-num: dg19 + question: Among operations, investing, and financing activities, which brought in + the most (or lost the least) cash flow for AMD in FY22? + + answer: In 2022, AMD brought in the most cashflow from Operations + justification: '' + page(s)-0based: 57 + page(s): '58' + + category: 1-COMPARE + correctness: >- + the answer identifies Operations / Operating Cash Flows as bringing in most cash + + +financebench_id_00563: + sector: Information Technology + + company: AMD + period: 2022 + doc-type: 10k + doc: AMD_2022_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: From FY21 to FY22, excluding Embedded, in which AMD reporting segment + did sales proportionally increase the most? + + answer: Data Center + justification: "Data center: \nFY22: 6,043\nFY21: 3,694 \n6,043/3,694-1 = 63,59%\n\ + \nClient: \nFY22: 6,201\nFY21: 6,887 \n6,201/6,887-1 = -9,96%\n\n\nGaming: \n\ + FY22: 6,805\nFY21: 5,607 \n6,805/5,607-1 = 21,37%" + page(s)-0based: 47 + page(s): '48' + + category: 1-COMPARE + correctness: >- + the answer identifies Data Center segment as proportionally growing most strongly + + +financebench_id_00757: + sector: Information Technology + + company: AMD + period: 2022 + doc-type: 10k + doc: AMD_2022_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Did AMD report customer concentration in FY22? 
+ + answer: Yes, one customer accounted for 16% of consolidated net revenue + justification: One customer ccounting for 16% of net evenue is a high customer concenration + page(s)-0based: 11 + page(s): '12' + + category: 0-RETRIEVE + correctness: >- + the answer mentions that one or a small number of customers + accounted for large portion of revenue + + evaluator-unreliable: true + + +financebench_id_00476: + sector: Financials + + company: American Express + period: 2022 + doc-type: 10k + doc: AMERICANEXPRESS_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg04 + question: Which debt securities are registered to trade on a national securities + exchange under American Express' name as of 2022? + + answer: There are none + justification: No debt securities are listed under the securities registered pursuant + to Section 12(b) of the Act, which implies there are none + page(s)-0based: 0 + page(s): '1' + + category: 0-RETRIEVE + correctness: >- + the answer concludes that there are no debt securities traded, + or, alternatively, that no such debt securities are explicitly reported + + evaluator-unreliable: true + + +financebench_id_01028: + sector: Financials + + company: American Express + period: 2022 + doc-type: 10k + doc: AMERICANEXPRESS_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg08 + question: What are the geographies that American Express primarily operates in as + of 2022? 
+ + answer: United States, EMEA, APAC, and LACC + justification: '' + page(s)-0based: 154 + page(s): '155' + + category: 0-RETRIEVE + correctness: |- + the answer mentions at least 3 among: + - United States (US); + - Europe, the Middle East and Africa (EMEA); + - Asia Pacific, Australia and New Zealand (APAC); and + - Latin America, Canada and the Caribbean (LACC) + + +financebench_id_00723: + sector: Financials + + company: American Express + period: 2022 + doc-type: 10k + doc: AMERICANEXPRESS_2022_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning OR information extraction + domain-question-num: dg14 + question: Does AMEX have an improving operating margin profile as of 2022? If operating + margin is not a useful metric for a company like this, then state that and explain + why. + + answer: Performance is not measured through operating margin + justification: It's a financial services company and performance is measured through + the Net Interest Margin. + page(s)-0based: 95 + page(s): '96' + + category: 6-OTHER-ADVANCED + correctness: >- + the answer argues that Operating Margin is not a very relevant/useful metric for this business model and/or industry, + or, alternatively, that performance in this business model and/or industry is usually not judged through Operating Margin + + evaluator-unreliable: true + + +financebench_id_00720: + sector: Financials + + company: American Express + period: 2022 + doc-type: 10k + doc: AMERICANEXPRESS_2022_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) OR Numerical + reasoning OR Logical reasoning + domain-question-num: dg16 + question: What drove gross margin change as of the FY2022 for American Express? + If gross margin is not a useful metric for a company like this, then please state + that and explain why. 
+ + answer: Performance is not measured through gross margin + justification: It's a financial services company and performance is measured through + the Net Interest Margin. + page(s)-0based: 95 + page(s): '96' + + category: 6-OTHER-ADVANCED + correctness: >- + the answer argues that Gross Margin is not a very relevant/useful metric for this business model and/or industry, + or, alternatively, that performance in this business model and/or industry is usually not judged through Gross Margin + + evaluator-unreliable: true + + +financebench_id_01351: + sector: Financials + + company: American Express + period: 2022 + doc-type: 10k + doc: AMERICANEXPRESS_2022_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning + domain-question-num: dg23 + question: How much has the effective tax rate of American Express changed between + FY2021 and FY2022? + + answer: The effective tax rate for American Express has changed/dropped from 24.6% + in FY 2021 to 21.6% in FY 2022. + justification: '' + page(s)-0based: 43 + page(s): '44' + + category: 2-CALC-CHANGE + correctness: >- + the answer says Effective Tax Rate changed from 24.6% to 21.6%, + and/or that it decreased by 3 percentage points or 3% + + evaluator-unreliable: true + + +financebench_id_01964: + sector: Financials + + company: American Express + period: 2022 + doc-type: 10k + doc: AMERICANEXPRESS_2022_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: What was the largest liability in American Express's Balance Sheet in + 2022? 
+ + answer: Customer deposits + justification: '' + page(s)-0based: 97 + page(s): '98' + + category: 1-COMPARE + correctness: >- + the answer identifies Customer Deposits as largest liability + + evaluator-unreliable: true + + +financebench_id_01981: + sector: Financials + + company: American Express + period: 2022 + doc-type: 10k + doc: AMERICANEXPRESS_2022_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Was American Express able to retain card members during 2022? + + answer: 'Yes' + justification: '' + page(s)-0based: 44 + page(s): '45' + + category: 0-RETRIEVE + correctness: >- + the answer affirms that retention was good/high + + evaluator-unreliable: true + + +financebench_id_05718: + sector: Utilities + + company: American Water Works + period: 2020 + doc-type: 10k + doc: AMERICANWATERWORKS_2020_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: How much (in USD billions) did American Water Works pay out in cash dividends + for FY2020? Compute or extract the answer by primarily using the details outlined + in the statement of cash flows. + + answer: $0.40 + justification: 'The metric total cash dividends paid out was directly extracted + from the company 10K. The line item name, as seen in the 10K, was: Dividends paid.' 
+ page(s)-0based: 85 + page(s): '86' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity equivalent to or approximately equal to + 0.389, 0.389 billion, 389 million, + 0.4, 0.4 billion or 400 million + + +financebench_id_04254: + sector: Utilities + + company: American Water Works + period: 2021 + doc-type: 10k + doc: AMERICANWATERWORKS_2021_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: Basing your judgments off of the cash flow statement and the income statement, + what is American Water Works's FY2021 unadjusted operating income + depreciation + and amortization from the cash flow statement (unadjusted EBITDA) in USD millions? + + answer: $1832.00 + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Depreciation and amortization. This metric was located in the 10K as + a single line item named: Depreciation and amortization. + + + Metric 2: Unadjusted operating income. This metric was located in the 10K as a + single line item named: Operating income.' + page(s)-0based: 85 + page(s): 86,88 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a quantity equivalent to or approximately equal to + 1832, 1832 million, 1.832 billion, + 1800, 1800 million or 1.8 billion + + +financebench_id_00070: + sector: Utilities + + company: American Water Works + period: 2022 + doc-type: 10k + doc: AMERICANWATERWORKS_2022_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning OR Logical reasoning + domain-question-num: dg24 + question: Does American Water Works have positive working capital based on FY2022 + data? If working capital is not a useful or relevant metric for this company, + then please state that and explain why. 
+ + answer: No, American Water Works had negative working capital of -$1561M in FY 2022. + justification: 'Accounts receivable+Income tax receivable+Unbilled revenues+Materials + and supplies+other-Accounts payable-Accrued liabilities-Accrued taxes + + 334+114+275+98+312-254-706-49' + page(s)-0based: 80 + page(s): 81,82 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated (Net) Working Capital metric value in dollars + that is NEGATIVE and equivalent to or approximately equal to + minus/negative 1561, minus/negative 1561 million, minus/negative 1.561 billion, + minus/negative 1600, minus/negative 1600 million or minus/negative 1.6 billion + + evaluator-unreliable: true + + +financebench_id_02608: + sector: Consumer Discretionary + + company: Best Buy + period: 2017 + doc-type: 10k + doc: BESTBUY_2017_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: In agreement with the information outlined in the income statement, what + is the FY2015 - FY2017 3 year average net profit margin (as a %) for Best Buy? + Answer in units of percents and round to one decimal place. + + answer: 2.8% + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Total revenue. This metric was located in the 10K as a single line item + named: Revenue. + + + Metric 2: Net income. This metric was located in the 10K as a single line item + named: Net earnings attributable to Best Buy Co., Inc. shareholders.' 
+ page(s)-0based: 55 + page(s): '56' + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Average Net Profit Margin percentage value that is in the range from 2.50% to 3.00%, + or, alternatively, a calculated decimal value that is in the range from 0.0250 to 0.0300 + (if the answer is a single number, assume that it is that calculated Average Net Profit Margin metric value) + + +financebench_id_04417: + sector: Consumer Discretionary + + company: Best Buy + period: 2019 + doc-type: 10k + doc: BESTBUY_2019_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: What is the year end FY2019 total amount of inventories for Best Buy? + Answer in USD millions. Base your judgments on the information provided primarily + in the balance sheet. + + answer: $5409.00 + justification: 'The metric inventories was directly extracted from the company 10K. + The line item name, as seen in the 10K, was: Merchandise inventories.' + page(s)-0based: 51 + page(s): '52' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity equivalent to or approximately equal to + 5409, 5409 million, 5.409 billion, + 5400, 5400 million or 5.4 billion + + +financebench_id_00685: + sector: Consumer Discretionary + + company: Best Buy + period: 2023 + doc-type: 10k + doc: BESTBUY_2023_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) OR Logical + reasoning + domain-question-num: dg03 + question: Are Best Buy's gross margins historically consistent (not fluctuating + more than roughly 2% each year)? If gross margins are not a relevant metric for + a company like this, then please state that and explain why. + + answer: Yes, the margins have been consistent, there has been a minor decline of + 1.1% in gross margins between FY2022 and FY2023. 
+ justification: 'Gross Profit/Revenue + + 9912/46298 + + 11640/51761' + page(s)-0based: 39 + page(s): '40' + + category: 4-CALC-AND-JUDGE + correctness: >- + the answer contains calculated Gross Margin + percentage values for 2022 and 2023 that are within 2 percentage points (or 2%) of each other, + or, alternatively, calculated decimal values that are within 0.02 of each other + answer-inadequate: true + + +financebench_id_01077: + sector: Consumer Discretionary + + company: Best Buy + period: 2023 + doc-type: 10k + doc: BESTBUY_2023_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg10 + question: What are major acquisitions that Best Buy has done in FY2023, FY2022 and + FY2021? + + answer: 'Best Buy closed two acquisitions, both these companies were already partially + owned by Best Buy, but Best Buy acquired all outstanding shares of these two companies + during FY 2022: (1) Current Health Ltd and (2) Two Peaks, LLC d/b/a Yardbird Furniture' + justification: '' + page(s)-0based: 50 + page(s): '51' + + category: 0-RETRIEVE + correctness: >- + the answer mentions Current Health and Two Peaks (which is also alternatively called Yardbird) + + +financebench_id_01275: + sector: Consumer Discretionary + + company: Best Buy + period: 2023 + doc-type: 10k + doc: BESTBUY_2023_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning + domain-question-num: dg19 + question: Among operations, investing, and financing activities, which brought in + the most (or lost the least) cash flow for Best Buy in FY2023? 
+ + answer: Best Buy generated the most cash flow from operating activities in FY 2023 + ($1.8 bn) + justification: '' + page(s)-0based: 41 + page(s): '42' + + category: 1-COMPARE + correctness: >- + the answer identifies that Operations / Operating Cash Flows as bringing in most cash + + +financebench_id_00288: + sector: Consumer Discretionary + + company: Best Buy + period: 2024 + doc-type: 10q + doc: BESTBUY_2024Q2_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Was there any drop in Cash & Cash equivalents between FY 2023 and Q2 of + FY2024? + + answer: Yes, there was a decline of ~42% between FY2023 and Q2 of FY 2024. + justification: 1093/1874-1 + page(s)-0based: 19 + page(s): '20' + + category: 1-COMPARE + correctness: >- + the answer affirms that Cash & Cash Equivalents decreased + + +financebench_id_00460: + sector: Consumer Discretionary + + company: Best Buy + period: 2024 + doc-type: 10q + doc: BESTBUY_2024Q2_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Was there any change in the number of Best Buy stores between Q2 of FY2024 + and FY2023? + + answer: Yes, there is decline in number stores by 1.32% from 982 stores in Q2 FY + 2023 to 969 by the end of Q2 FY2024. + justification: 969/982-1 + page(s)-0based: 16 + page(s): '17' + + category: 1-COMPARE + correctness: >- + the answer mentions that number of stores decreased + + evaluator-unreliable: true + + +financebench_id_01902: + sector: Consumer Discretionary + + company: Best Buy + period: 2024 + doc-type: 10q + doc: BESTBUY_2024Q2_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Which Best Buy product category performed the best (by top line) in the + domestic (USA) Market during Q2 of FY2024? + + answer: The entertainment segment experienced the highest growth of 9% during Q2 + FY2024, primarily from gaming division. 
+ justification: '' + page(s)-0based: 17 + page(s): '18' + + category: 1-COMPARE + correctness: |- + the answer either: + - identifies Entertainment (or Gaming) category/segment as proportionally growing most; or + - identifies Computing and Mobile Phones category/segment as having highest revenue + + evaluator-unreliable: true + + +financebench_id_04660: + sector: Information Technology + + company: Block + period: 2016 + doc-type: 10k + doc: BLOCK_2016_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: Considering the data in the balance sheet, what is Block's (formerly known + as Square) FY2016 working capital ratio? Define working capital ratio as total + current assets divided by total current liabilities. Round your answer to two + decimal places. + + answer: '1.73' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Total current liabilities. This metric was located in the 10K as a single + line item named: Total current liabilities. + + + Metric 2: Total current assets. This metric was located in the 10K as a single + line item named: Total current assets.' 
+ page(s)-0based: 67 + page(s): '68' + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Working Capital Ratio decimal value that is in the range from 1.70 to 1.80, + or, alternatively, a calculated percentage value that is in the range from 170% to 180% + (if the answer is a single number, assume that it is that calculated Working Capital Ratio metric value) + + +financebench_id_03838: + sector: Information Technology + + company: Block + period: 2020 + doc-type: 10k + doc: BLOCK_2020_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: What is the FY2019 - FY2020 total revenue growth rate for Block (formerly + known as Square)? Answer in units of percents and round to one decimal place. + Approach the question asked by assuming the standpoint of an investment banking + analyst who only has access to the statement of income. + + answer: 101.5% + justification: 'The metric total revenue was directly extracted from the company + 10K. The line item name, as seen in the 10K, was: Total net revenue. The final + step was to execute the desired percent change calculation on total revenue.' + page(s)-0based: 85 + page(s): '86' + + category: 2-CALC-CHANGE + correctness: >- + the answer contains a calculated Revenue growth percentage value that is over 100.0% + (if the answer is a single number, assume that it is that calculated Revenue growth percentage value) + + evaluator-unreliable: true + + +financebench_id_07661: + sector: Information Technology + + company: Block + period: 2020 + doc-type: 10k + doc: BLOCK_2020_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: 'Using the cash flow statement, answer the following question to the best + of your abilities: how much did Block (formerly known as Square) generate in cash + flow from operating activities in FY2020? Answer in USD millions.' 
+ + answer: $382.00 + justification: 'The metric cash from operations was directly extracted from the + company 10K. The line item name, as seen in the 10K, was: Net cash provided by + operating activities.' + page(s)-0based: 89 + page(s): '90' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity equivalent to or approximately equal to + 381.6, 381.6 million, 0.3816 billion, + 382, 382 million, 0.382 billion, + 400, 400 million or 0.4 billion + + +financebench_id_10285: + sector: Industrials + + company: Boeing + period: 2018 + doc-type: 10k + doc: BOEING_2018_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: 'We need to calculate a financial metric by using information only provided + within the balance sheet. Please answer the following question: what is Boeing''s + year end FY2018 net property, plant, and equipment (in USD millions)?' + + answer: $12645.00 + justification: 'The metric ppne, net was directly extracted from the company 10K. + The line item name, as seen in the 10K, was: Property, plant and equipment, net.' + page(s)-0based: 51 + page(s): '52' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity equivalent to or approximately equal to + 12645, 12645 million, 12.645 billion, + 12600, 12600 million or 12.6 billion + + evaluator-unreliable: true + + +financebench_id_00517: + sector: Industrials + + company: Boeing + period: 2022 + doc-type: 10k + doc: BOEING_2022_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) + domain-question-num: dg09 + question: Are there any product categories / service categories that represent more + than 20% of Boeing's revenue for FY2022? + + answer: Yes. Boeing has product and service categories that represent more than + 20% of Boeing's revenue for FY2022. 
These categories are Commercial Airplanes + which comprises 39% of total revenue, Defence which comprises 35% of total revenue + and Services which comprises 26% of total revenue. + justification: 'Commercial Airplanes%=Revenues: Commercial Airplanes/Total revenues*100=25,867/66,608*100=39%. + Defence%=Defense, Space & Security/Total revenues*100=23,162/66,608*100=35%. Services%=Global + Services/Total revenues*100=17,611/66,608*100=26%.' + page(s)-0based: 61 + page(s): '62' + + category: 3-CALC-COMPLEX + correctness: |- + the answer mentions at least 1 of following categories: + - Commercial Airplanes; + - Defense/Defence (or fully written "Defense, Space & Security"); and + - Services (or fully written "Global Services") + + evaluator-unreliable: true + + +financebench_id_01091: + sector: Industrials + + company: Boeing + period: 2022 + doc-type: 10k + doc: BOEING_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg11 + question: Has Boeing reported any materially important ongoing legal battles from + FY2022? + + answer: Yes. Multiple lawsuits have been filed against Boeing resulting from a 2018 + Lion Air crash and a 2019 Ethiopian Airlines crash. + justification: '' + page(s)-0based: 112 + page(s): '113' + + category: 0-RETRIEVE + correctness: >- + the answer affirms that there have been material lawsuits / legal battles + + evaluator-unreliable: true + + +financebench_id_00678: # note: Gross Income is implicit, with missing label + sector: Industrials + + company: Boeing + period: 2022 + doc-type: 10k + doc: BOEING_2022_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning OR information extraction + domain-question-num: dg13 + question: Does Boeing have an improving gross margin profile as of FY2022? If gross + margin is not a useful metric for a company like this, then state that and explain + why. + + answer: Yes. 
Boeing has an improving gross margin profile as of FY2022. Gross profit + improved from $3,017 million in FY2021 to $3,502 million in FY2022. Gross margin + % improved from 4.8% in FY2021 to 5.3% in FY2022. + justification: Gross margin%=Gross margin/Total revenues*100=3,502/66,608*100=5.3% + for 2022 and 3,017/62,286*100=4.8% for 2021. + page(s)-0based: 54 + page(s): '55' + + category: 4-CALC-AND-JUDGE + correctness: >- + the answer contains calculated Gross Margin percentage or decimal values for 2021 and 2022, + and concludes that such metric increased + + evaluator-unreliable: true + + +financebench_id_01290: + sector: Industrials + + company: Boeing + period: 2022 + doc-type: 10k + doc: BOEING_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction OR Logical reasoning + domain-question-num: dg20 + question: Who are the primary customers of Boeing as of FY2022? + + answer: Boeing's primary customers as of FY2022 are a limited number of commercial + airlines and the US government. The US government accounted for 40% of Boeing's + total revenues in FY2022. + justification: '' + page(s)-0based: 7 + page(s): 8, 10, 14 + + category: 0-RETRIEVE + correctness: >- + the answer mentions airlines and government(s) / military(ies) + + evaluator-unreliable: true + + +financebench_id_00464: + sector: Industrials + + company: Boeing + period: 2022 + doc-type: 10k + doc: BOEING_2022_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Is Boeing's business subject to cyclicality? + + answer: Yes, Boeing's business is subject to cyclicality due to its exposure to + the airline industry which is a cyclical industry. + justification: A major portion of Boeing's revenue is derived from the sale of aircraft + to commercial airlines. The commercial airlines business is cyclical, and subject + to significant profit swings. 
+ page(s)-0based: 7 + page(s): '8' + + category: 0-RETRIEVE + correctness: >- + the answer affirms that cyclicality is present + + +financebench_id_00494: + sector: Industrials + + company: Boeing + period: 2022 + doc-type: 10k + doc: BOEING_2022_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: What production rate changes is Boeing forecasting for FY2023? + + answer: Boeing forecasts an increase in the production rates for the 737, 777X and + 787 aircrafts in 2023. + justification: Boeing plans to gradually increase production rates for the 737 and + 787 and to resume production of 777X. + page(s)-0based: 8 + page(s): '9' + + category: 0-RETRIEVE + correctness: >- + the answer mentions increase(s) in production rate(s) + + +financebench_id_00585: # note: correct number signs + sector: Industrials + + company: Boeing + period: 2022 + doc-type: 10k + doc: BOEING_2022_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: How does Boeing's effective tax rate in FY2022 compare to FY2021? + + answer: Effective tax rate in FY2022 was 0.62%, compared to -14.76% in FY2021. + justification: Effective tax rate=Income tax (expense) benefit/ Loss before income + taxes*100=(31)/(5,022)*100=0.62% in 2022 and 743/(5,033)*100=-14.76%. + page(s)-0based: 54 + page(s): '55' + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains calculated Effective Tax Rate percentage or decimal values for 2021 and 2022, + with one value being negative and the other value being positive + + evaluator-unreliable: true + + +financebench_id_03473: + sector: Consumer Staples + + company: Coca-Cola + period: 2017 + doc-type: 10k + doc: COCACOLA_2017_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'What is the FY2017 return on assets (ROA) for Coca Cola? 
ROA is defined + as: FY2017 net income / (average total assets between FY2016 and FY2017). Round + your answer to two decimal places. Give a response to the question by relying + on the details shown in the balance sheet and the P&L statement.' + + answer: '0.01' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Net income. This metric was located in the 10K as a single line item + named: NET INCOME ATTRIBUTABLE TO SHAREOWNERS OF THE COCA-COLA COMPANY. + + + Metric 2: Total assets. This metric was located in the 10K as a single line item + named: TOTAL ASSETS.' + page(s)-0based: 73 + page(s): 74,76 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Return on Assets (RoA) percentage value that is in the range from 0.90% to 2.00%, + or, alternatively, a calculated decimal value that is in the range from 0.0090 to 0.0200 + (if the answer is a single number, assume that it is that calculated Return on Assets (RoA) metric value) + + evaluator-unreliable: true + + +financebench_id_09724: + sector: Consumer Staples + + company: Coca-Cola + period: 2021 + doc-type: 10k + doc: COCACOLA_2021_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: What is Coca Cola's FY2021 COGS % margin? Calculate what was asked by + utilizing the line items clearly shown in the income statement. + + answer: 39.7% + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Cost of goods sold. This metric was located in the 10K as a single line + item named: Cost of goods sold. + + + Metric 2: Total revenue. This metric was located in the 10K as a single line item + named: Net Operating Revenues.' 
+ page(s)-0based: 61 + page(s): '62' + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Cost of Goods Sold (COGS) Margin + percentage value that is in the range from 38.00% to 42.00%, + or, alternatively, a calculated decimal value that is in the range from 0.3800 to 0.4200 + (if the answer is a single number, assume that it is that calculated Cost of Goods Sold (COGS) Margin metric value) + + +financebench_id_06272: + sector: Consumer Staples + + company: Coca-Cola + period: 2022 + doc-type: 10k + doc: COCACOLA_2022_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: What is Coca Cola's FY2022 dividend payout ratio (using total cash dividends + paid and net income attributable to shareholders)? Round answer to two decimal + places. Answer the question asked by assuming you only have access to information + clearly displayed in the cash flow statement and the income statement. + + answer: '0.8' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Total cash dividends paid out. This metric was located in the 10K as + a single line item named: Dividends. + + + Metric 2: Net income. This metric was located in the 10K as a single line item + named: Net Income Attributable to Shareowners of The Coca-Cola Company.' 
+ page(s)-0based: 62 + page(s): 63,66 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Dividend Payout Ratio decimal value that is in the range from 0.7800 to 0.8200, + or, alternatively, a calculated percentage value that is in the range from 78.00% to 82.00% + (if the answer is a single number, assume that it is that calculated Dividend Payout Ratio metric value) + + evaluator-unreliable: true + + +financebench_id_10130: + sector: Information Technology + + company: Corning + period: 2020 + doc-type: 10k + doc: CORNING_2020_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'Based on the information provided primarily in the balance sheet and + the statement of income, what is FY2020 days payable outstanding (DPO) for Corning? + DPO is defined as: 365 * (average accounts payable between FY2019 and FY2020) + / (FY2020 COGS + change in inventory between FY2019 and FY2020). Round your answer + to two decimal places.' + + answer: '63.86' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Accounts payable. This metric was located in the 10K as a single line + item named: Accounts payable. + + + Metric 2: Inventories. This metric was located in the 10K as a single line item + named: Inventories, net (Note 6). + + + Metric 3: Cost of goods sold. This metric was located in the 10K as a single line + item named: Cost of sales.' 
+ page(s)-0based: 69 + page(s): 70,72 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Days Payable Outstanding (DPO) decimal value that is in the range from 60.00 to 70.00 + (if the answer is a single number, assume that it is that calculated Days Payable Outstanding (DPO) decimal value) + + +financebench_id_02981: + sector: Information Technology + + company: Corning + period: 2021 + doc-type: 10k + doc: CORNING_2021_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: Taking into account the information outlined in the income statement, + what is the FY2019 - FY2021 3 year average unadjusted operating income % margin + for Corning? Answer in units of percents and round to one decimal place. + + answer: 10.3% + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Unadjusted operating income. This metric was located in the 10K as a + single line item named: Operating income. + + + Metric 2: Total revenue. This metric was located in the 10K as a single line item + named: Net sales.' 
+ page(s)-0based: 64 + page(s): '65' + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Average Operating Income Margin percentage value that is in the range from 9.00% to 12.00%, + or, alternatively, a calculated decimal value that is in the range from 0.0900 to 0.1200 + (if the answer is a single number, assume that it is that calculated Average Operating Income Margin metric value) + + evaluator-unreliable: true + + +financebench_id_01346: + sector: Information Technology + + company: Corning + period: 2022 + doc-type: 10k + doc: CORNING_2022_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning + domain-question-num: dg23 + question: How much has the effective tax rate of Corning changed between FY2021 + and FY2022? + + answer: The effective tax rate of Corning has changed from 20% in FY2021 to 23% + in FY 2022. + justification: '' + page(s)-0based: 23 + page(s): '24' + + category: 2-CALC-CHANGE + correctness: >- + the answer says that Effective Tax Rate changed + from approximately 20.2% (or 20%) to approximately 22.9% (or 23%), + and/or that it increased by approximately 2.6, 2.7 or 3 percentage points + (or 2.6%, 2.7%, or 3%) + + evaluator-unreliable: true + + +financebench_id_00005: + sector: Information Technology + + company: Corning + period: 2022 + doc-type: 10k + doc: CORNING_2022_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning OR Logical reasoning + domain-question-num: dg24 + question: Does Corning have positive working capital based on FY2022 data? If working + capital is not a useful or relevant metric for this company, then please state + that and explain why. + + answer: Yes. Corning had a positive working capital amount of $831 million by FY + 2022 close. This answer considers only operating current assets and current liabilities + that were clearly shown in the balance sheet. 
+ justification: 'Trade accounts receivable, net of doubtful accounts+Inventories+Other + current assets-Accounts payable-Other accrued liabilities + + 1721+2904+1157-1804-3147' + page(s)-0based: 59 + page(s): '60' + + category: 3-CALC-COMPLEX + correctness: >- + the answer affirms that Working Capital is/was positive, + proving so by a calculated Working Capital metric value that is positive + + +financebench_id_04209: + sector: Consumer Staples + + company: Costco + period: 2021 + doc-type: 10k + doc: COSTCO_2021_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: Using only the information within the balance sheet, how much total assets + did Costco have at the end of FY2021? Answer in USD millions. + + answer: $59268.00 + justification: 'The metric total assets was directly extracted from the company + 10K. The line item name, as seen in the 10K, was: TOTAL ASSETS.' + page(s)-0based: 37 + page(s): '38' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity equivalent to or approximately equal to + 59268, 59268 million, 59.268 billion, + 59300, 59300 million, 59.3 billion, + 59000, 59000 million or 59 billion + + +financebench_id_05915: + sector: Health Care + + company: CVS Health + period: 2018 + doc-type: 10k + doc: CVSHEALTH_2018_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'What is the FY2018 fixed asset turnover ratio for CVS Health? Fixed asset + turnover ratio is defined as: FY2018 revenue / (average PP&E between FY2017 and + FY2018). Round your answer to two decimal places. Calculate what was asked by + utilizing the line items clearly shown in the P&L statement and the balance sheet.' + + answer: '17.98' + justification: 'The metric in question was calculated using other simpler metrics. 
+ The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Total revenue. This metric was located in the 10K as a single line item + named: Total revenues. + + + Metric 2: Ppne, net. This metric was located in the 10K as a single line item + named: Property and equipment, net.' + page(s)-0based: 301 + page(s): 302,304 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Fixed Asset Turnover Ratio decimal value that is in the range from 17.00 to 19.00 + (if the answer is a single number, assume that it is that calculated Fixed Asset Turnover Ratio decimal value) + + evaluator-unreliable: true + + +financebench_id_00790: + sector: Health Care + + company: CVS Health + period: 2022 + doc-type: 10k + doc: CVSHEALTH_2022_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) + domain-question-num: dg06 + question: Is CVS Health a capital-intensive business based on FY2022 data? + + answer: Yes, CVS Health requires an extensive asset base to operate, which is evident + from its ROA of only 1.82% in 2022 and 3.39% in 2021, though it should be noted + that a significant portion of this asset base is goodwill, and CVS's fixed assets/total + assets ratio is on the lower side of 5.6%. 
+ justification: 'Property and equipment, net/Total Assets + + 12873/228275 + + + ROA=Net Income/Total Assets + + 4165/228275 + + 7898/232999' + page(s)-0based: 107 + page(s): 108,110 + + category: 6-OTHER-ADVANCED + correctness: |- + the answer either: + - mentions that a calculated Return on Assets (RoA) metric value is quite low (which suggests capital intensity); or + - mentions that Fixed Assets form only a small proportion of Total Assets (which suggests the reverse) + + evaluator-unreliable: true + + +financebench_id_01107: + sector: Health Care + + company: CVS Health + period: 2022 + doc-type: 10k + doc: CVSHEALTH_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg11 + question: Has CVS Health reported any materially important ongoing legal battles + from 2022, 2021 and 2020? + + answer: "Yes, CVS Health has been involved in multiple ongoing legal battles. Some\ + \ notable legal dispute areas for CVS are: (1) usual and customary pricing litigation:\ + \ where it's claimed that CVS's retail pharmacies overcharged for prescription\ + \ drugs; (2) PBM litigation and investigations: where it's claimed that rebate\ + \ agreements between the drug manufacturers and PBMs caused inflated prices for\ + \ certain drug products; and (3) controlled substances litigation: legal matters\ + \ around opioids for which CVS has agreed to pay up to $4.3 billion to claimants\ + \ in remediation and $625 million to attorneys and fees" + justification: '' + page(s)-0based: 172 + page(s): 173,173,174 + + category: 0-RETRIEVE + correctness: >- + the answer affirms that there have been material lawsuits / legal battles + + +financebench_id_01244: + sector: Health Care + + company: CVS Health + period: 2022 + doc-type: 10k + doc: CVSHEALTH_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg18 + question: Has CVS Health paid dividends to common 
shareholders in Q2 of FY2022? + + answer: Yes, CVS paid a $ 0.55 dividend per share every quarter in FY2022 + justification: '' + page(s)-0based: 67 + page(s): '68' + + category: 0-RETRIEVE + correctness: >- + the answer affirms that dividends have been / were paid + + +financebench_id_00839: + sector: Consumer Discretionary + + company: Foot Locker + period: 2022 + doc-type: 8k + doc: FOOTLOCKER_2022_8K_dated_2022-08-19 + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Does Foot Locker's new CEO have previous CEO experience in a similar company + to Footlocker? + + answer: Yes. She was previous CEO of Ulta Beauty which means she had to manage a + large retail company that has brick and mortar + online business. So yes she was + a CEO in a similar company to Foot Locker before this. + justification: '' + page(s)-0based: 1 + page(s): '2' + + category: 0-RETRIEVE + correctness: >- + the answer affirms that Dillon has got experience in relevant and similar organizations and roles + + evaluator-unreliable: true + + +financebench_id_00822: + sector: Consumer Discretionary + + company: Foot Locker + period: 2022 + doc-type: 8k + doc: FOOTLOCKER_2022_8K_dated-2022-05-20 + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Were there any board member nominees who had substantially more votes + against joining than the other nominees? + + answer: Yes, his name is Richard A. Johnson + justification: Richard A. Johnson had roughly 16.1 million votes against him joining + whereas the maximum votes against joining among all other candidates was roughly + 6.1 million. 
+ page(s)-0based: 1 + page(s): '2' + + category: 1-COMPARE + correctness: >- + the answer identifies Johnson as receiving many votes against + + evaluator-unreliable: true + + +financebench_id_04103: + sector: Consumer Staples + + company: General Mills + period: 2019 + doc-type: 10k + doc: GENERALMILLS_2019_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'What is the FY2019 cash conversion cycle (CCC) for General Mills? CCC + is defined as: DIO + DSO - DPO. DIO is defined as: 365 * (average inventory between + FY2018 and FY2019) / (FY2019 COGS). DSO is defined as: 365 * (average accounts + receivable between FY2018 and FY2019) / (FY2019 Revenue). DPO is defined as: 365 + * (average accounts payable between FY2018 and FY2019) / (FY2019 COGS + change + in inventory between FY2018 and FY2019). Round your answer to two decimal places. + Address the question by using the line items and information shown within the + income statement and the balance sheet.' + + answer: '-3.7' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Accounts payable. This metric was located in the 10K as a single line + item named: Accounts payable. + + + Metric 2: Accounts receivable, net. This metric was located in the 10K as a single + line item named: Receivables. + + + Metric 3: Cost of goods sold. This metric was located in the 10K as a single line + item named: Cost of sales. + + + Metric 4: Total revenue. This metric was located in the 10K as a single line item + named: Net sales. + + + Metric 5: Inventories. This metric was located in the 10K as a single line item + named: Inventories.' 
+ page(s)-0based: 52 + page(s): 53,55 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Cash Conversion Cycle (CCC) metric value + that is NEGATIVE and in the range from -5.00 to -2.00, based on supporting calculated + Days Inventory Outstanding (DIO), Days Sales Outstanding (DSO) and Days Payable Outstanding (DPO) metric values + answer-inadequate: true + + +financebench_id_03471: + sector: Consumer Staples + + company: General Mills + period: 2020 + doc-type: 10k + doc: GENERALMILLS_2020_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: By drawing conclusions from the information stated only in the statement + of financial position, what is General Mills's FY2020 working capital ratio? Define + working capital ratio as total current assets divided by total current liabilities. + Round your answer to two decimal places. + + answer: '0.68' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Total current liabilities. This metric was located in the 10K as a single + line item named: Total current liabilities. + + + Metric 2: Total current assets. This metric was located in the 10K as a single + line item named: Total current assets.' 
+ page(s)-0based: 49 + page(s): '50' + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Working Capital Ratio decimal value that is in the range from 0.6500 to 0.7000, + or, alternatively, a calculated percentage value that is in the range from 65.00% to 70.00% + (if the answer is a single number, assume that it is that calculated Working Capital Ratio metric value) + + +financebench_id_04854: + sector: Consumer Staples + + company: General Mills + period: 2020 + doc-type: 10k + doc: GENERALMILLS_2020_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'According to the information provided in the statement of cash flows, + what is the FY2020 free cash flow (FCF) for General Mills? FCF here is defined + as: (cash from operations - capex). Answer in USD millions.' + + answer: $3215.00 + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Cash from operations. This metric was located in the 10K as a single + line item named: Net cash provided by operating activities. + + + Metric 2: Capital expenditures. This metric was located in the 10K as a single + line item named: Purchases of land, buildings, and equipment.' 
+ page(s)-0based: 51 + page(s): '52' + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Free Cash Flows (FCF) metric value that is equivalent to or approximately equal to + 3215.4, 3215.4 million, 3.2154 billion, + 3215, 3215 million, 3.215 billion, + 3200, 3200 million or 3.2 billion + (if the answer is a single number, assume that it is that calculated Free Cash Flows (FCF) metric value) + + evaluator-unreliable: true + + +financebench_id_10136: + sector: Consumer Staples + + company: General Mills + period: 2022 + doc-type: 10k + doc: GENERALMILLS_2022_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'We want to calculate a financial metric. Please help us compute it by + basing your answers off of the cash flow statement and the income statement. Here''s + the question: what is the FY2022 retention ratio (using total cash dividends paid + and net income attributable to shareholders) for General Mills? Round answer to + two decimal places.' + + answer: '0.54' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Total cash dividends paid out. This metric was located in the 10K as + a single line item named: Dividends paid. + + + Metric 2: Net income. This metric was located in the 10K as a single line item + named: Net earnings attributable to General Mills.' 
+ page(s)-0based: 44 + page(s): 45,49 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Retention Ratio decimal value that is in the range from 0.5000 to 0.6000, + or, alternatively, a calculated percentage value that is in the range from 50.00% to 60.00% + (if the answer is a single number, assume that it is that calculated Retention Ratio metric value) + + +financebench_id_00956: + sector: Health Care + + company: Johnson & Johnson + period: 2022 + doc-type: 10k + doc: JOHNSON_JOHNSON_2022_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) + domain-question-num: dg05 + question: Are JnJ's FY2022 financials that of a high growth company? + + answer: No, JnJ's FY2022 financials are not of a high growth company as sales grew + by 1.3% in FY2022. + justification: '' + page(s)-0based: 27 + + category: 0-RETRIEVE + correctness: >- + the answer mentions low/slow Sales Revenue growth + + +financebench_id_00669: + sector: Health Care + + company: Johnson & Johnson + period: 2022 + doc-type: 10k + doc: JOHNSON_JOHNSON_2022_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) OR Numerical + reasoning OR Logical reasoning + domain-question-num: dg16 + question: What drove gross margin change as of FY2022 for JnJ? If gross margin is + not a useful metric for a company like this, then please state that and explain + why. + + answer: 'For FY22, JnJ had changes in gross margin due to: One-time COVID-19 vaccine + manufacturing exit related costs, Currency impacts in the Pharmaceutical segment, + Commodity inflation in the MedTech and Consumer Health segments, partially offset + by Supply chain benefits in the Consumer Health segment.' + justification: Gross margin change is equivalent to the increase in cost of products + sold as a percent to sales. 
+ page(s)-0based: 33 + + category: 5-EXPLAIN-FACTORS + correctness: |- + the answer mentions at least 2 of following: + - one-time COVID-19 vaccine manufacturing exit related costs; + - currency impacts in the Pharmaceutical segment; + - commodity inflation in the MedTech and Consumer Health segments; and/or + - supply chain benefits in the Consumer Health segment + + evaluator-unreliable: true + + +financebench_id_00711: + sector: Health Care + + company: Johnson & Johnson + period: 2022 + doc-type: 10k + doc: JOHNSON_JOHNSON_2022_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning OR Logical reasoning + domain-question-num: dg25 + question: Roughly how many times has JnJ sold its inventory in FY2022? Calculate + inventory turnover ratio for FY2022; if conventional inventory management is not + meaningful for the company then state that and explain why. + + answer: JnJ sold its inventory 2.7 times in FY2022. + justification: Inventory turnover ratio = Cost of products sold/average inventories + = 31,089/((12,483+10,387)/2) = 2.7 + page(s)-0based: 45 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Inventory Turnover Ratio decimal value that is in the range from 2.00 to 3.00 + (if the answer is a single number, assume that it is that calculated Inventory Turnover Ratio decimal value) + + evaluator-unreliable: true + + +financebench_id_00651: # TODO: retrieve growth rates + sector: Health Care + + company: Johnson & Johnson + period: 2022 + doc-type: Earnings + doc: JOHNSON_JOHNSON_2022Q4_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Is growth in JnJ's adjusted EPS expected to accelerate in FY2023? + + answer: No, rate of growth in adjusted EPS is expected to decelerate slightly from + 3.6% in FY2022 to 3.5% in FY2023. + justification: FY2023 adjusted EPS growth of 3.5% is slightly lower than FY2022 + adjusted EPS growth of 3.6%. 
+ page(s)-0based: 0 + + category: 1-COMPARE + correctness: >- + the answer mentions 3.5% and 3.6%, + or, alternatively, concludes that growth is NOT expected to accelerate + + evaluator-unreliable: true + + +financebench_id_01484: + sector: Health Care + + company: Johnson & Johnson + period: 2022 + doc-type: Earnings + doc: JOHNSON_JOHNSON_2022Q4_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: How did JnJ's US sales growth compare to international sales growth in + FY2022? + + answer: US sales increased 3.0% vs international sales decline of 0.6%. + justification: '' + page(s)-0based: 1 + + category: 1-COMPARE + correctness: >- + the answer mentions US sales increased and international sales decreased + + evaluator-unreliable: true + + +financebench_id_01488: + sector: Health Care + + company: Johnson & Johnson + period: 2023 + doc-type: 8k + doc: JOHNSON_JOHNSON_2023_8K_dated-2023-08-30 + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Which business segment of JnJ will be treated as a discontinued operation + from August 30, 2023 onward? + + answer: The Consumer Health business segment will be treated as a discontinued operation + from August 30, 2023 onward. + justification: '' + page(s)-0based: 3 + + category: 0-RETRIEVE + correctness: >- + the answer identifies Consumer Health as discontinued + + +financebench_id_01490: + sector: Health Care + + company: Johnson & Johnson + period: 2023 + doc-type: 8k + doc: JOHNSON_JOHNSON_2023_8K_dated-2023-08-30 + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: What is the amount of the gain accruing to JnJ as a result of the separation + of its Consumer Health business segment, as of August 30, 2023? + + answer: JnJ will make a gain of approximately $20 billion from the separation of + its Consumer Health business segment. 
+ justification: '' + page(s)-0based: 3 + + category: 0-RETRIEVE + correctness: >- + the answer mentions 20 billion + + +financebench_id_01491: + sector: Health Care + + company: Johnson & Johnson + period: 2023 + doc-type: 8k + doc: JOHNSON_JOHNSON_2023_8K_dated-2023-08-30 + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: What is the amount of the cash proceeds that JnJ realised from the separation + of Kenvue (formerly Consumer Health business segment), as of August 30, 2023? + + answer: JnJ realised $13.2 billion in cash proceeds from the separation of Kenvue. + justification: '' + page(s)-0based: 3 + + category: 0-RETRIEVE + correctness: >- + the answer mentions 13.2 billion, or, alternatively, approximately 13 billion + + +financebench_id_01487: + sector: Health Care + + company: Johnson & Johnson + period: 2023 + doc-type: Earnings + doc: JOHNSON_JOHNSON_2023Q2_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Did JnJ's net earnings as a percent of sales increase in Q2 of FY2023 + compared to Q2 of FY2022? + + answer: Yes, net earnings as a percent of sales increased from 20% in Q2 of FY2022 + to 20.1% in Q2 of FY2023. + justification: '' + page(s)-0based: 9 + + category: 1-COMPARE + correctness: >- + the answer mentions 20.0% (or 20%) and 20.1%, or, alternatively, mentions a slight increase + + +financebench_id_00299: + sector: Financials + + company: JPMorgan + period: 2021 + doc-type: 10q + doc: JPMORGAN_2021Q1_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Which of JPM's business segments had the lowest net revenue in 2021 Q1? + + answer: Corporate. Its net revenue was -$473 million. 
+ justification: 14,605 > 12,517 > 4,077 > 2,393 > -473 + page(s)-0based: 18 + page(s): '19' + + category: 1-COMPARE + correctness: >- + the answer identifies Corporate segment as having lowest Net Revenue + + +financebench_id_02119: + sector: Financials + + company: JPMorgan + period: 2021 + doc-type: 10q + doc: JPMORGAN_2021Q1_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: If JPM went bankrupted by the end by 2021 Q1 and liquidated all of its + assets to pay its shareholders, how much could each shareholder get? + + answer: They could receive $66.56 per share. + justification: '' + page(s)-0based: 5 + page(s): '6' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity that is in the range from 60.00 to 70.00 + + evaluator-unreliable: true + + +financebench_id_00206: + sector: Financials + + company: JPMorgan + period: 2022 + doc-type: 10k + doc: JPMORGAN_2022_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) OR Logical + reasoning + domain-question-num: dg03 + question: Are JPM's gross margins historically consistent (not fluctuating more + than roughly 2% each year)? If gross margins are not a relevant metric for a company + like this, then please state that and explain why. + + answer: Since JPM is a financial institution, gross margin is not a relevant metric. 
+ justification: '' + page(s)-0based: 2 + page(s): '3' + + category: 6-OTHER-ADVANCED + correctness: >- + the answer argues that Gross Margin is not a very relevant/useful metric for this business model and/or industry, + or, alternatively, that performance in this business model and/or industry is usually not judged through Gross Margin + + evaluator-unreliable: true + + +financebench_id_00394: + sector: Financials + + company: JPMorgan + period: 2022 + doc-type: 10q + doc: JPMORGAN_2022Q2_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: In 2022 Q2, which of JPM's business segments had the highest net income? + + answer: Corporate & Investment Bank. Its net income was $3725 million. + justification: 3725 > 3100 > 1004 > 994 > -174 + page(s)-0based: 20 + page(s): '21' + + category: 1-COMPARE + correctness: >- + the answer identifies Corporate & Investment Bank segment as having highest Net Income + + +financebench_id_02049: + sector: Financials + + company: JPMorgan + period: 2023 + doc-type: 10q + doc: JPMORGAN_2023Q2_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Looking at VaR, did the risk that JPM faced in the second fiscal quarter + of 2023 decrease compared to the same period in the prior year? + + answer: Yes. It decreased. + justification: '' + page(s)-0based: 84 + page(s): '85' + + category: 0-RETRIEVE + correctness: >- + the answer affirms that VaR decreased + + +financebench_id_10499: + sector: Consumer Staples + + company: Kraft Heinz + period: 2019 + doc-type: 10k + doc: KRAFTHEINZ_2019_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'What is Kraft Heinz''s FY2019 inventory turnover ratio? Inventory turnover + ratio is defined as: (FY2019 COGS) / (average inventory between FY2018 and FY2019). + Round your answer to two decimal places. 
Please base your judgments on the information + provided primarily in the balance sheet and the P&L statement.' + + answer: '6.25' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Cost of goods sold. This metric was located in the 10K as a single line + item named: Cost of products sold. + + + Metric 2: Inventories. This metric was located in the 10K as a single line item + named: Inventories.' + page(s)-0based: 49 + page(s): 50,52 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Inventory Turnover Ratio decimal value that is in the range from 6.00 to 6.50 + (if the answer is a single number, assume that it is that calculated Inventory Turnover Ratio decimal value) + + +financebench_id_04412: + sector: Industrials + + company: Lockheed Martin + period: 2020 + doc-type: 10k + doc: LOCKHEEDMARTIN_2020_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'We need to calculate a reasonable approximation (or exact number if possible) + of a financial metric. Basing your judgment by information plainly provided in + the balance sheet and the P&L statement, what is Lockheed Martin''s FY2020 asset + turnover ratio? Asset turnover ratio is defined as: FY2020 revenue / (average + total assets between FY2019 and FY2020). Round your answer to two decimal places.' + + answer: '1.33' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Total revenue. This metric was located in the 10K as a single line item + named: Total net sales. + + + Metric 2: Total assets. This metric was located in the 10K as a single line item + named: Total assets.' 
+ page(s)-0based: 66 + page(s): 67,69 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Asset Turnover Ratio decimal value that is in the range from 1.30 to 1.40 + (if the answer is a single number, assume that it is that calculated Asset Turnover Ratio decimal value) + + +financebench_id_03031: + sector: Industrials + + company: Lockheed Martin + period: 2021 + doc-type: 10k + doc: LOCKHEEDMARTIN_2021_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: What is Lockheed Martin's FY2021 net working capital? Define net working + capital as total current assets less total current liabilities. Answer in USD + millions. Respond to the question by assuming the perspective of an investment + analyst who can only use the details shown within the balance sheet. + + answer: $5818.00 + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Total current liabilities. This metric was located in the 10K as a single + line item named: Total current liabilities. + + + Metric 2: Total current assets. This metric was located in the 10K as a single + line item named: Total current assets.' 
+ page(s)-0based: 67 + page(s): '68' + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Net Working Capital metric value that is equivalent to or approximately equal to + 5818, 5818 million, 5.818 billion, + 5800, 5800 million or 5.8 billion + (if the answer is a single number, assume that it is that calculated Net Working Capital metric value) + + +financebench_id_03718: + sector: Industrials + + company: Lockheed Martin + period: 2022 + doc-type: 10k + doc: LOCKHEEDMARTIN_2022_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: What is Lockheed Martin's 2 year total revenue CAGR from FY2020 to FY2022 + (in units of percents and round to one decimal place)? Provide a response to the + question by primarily using the statement of income. + + answer: 0.4% + justification: 'The metric total revenue was directly extracted from the company + 10K. The line item name, as seen in the 10K, was: Total net sales. The final step + was to execute the desired CAGR calculation on total revenue.' + page(s)-0based: 62 + page(s): '63' + + category: 2-CALC-CHANGE + correctness: >- + the answer contains a calculated CAGR percentage value that is in the range from 0.400% to 0.500% + (if the answer is a single number, assume that it is that calculated CAGR percentage value) + + evaluator-unreliable: true + + +financebench_id_04171: + sector: Consumer Discretionary + + company: MGM Resorts + period: 2018 + doc-type: 10k + doc: MGMRESORTS_2018_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: Basing your judgments off of the balance sheet, what is the year end FY2018 + amount of accounts payable for MGM Resorts? Answer in USD millions. + + answer: $303.00 + justification: 'The metric accounts payable was directly extracted from the company + 10K. The line item name, as seen in the 10K, was: Accounts payable.' 
+ page(s)-0based: 56 + page(s): '57' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity that is equivalent to or approximately equal to + 302.6, 302.6 million, 0.3026 billion, + 303, 303 million, 0.303 billion, + 300, 300 million or 0.3 billion + + evaluator-unreliable: true + + +financebench_id_03849: + sector: Consumer Discretionary + + company: MGM Resorts + period: 2020 + doc-type: 10k + doc: MGMRESORTS_2020_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: What is the FY2018 - FY2020 3 year average of capex as a % of revenue + for MGM Resorts? Answer in units of percents and round to one decimal place. Please + utilize information provided primarily within the statement of cash flows and + the statement of income. + + answer: 7.9% + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Capital expenditures. This metric was located in the 10K as a single + line item named: Capital expenditures, net of construction payable. + + + Metric 2: Total revenue. This metric was located in the 10K as a single line item + named: [blank line item referring to total revenue].' 
+ page(s)-0based: 64 + page(s): 65,67 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated metric percentage value that is in the range from 7.50% to 8.50%, + or, alternatively, a calculated decimal value that is in the range from 0.0750 to 0.0850 + (if the answer is a single number, assume that it is that calculated metric value) + + +financebench_id_01254: + sector: Consumer Discretionary + + company: MGM Resorts + period: 2022 + doc-type: 10k + doc: MGMRESORTS_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg18 + question: Has MGM Resorts paid dividends to common shareholders in FY2022? + + answer: Yes. MGM maintained 0.01$ per share annual dividend through out FY 2022. + justification: '' + page(s)-0based: 31 + page(s): '32' + + category: 0-RETRIEVE + correctness: >- + the answer affirms that dividends have been / were paid + + evaluator-unreliable: true + + +financebench_id_00382: + sector: Consumer Discretionary + + company: MGM Resorts + period: 2022 + doc-type: Earnings + doc: MGMRESORTS_2022Q4_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Which region had the Highest EBITDAR Contribution for MGM during FY2022? + + answer: Las Vegas resorts contributed ~90% of company level EBITDAR during FY2022. + justification: 3142308/3497254 + page(s)-0based: 12 + page(s): '13' + + category: 1-COMPARE + correctness: >- + the answer identifies Las Vegas resorts as having highest EBITDAR + + +financebench_id_01911: + sector: Consumer Discretionary + + company: MGM Resorts + period: 2022 + doc-type: Earnings + doc: MGMRESORTS_2022Q4_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: What was MGM's interest coverage ratio using FY2022 Adjusted EBIT as the + numerator and annual Interest Expense as the denominator? 
+ + answer: As adjusted EBIT is negative, coverage ratio is zero + justification: '' + page(s)-0based: 13 + page(s): '14' + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Interest Coverage Ratio metric value, + or, alternatively, concludes that Interest Coverage Ratio is zero + answer-inadequate: true + + +financebench_id_01912: + sector: Consumer Discretionary + + company: MGM Resorts + period: 2022 + doc-type: Earnings + doc: MGMRESORTS_2022Q4_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Which region had the worst topline performance for MGM during FY2022? + + answer: MGM China experienced the worst topline performance amongst the other regions + presented. Its revenue declined 44% in FY2022 whereas the other regions presented + increased their revenues. + justification: '' + page(s)-0based: 2 + page(s): 3,4,4 + + category: 1-COMPARE + correctness: >- + the answer identifies MGM China as having worst top-line Revenue performance + + +financebench_id_00407: + sector: Consumer Discretionary + + company: MGM Resorts + period: 2023 + doc-type: 10q + doc: MGMRESORTS_2023Q2_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Which type of debt received the largest investment among the short term + investments for MGM in H1 FY2023? + + answer: the biggest short term investment is in corporate bonds (almost 82% of the + total investment) + justification: 416420/509921 + page(s)-0based: 10 + page(s): '11' + + category: 1-COMPARE + correctness: >- + the answer identifies corporate bonds as having received largest short-term investment + + +financebench_id_04700: + sector: Information Technology + + company: Microsoft + period: 2016 + doc-type: 10k + doc: MICROSOFT_2016_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: What is the FY2016 COGS for Microsoft? 
Please state answer in USD millions. + Provide a response to the question by primarily using the statement of income. + + answer: $32780.00 + justification: 'The metric cost of goods sold was directly extracted from the company + 10K. The line item name, as seen in the 10K, was: Total cost of revenue.' + page(s)-0based: 51 + page(s): '52' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity that is equivalent to or approximately equal to + 32780, 32780 million, 32.78 billion, + 32800, 32800 million, 32.8 billion + 33000, 33000 million or 33 billion + + +financebench_id_00552: + sector: Information Technology + + company: Microsoft + period: 2023 + doc-type: 10k + doc: MICROSOFT_2023_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning + domain-question-num: dg22 + question: Has Microsoft increased its debt on balance sheet between FY2023 and the + FY2022 period? + answer: No. Microsoft decreased its debt by $2.5bn in FY 2023 vs FY 2022. + justification: 'Current portion of long-term debt+Long-term debt + + 5247+41990 + + 2749+47032' + page(s)-0based: 59 + page(s): '60' + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains calculated Total Debt values for 2022 and 2023, and concludes that Total Debt decreased + answer-inadequate: true + + +financebench_id_04458: + sector: Communication Services + + company: Netflix + period: 2015 + doc-type: 10k + doc: NETFLIX_2015_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'We want to calculate a financial metric. Please help us compute it by + basing your answers off of the statement of income and the statement of cash flows. + Here''s the question: what is the FY2015 unadjusted EBITDA % margin for Netflix? + Calculate unadjusted EBITDA using unadjusted operating income and D&A (from cash + flow statement).' 
+ + answer: 5.4% + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Depreciation and amortization. This metric was located in the 10K as + a single line item named: Depreciation and amortization of property, equipment + and intangibles. + + + Metric 2: Unadjusted operating income. This metric was located in the 10K as a + single line item named: Operating income. + + + Metric 3: Total revenue. This metric was located in the 10K as a single line item + named: Revenues.' + page(s)-0based: 39 + page(s): 40,42 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated EBITDA Margin percentage value that is in the range from 5.00% to 5.50%, + or, alternatively, a calculated decimal value that is in the range from 0.0500 to 0.0550, + assuming that EBITDA = "Operating Income" + "Depreciation & Amortization of Property, Equipment & Intangibles" + (if the answer is a single number, assume that it is that calculated EBITDA Margin metric value) + + +financebench_id_03282: + sector: Communication Services + + company: Netflix + period: 2017 + doc-type: 10k + doc: NETFLIX_2017_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: What is Netflix's year end FY2017 total current liabilities (in USD millions)? + Base your judgments on the information provided primarily in the balance sheet. + + answer: $5466.00 + justification: 'The metric total current liabilities was directly extracted from + the company 10K. The line item name, as seen in the 10K, was: Total current liabilities.' 
+ page(s)-0based: 44 + page(s): '45' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity that is equivalent to or approximately equal to + 5466.3, 5466.3 million, 5.4663 billion, + 5466, 5466 million, 5.466 billion, + 5500, 5500 million or 5.5 billion + + evaluator-unreliable: true + + +financebench_id_04302: + sector: Consumer Discretionary + + company: Nike + period: 2018 + doc-type: 10k + doc: NIKE_2018_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: We need to calculate a reasonable approximation (or exact number if possible) + of a financial metric. Basing your judgment by information plainly provided in + the statement of income, what is Nike's three year average of cost of goods sold + as a % of revenue from FY2016 to FY2018? Answer in units of percents and round + to one decimal place. + + answer: 55.1% + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Cost of goods sold. This metric was located in the 10K as a single line + item named: Cost of sales. + + + Metric 2: Total revenue. This metric was located in the 10K as a single line item + named: Revenues.' 
+ page(s)-0based: 45 + page(s): '46' + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated metric percentage value that is in the range from 50.00% to 60.00%, + or, alternatively, a calculated decimal value that is in the range from 0.5000 to 0.6000 + (if the answer is a single number, assume that it is that calculated metric value) + + +financebench_id_03531: + sector: Consumer Discretionary + + company: Nike + period: 2019 + doc-type: 10k + doc: NIKE_2019_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: According to the details clearly outlined within the balance sheet, how + much total current assets did Nike have at the end of FY2019? Answer in USD millions. + + answer: $16525.00 + justification: 'The metric total current assets was directly extracted from the + company 10K. The line item name, as seen in the 10K, was: Total current assets.' + page(s)-0based: 53 + page(s): '54' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity that is equivalent to or approximately equal to + 16525, 16525 million, 16.525 billion, + 16500, 16500 million or 16.5 billion + + +financebench_id_04080: + sector: Consumer Discretionary + + company: Nike + period: 2021 + doc-type: 10k + doc: NIKE_2021_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'When primarily referencing the income statement and the statement of + financial position, what is the FY2021 inventory turnover ratio for Nike? Inventory + turnover ratio is defined as: (FY2021 COGS) / (average inventory between FY2020 + and FY2021). Round your answer to two decimal places.' + + answer: '3.46' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Cost of goods sold. 
This metric was located in the 10K as a single line + item named: Cost of sales. + + + Metric 2: Inventories. This metric was located in the 10K as a single line item + named: Inventories.' + page(s)-0based: 58 + page(s): 59,61 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Inventory Turnover Ratio decimal value that is in the range from 3.00 to 4.00 + (if the answer is a single number, assume that it is that calculated Inventory Turnover Ratio decimal value) + + +financebench_id_01163: + sector: Consumer Discretionary + + company: Nike + period: 2023 + doc-type: 10k + doc: NIKE_2023_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning + domain-question-num: dg19 + question: Among operations, investing, and financing activities, which brought in + the most (or lost the least) cash flow for Nike in FY2023? + + answer: Among the three, cash flow from operations was the highest for Nike in FY2023. + justification: '' + page(s)-0based: 61 + page(s): '62' + + category: 1-COMPARE + correctness: >- + the answer identifies Operations / Operating Cash Flows as bringing in most cash + + +financebench_id_00080: + sector: Financials + + company: Paypal + period: 2022 + doc-type: 10k + doc: PAYPAL_2022_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning OR Logical reasoning + domain-question-num: dg24 + question: Does Paypal have positive working capital based on FY2022 data? If working + capital is not a useful or relevant metric for this company, then please state + that and explain why. + + answer: Yes. Paypal has a positive working capital of $ 1.6Bn as of FY2022 end. 
+ justification: 'Accounts receivable, net+Loans and interest receivable, net of allowances + +Funds receivable and customer accounts+Prepaid expenses and other current assets-Accounts + payable-Funds payable and amounts due to customers-Accrued expenses and other + current liabilities -Income taxes payable + + 963+7431+36357+1898-126-40107-4055-813' + page(s)-0based: 60 + page(s): '61' + + category: 3-CALC-COMPLEX + correctness: >- + the answer affirms that Working Capital is/was positive, + proving so by a calculated Working Capital metric value that is positive + + +financebench_id_04980: + sector: Consumer Staples + + company: PepsiCo + period: 2021 + doc-type: 10k + doc: PEPSICO_2021_10K + + question-type: metrics-generated + question-reasoning: Information extraction + domain-question-num: '' + question: What is the FY2021 capital expenditure amount (in USD billions) for PepsiCo? + Respond to the question by assuming the perspective of an investment analyst who + can only use the details shown within the statement of cash flows. + + answer: $4.60 + justification: 'The metric capital expenditures was directly extracted from the + company 10K. The line item name, as seen in the 10K, was: Capital spending.' + page(s)-0based: 62 + page(s): '63' + + category: 0-RETRIEVE + correctness: >- + the answer contains a quantity that is equivalent to or approximately equal to + 4.625, 4.625 billion, 4625 million, + 4.6, 4.6 billion or 4600 million + + +financebench_id_01009: + sector: Consumer Staples + + company: PepsiCo + period: 2022 + doc-type: 10k + doc: PEPSICO_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg08 + question: What are the geographies that Pepsico primarily operates in as of FY2022? + + answer: 'As of FY2022, Pepsico primarily operates in the following geographies: + North America, Latin America, Europe, Africa, Middle East, South Asia, Asia Pacific, + Australia, New Zealand and China.' 
+ justification: '' + page(s)-0based: 3 + page(s): 4, 5 + + category: 0-RETRIEVE + correctness: |- + the answer mentions at least 3 of following geographies: + - North America, which includes United States and Canada; + - Latin America (LatAm); + - Europe; + - Africa, Middle East and South Asia (AMESA); and + - Asia Pacific, Australia and New Zealand and China (APAC) + + +financebench_id_00735: + sector: Consumer Staples + + company: PepsiCo + period: 2022 + doc-type: 10k + doc: PEPSICO_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg11 + question: Has Pepsico reported any materially important ongoing legal battles from + FY2022 and FY2021? + + answer: No, Pepsico is not involved in material legal battles. + justification: Management believes the final outcome of legal proceedings will not + have a material adverse outcome. + page(s)-0based: 25 + page(s): '26' + + category: 0-RETRIEVE + correctness: >- + the answer says that there have NOT been material lawsuits / legal battles, + or, alternatively, that lawsuits / legal battles are unlikely to have materially adverse outcomes + + evaluator-unreliable: true + + +financebench_id_01328: + sector: Consumer Staples + + company: PepsiCo + period: 2022 + doc-type: 10k + doc: PEPSICO_2022_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg21 + question: What is the quantity of restructuring costs directly outlined in Pepsico's + income statements for FY2022? If restructuring costs are not explicitly outlined + then state 0. + + answer: Pepsico's restructuring costs in FY2022 amounted to $411 million . 
+ justification: '' + page(s)-0based: 77 + page(s): '78' + + category: 0-RETRIEVE + correctness: |- + the answer either: + - mentions a quantity that is equivalent to or approximately equal to 411 million; or + - states 0, zero, and/or that restructuring costs are not explicitly reported + answer-inadequate: true + + +financebench_id_03620: + sector: Consumer Staples + + company: PepsiCo + period: 2022 + doc-type: 10k + doc: PEPSICO_2022_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: What is the FY2022 unadjusted EBITDA less capex for PepsiCo? Define unadjusted + EBITDA as unadjusted operating income + depreciation and amortization [from cash + flow statement]. Answer in USD millions. Respond to the question by assuming the + perspective of an investment analyst who can only use the details shown within + the statement of cash flows and the income statement. + + answer: $9068.00 + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Depreciation and amortization. This metric was located in the 10K as + a single line item named: Depreciation and amortization. + + + Metric 2: Unadjusted operating income. This metric was located in the 10K as a + single line item named: Operating Profit. + + + Metric 3: Capital expenditures. This metric was located in the 10K as a single + line item named: Capital spending.' 
+ page(s)-0based: 61 + page(s): 62,64 + + category: 3-CALC-COMPLEX + correctness: |- + the answer contains a calculated metric value that is either: + - in the range from 8500 to 9500; + - in the range from 8500 million to 9500 million; + - in the range from 8.5 billion to 9.5 billion; or + - stated as approximately 9000 million or 9 billion + (if the answer is a single number, assume that it is that calculated metric value) + + evaluator-unreliable: true + + +financebench_id_04481: + sector: Consumer Staples + + company: PepsiCo + period: 2022 + doc-type: 10k + doc: PEPSICO_2022_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: What is the FY2022 unadjusted EBITDA % margin for PepsiCo? Calculate unadjusted + EBITDA using unadjusted operating income and D&A (from cash flow statement). Give + a response to the question by relying on the details shown in the statement of + cash flows and the P&L statement. + + answer: 16.5% + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Depreciation and amortization. This metric was located in the 10K as + a single line item named: Depreciation and amortization. + + + Metric 2: Unadjusted operating income. This metric was located in the 10K as a + single line item named: Operating Profit. + + + Metric 3: Total revenue. This metric was located in the 10K as a single line item + named: Net Revenue.' 
+ page(s)-0based: 61 + page(s): 62,64 + + category: 3-CALC-COMPLEX + correctness: |- + the answer contains a calculated EBITDA Margin percentage value that is in the range from 16.00% to 17.00%, + or, alternatively, a calculated decimal value that is in the range from 0.1600 to 0.1700 + (if the answer is a single number, assume that it is that calculated EBITDA Margin metric value) + + +financebench_id_01482: + sector: Consumer Staples + + company: PepsiCo + period: 2023 + doc-type: 8k + doc: PEPSICO_2023_8K_dated-2023-05-05 + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: At the Pepsico AGM held on May 3, 2023, what was the outcome of the shareholder + vote on the shareholder proposal for a congruency report by Pepsico on net-zero + emissions policies? + + answer: The shareholder proposal for a congruency report by Pepsico on net-zero + emissions policies was defeated. + justification: '' + page(s)-0based: 3 + page(s): '4' + + category: 1-COMPARE + correctness: >- + the answer says proposal related to Net-Zero Emissions was defeated / not successful + + +financebench_id_00705: + sector: Consumer Staples + + company: PepsiCo + period: 2023 + doc-type: 8k + doc: PEPSICO_2023_8K_dated-2023-05-30 + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: By how much did Pepsico increase its unsecured five year revolving credit + agreement on May 26, 2023? + + answer: $400,000,000 increase. 
+ justification: Increase in five year unsecured revolving credit agreement = May + 26, 2023, five year unsecured revolving credit agreement amount of $4,200,000,000 + - May 27, 2022, five year unsecured revolving credit agreement amount of $3,800,000,000 + = $400,000,000 + page(s)-0based: 1 + page(s): '2' + + category: 2-CALC-CHANGE + correctness: >- + the answer contains a calculated change quantity that is equivalent to or approximately equal to + 400,000,000, 400 million or 0.4 billion + (if the answer is a single number, assume that it is that calculated change amount) + + +financebench_id_00882: + sector: Consumer Staples + + company: PepsiCo + period: 2023 + doc-type: 8k + doc: PEPSICO_2023_8K_dated-2023-05-30 + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: As of May 26, 2023, what is the total amount Pepsico may borrow under + its unsecured revolving credit agreements? + + answer: Total amount Pepsico may borrow under unsecured revolving credit agreements + = $8,400,000,000. + justification: Total amount that may be borrowed under unsecured revolving credit + agreements = 2023, 364 day unsecured revolving credit agreement amount of $4,200,000,000 + + 2023, five year unsecured revolving credit agreement amount of $4,200,000,000 + = $8,400,000,000. 
+ page(s)-0based: 1 + page(s): '2' + + category: 3-CALC-COMPLEX + correctness: |- + the answer either (or both): + - mentions two separate quantities each equal to 4,200,000,000, 4200 million or 4.2 billion; and/or + - contains a calculated total quantity that is greater than or equal to + 8,400,000,000, 8400 million or 8.4 billion + (if the answer is a single number, assume that it is that latter calculated total amount) + + evaluator-unreliable: true + + +financebench_id_01474: + sector: Consumer Staples + + company: PepsiCo + period: 2023 + doc-type: Earnings + doc: PEPSICO_2023Q1_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: As of FY2023Q1, why did Pepsico raise full year guidance for FY2023? + + answer: Pepsico experienced a strong start to FY2023. + justification: '' + page(s)-0based: 0 + page(s): '1' + + category: 0-RETRIEVE + correctness: >- + the answer mentions strong business performance + + +financebench_id_01476: + sector: Consumer Staples + + company: PepsiCo + period: 2023 + doc-type: Earnings + doc: PEPSICO_2023Q1_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: As of FY2023Q1, by how many percentage points did Pepsico raise full year + guidance in respect of core constant currency EPS growth? + + answer: Pepsico raised full year guidance in respect of core constant currency EPS + growth by 1 percentage point. + justification: '' + page(s)-0based: 0 + page(s): '1' + + category: 2-CALC-CHANGE + correctness: >- + the answer mentions growth guidance raised from 8% to 9%, + and/or growth guidance raised by 1 percentage point or 1% + + evaluator-unreliable: true + + +financebench_id_00302: + sector: Health Care + + company: Pfizer + period: 2021 + doc-type: 10k + doc: PFIZER_2021_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Did Pfizer grow its PPNE between FY20 and FY21? 
+ + answer: Yes, change in PPNE was positive year over year + justification: 14882 - 13745 > 0 + page(s)-0based: 58 + page(s): '59' + + category: 1-COMPARE + correctness: >- + the answer concludes that Property, Plant & Equipment (PP&E or PPNE) increased + + evaluator-unreliable: true + + +financebench_id_00702: + sector: Health Care + + company: Pfizer + period: 2021 + doc-type: 10k + doc: PFIZER_2021_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Were there any potential events that are not in Pfizer's standard business + operations that substantially increased net income in 2019? + + answer: Yes, the gain on completion of Consumer Healthcare JV Transaction + justification: Income statement shows the gain on completion of Consumer Healthcare + JV transaction occurred in FY19. In FY21, this event did not affect the net income + at all due to the seemingly one time nature of the line item + page(s)-0based: 56 + page(s): '57' + + category: 5-EXPLAIN-FACTORS + correctness: >- + the answer mentions Consumer Healthcare JV transaction + + +financebench_id_02416: # note: Therachon is mentioned on separate following page + sector: Health Care + + company: Pfizer + period: 2021 + doc-type: 10k + doc: PFIZER_2021_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: What are three main companies acquired by Pfizer mentioned in this 10K + report? + + answer: Trillium, Array, and Therachon + justification: '' + page(s)-0based: 69 + page(s): 70, 71 + + category: 0-RETRIEVE + correctness: >- + the answer mentions Trillium and Array + + +financebench_id_00283: + sector: Health Care + + company: Pfizer + period: 2023 + doc-type: 10q + doc: Pfizer_2023Q2_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: How much does Pfizer expect to pay to spin off Upjohn in the future in + USD million?
+ + answer: '77.78' + justification: '10% cost is remaining amount in the future. Calculation: 700/9 is + 10% of the cost remaining' + page(s)-0based: 40 + page(s): '41' + + category: 6-OTHER-ADVANCED + correctness: >- + the answer mentions 700 million and 90% + + evaluator-unreliable: true + + +financebench_id_00724: + sector: Health Care + + company: Pfizer + period: 2023 + doc-type: 10q + doc: Pfizer_2023Q2_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: For Pfizer, which geographic region had the biggest drop in Q22023 year + over year revenues (on a percentage basis)? + + answer: Developed Rest of the World + justification: It's plainly stated in table format the year over year revenue changes + for each of the regions + page(s)-0based: 37 + page(s): '38' + + category: 1-COMPARE + correctness: >- + the answer identifies Developed Rest of World as having worst percentage/relative decline + + +financebench_id_02419: # tricky: Upjohn spin-off started in 2020 but would complete in 2023 + sector: Health Care + + company: Pfizer + period: 2023 + doc-type: 10q + doc: Pfizer_2023Q2_10Q + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: As of Q2'2023, is Pfizer spinning off any large business segments? + + answer: Yes, it's spinning off Upjohn. + justification: '' + page(s)-0based: 40 + page(s): '41' + + category: 0-RETRIEVE + correctness: >- + the answer mentions Upjohn + + evaluator-unreliable: true + + +financebench_id_00746: + sector: Consumer Discretionary + + company: Ulta Beauty + period: 2023 + doc-type: 10k + doc: ULTABEAUTY_2023_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg04 + question: Which debt securities are registered to trade on a national securities + exchange under Ulta Beauty's name as of FY2023? 
+ + answer: There are none + justification: No debt securities listed under securities registered pursuant to + Section 12(b) of the Act. + page(s)-0based: 0 + page(s): '1' + + category: 0-RETRIEVE + correctness: >- + the answer concludes that there are no debt securities traded, + or, alternatively, that no such debt securities are explicitly reported + + +financebench_id_00521: + sector: Consumer Discretionary + + company: Ulta Beauty + period: 2023 + doc-type: 10k + doc: ULTABEAUTY_2023_10K + + question-type: domain-relevant + question-reasoning: Information extraction + domain-question-num: dg10 + question: What are major acquisitions that Ulta Beauty has done in FY2023 and FY2022? + + answer: Ulta Beauty did not make any acquisitions in FY2023 and FY2022. + justification: Consolidated statement of cash flows reflects - for Acquisitions, + net of cash acquired in FY2023 and FY2022. + page(s)-0based: 56 + page(s): '57' + + category: 0-RETRIEVE + correctness: >- + the answer concludes that there are no major acquisitions, + or, alternatively, that no such major acquisitions are explicitly reported + + +financebench_id_00601: + sector: Consumer Discretionary + + company: Ulta Beauty + period: 2023 + doc-type: Earnings + doc: ULTABEAUTY_2023Q4_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: What drove the reduction in SG&A expense as a percent of net sales in + FY2023? + + answer: Lower marketing expenses and leverage of incentive compensation due to higher + sales. The answer here assumes FY2023 refers to the 12 months ended on January + 28, 2023 (although the company refers to this period as its fiscal 2022). + justification: Fiscal 2022 = FY2023. Fiscal 2021 = FY2022.
+ page(s)-0based: 1 + page(s): '2' + + category: 0-RETRIEVE + correctness: >- + the answer mentions marketing expenses and incentive compensation + answer-inadequate: true + + +financebench_id_00603: + sector: Consumer Discretionary + + company: Ulta Beauty + period: 2023 + doc-type: Earnings + doc: ULTABEAUTY_2023Q4_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: What drove the increase in Ulta Beauty's merchandise inventories balance + at end of FY2023? + + answer: Increase in Merchandise inventories balance was driven by the opening of + 47 new stores. The answer here assumes FY2023 refers to the 12 months ended on + January 28, 2023 (although the company refers to this period as its fiscal 2022). + justification: Fiscal 2022 = FY2023. Fiscal 2021 = FY2022. + page(s)-0based: 2 + page(s): '2' + + category: 0-RETRIEVE + correctness: >- + the answer mentions new stores + + +financebench_id_00605: + sector: Consumer Discretionary + + company: Ulta Beauty + period: 2023 + doc-type: Earnings + doc: ULTABEAUTY_2023Q4_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: What percent of Ulta Beauty's total spend on stock repurchases for FY + 2023 occurred in Q4 of FY2023? + + answer: 36%. The answer here assumes FY2023 refers to the 12 months ended on January + 28, 2023 (although the company refers to this period as its fiscal 2022). + justification: Fiscal 2022 = FY2023. Fiscal 2021 = FY2022.
Percent spent in Q4 of + FY2023 = Amount spent in Q4 of FY2023/Total amount spent in FY2023*100 =$328.1 + million /$900 million * 100 = 36% + page(s)-0based: 2 + page(s): '3' + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated percentage value that is in the range from 30% to 40% + (if the answer is a single number, assume that it is that calculated percentage value) + + +financebench_id_00606: # tricky: highly implicit wordings + sector: Consumer Discretionary + + company: Ulta Beauty + period: 2023 + doc-type: Earnings + doc: ULTABEAUTY_2023Q4_EARNINGS + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Did Ulta Beauty's wages expense as a percent of net sales increase or + decrease in FY2023? + + answer: Wages expense as a percent of net sales increased in FY2023. The answer + here assumes FY2023 refers to the 12 months ended on January 28, 2023 (although + the company refers to this period as its fiscal 2022). + justification: Fiscal 2022 = FY2023. Fiscal 2021 = FY2022. Store payroll and benefits + = wages. Store payroll and benefits offsets reduction in SG&A percent of net sales + in FY2023. + page(s)-0based: 1 + page(s): '2' + + category: 6-OTHER-ADVANCED + correctness: >- + the answer concludes that Wages as percent of Net Sales increased + + +financebench_id_00859: + sector: Communication Services + + company: Verizon + period: 2021 + doc-type: 10k + doc: VERIZON_2021_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: Among all of the derivative instruments that Verizon used to manage the + exposure to fluctuations of foreign currencies exchange rates or interest rates, + which one had the highest notional value in FY 2021? + + answer: Cross currency swaps. Its notional value was $32,502 million.
+ justification: The derivative instruments used to manage the exposure were interest + rate swaps, cross currency swaps, forward starting interest rate swaps, and foreign + exchange forwards. 32502 > 19779 > 1000 > 932 + page(s)-0based: 84 + page(s): '85' + + category: 1-COMPARE + correctness: >- + the answer identifies Cross Currency Swaps as having highest notional value + + +financebench_id_02024: + sector: Communication Services + + company: Verizon + period: 2021 + doc-type: 10k + doc: VERIZON_2021_10K + + question-type: novel-generated + question-reasoning: '' + domain-question-num: '' + question: As of FY 2021, how much did Verizon expect to pay for its retirees in + 2024? + + answer: The estimated pension benefits were $1097 million, and the estimated health + care and life insurance benefits were $862 million. + justification: '' + page(s)-0based: 62 + page(s): 63, 94 + + category: 0-RETRIEVE + correctness: |- + the answer mentions at least 1 of following: + - amount of 1,097 million, or 1.1 billion, or approximately equivalent amount (explicitly or implicitly for "Pension (Benefits)"); + - amount of 862 million, or approximately equivalent amount (explicitly or implicitly for "Health Care & Life (Insurance)"); or + - total amount of 1,959 million, or 1.96 billion, or 2.0 billion, or an approximately equivalent amount + + +financebench_id_00216: + sector: Communication Services + + company: Verizon + period: 2022 + doc-type: 10k + doc: VERIZON_2022_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) OR Logical + reasoning + domain-question-num: dg01 + question: Does Verizon have a reasonably healthy liquidity profile based on its + quick ratio for FY 2022? If the quick ratio is not relevant to measure liquidity, + please state that and explain why. + + answer: No. The quick ratio was approximately 0.54 for Verizon. It indicated that + Verizon does not have a healthy liquidity profile.
+ justification: Quick ratio = (current assets - inventories - prepaid expenses) / + current liabilities = (37857 - 2388 - 8358) / 50171 = 0.5403719 + page(s)-0based: 55 + page(s): '56' + + category: 4-CALC-AND-JUDGE + correctness: >- + the answer contains a calculated Quick Ratio decimal value that is in the range from 0.40 to 0.80, + or, alternatively, a calculated percentage value that is in the range from 40% to 80% + + +financebench_id_00215: + sector: Communication Services + + company: Verizon + period: 2022 + doc-type: 10k + doc: VERIZON_2022_10K + + question-type: domain-relevant + question-reasoning: Logical reasoning (based on numerical reasoning) + domain-question-num: dg06 + question: Is Verizon a capital intensive business based on FY 2022 data? + + answer: Yes. Verizon's capital intensity ratio was approximately 2.774729. This + means that it took approximately $2.77 of assets to generate $1 of revenue and + thus, Verizon can be considered capital intensive. + justification: capital intensity ratio = total asset / revenue = 379680/ 136835 + = 2.774729, which is relatively high + page(s)-0based: 55 + page(s): 56, 23 + + category: 4-CALC-AND-JUDGE + correctness: >- + the answer opines that Verizon's business is capital-intensive, and justifies such opinion with a calculated ratio + + evaluator-unreliable: true + + +financebench_id_00566: + sector: Communication Services + + company: Verizon + period: 2022 + doc-type: 10k + doc: VERIZON_2022_10K + + question-type: domain-relevant + question-reasoning: Numerical reasoning + domain-question-num: dg22 + question: Has Verizon increased its debt on balance sheet between 2022 and the 2021 + fiscal period? + + answer: No. Verizon's debt decreased by $229 million. 
+ justification: debt change = debt in 2022 - debt in 2021 = 150639 - 150868 = -229 + page(s)-0based: 76 + page(s): '77' + + category: 1-COMPARE + correctness: >- + the answer concludes that debt decreased + + evaluator-unreliable: true + + +financebench_id_06247: + sector: Consumer Staples + + company: Walmart + period: 2018 + doc-type: 10k + doc: WALMART_2018_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: 'What is FY2018 days payable outstanding (DPO) for Walmart? DPO is defined + as: 365 * (average accounts payable between FY2017 and FY2018) / (FY2018 COGS + + change in inventory between FY2017 and FY2018). Round your answer to two decimal + places. Please base your judgments on the information provided primarily in the + statement of financial position and the P&L statement.' + + answer: '42.69' + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Accounts payable. This metric was located in the 10K as a single line + item named: Accounts payable. + + + Metric 2: Inventories. This metric was located in the 10K as a single line item + named: Inventories. + + + Metric 3: Cost of goods sold. This metric was located in the 10K as a single line + item named: Cost of sales.' 
+ page(s)-0based: 56 + page(s): 57,59 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated Days Payable Outstanding (DPO) decimal value that is in the range from 35.00 to 50.00 + (if the answer is a single number, assume that it is that calculated Days Payable Outstanding (DPO) decimal value) + + +financebench_id_04784: + sector: Consumer Staples + + company: Walmart + period: 2019 + doc-type: 10k + doc: WALMART_2019_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: Based on the information provided primarily in the statement of income, + what is the FY2018 - FY2019 change in unadjusted operating income % margin for + Walmart? Answer in units of percents and round to one decimal place. + + answer: 0.2% + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Unadjusted operating income. This metric was located in the 10K as a + single line item named: Operating income. + + + Metric 2: Total revenue. This metric was located in the 10K as a single line item + named: Total revenues.' 
+ page(s)-0based: 47 + page(s): '48' + + category: 3-CALC-COMPLEX + correctness: |- + the answer contains either: + - calculated Operating Income Margin percentage values for 2018 and 2019, + and their difference, which is a percentage value less than 0.5% in magnitude; or + - calculated Operating Income Margin decimal values for 2018 and 2019, + and their difference, which is a decimal value less than 0.005 in magnitude + answer-inadequate: true + + +financebench_id_06741: + sector: Consumer Staples + + company: Walmart + period: 2020 + doc-type: 10k + doc: WALMART_2020_10K + + question-type: metrics-generated + question-reasoning: Numerical reasoning + domain-question-num: '' + question: What is the FY2018 - FY2020 3 year average unadjusted EBITDA % margin + for Walmart? Define unadjusted EBITDA as unadjusted operating income + depreciation + and amortization from the cash flow statement. Answer in units of percents and + round to one decimal place. Calculate what was asked by utilizing the line items + clearly shown in the P&L statement and the cash flow statement. + + answer: 6.2% + justification: 'The metric in question was calculated using other simpler metrics. + The various simpler metrics (from the current and, if relevant, previous fiscal + year(s)) used were: + + + Metric 1: Depreciation and amortization. This metric was located in the 10K as + a single line item named: Depreciation and amortization. + + + Metric 2: Unadjusted operating income. This metric was located in the 10K as a + single line item named: Operating income. + + + Metric 3: Total revenue. This metric was located in the 10K as a single line item + named: Total revenues.' 
+ page(s)-0based: 50 + page(s): 51,56 + + category: 3-CALC-COMPLEX + correctness: >- + the answer contains a calculated EBITDA Margin percentage value that is in the range from 5.50% to 6.50%, + or, alternatively, a calculated decimal value that is in the range from 0.0550 to 0.0650 + (if the answer is a single number, assume that it is that calculated EBITDA Margin metric value) diff --git a/examples/FinanceBench-AMD/knowledge-store.txt b/examples/FinanceBench-AMD/knowledge-store.txt new file mode 100644 index 000000000..e623a859d --- /dev/null +++ b/examples/FinanceBench-AMD/knowledge-store.txt @@ -0,0 +1,45 @@ +Liquidity Metric Formulas +------------------------- + +`(Net) Working Capital` = `(Total) Current Assets` - `(Total) Current Liabilities` + +`Working Capital Ratio` = `(Total) Current Assets` / `(Total) Current Liabilities` + +`Quick Ratio` = ( + (`Cash & Cash Equivalents` + + `Short-Term Investments or (Current) Marketable Securities` + + `(Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables`) + / `(Total) Current Liabilities` +) + +`Operating Cash Flow Ratio` = ( + `(Net) Cash Flows from Operations, a.k.a. (Net) Operating Cash Flows` + / `(Total) Current Liabilities` +) + +`Free Cash Flow, a.k.a. FCF` = ( + `(Net) Cash Flows from Operations, a.k.a. (Net) Operating Cash Flows` - + `Capital Expenditure(s), a.k.a. CapEx, or Capital Spending, or Property, Plant & Equipment (PP&E) Expenditure(s)/Purchase(s)` +) + +`Free Cash Flow Conversion Ratio` = `Free Cash Flow, a.k.a. FCF` / `Earnings before Interest, Tax, Depreciation & Amortization, a.k.a. EBITDA` + +`Days Inventory Outstanding, a.k.a. DIO` = ( + 365 * `average (Total) (Net) Inventory(ies), typically between two consecutive fiscal year-ends` + / `(Total) Cost of Goods Sold, a.k.a. (Total) COGS, or (Total) Cost of Sales, or (Total) Cost of Revenue` +) + +`Days Payable Outstanding, a.k.a. 
DPO` = ( + 365 * `average Accounts Payable, typically between two consecutive fiscal year-ends` + / (`(Total) Cost of Goods Sold, a.k.a. (Total) COGS, or (Total) Cost of Sales, or (Total) Cost of Revenue` + + `change in (Total) (Net) Inventory(ies), typically between two consecutive fiscal year-ends`) +) + +`Days Sales Outstanding, a.k.a. DSO` = ( + 365 * `average (Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables, typically between two consecutive fiscal year-ends` + / `(Total) (Net) (Operating) Revenue(s), a.k.a. (Total) (Net) Sales` +) + +`Cash Conversion Cycle, a.k.a. CCC` = ( + `Days Inventory Outstanding, a.k.a. DIO` + `Days Sales Outstanding, a.k.a. DSO` - `Days Payable Outstanding, a.k.a. DPO` +) diff --git a/examples/FinanceBench-AMD/log.py b/examples/FinanceBench-AMD/log.py new file mode 100644 index 000000000..874f12f53 --- /dev/null +++ b/examples/FinanceBench-AMD/log.py @@ -0,0 +1,39 @@ +from pathlib import Path +# import sys + +from loguru import logger + +from data_and_knowledge import FbId, DOC_NAMES_BY_FB_ID + + +LOG_DIR_PATH: Path = Path(__file__).parent / '.log' +CURRENT_LOG_HANDLER_ID: int | None = None + + +# loguru.readthedocs.io/en/stable/api/logger.html#loguru._logger.Logger.add +# logger.add(sink=sys.stdout, level='DEBUG', +# # format=..., +# filter=None, +# colorize=True, +# serialize=False, +# backtrace=True, diagnose=True, +# enqueue=False, context=None, +# catch=True) + + +def switch_log_file(fb_id: FbId, output_name: str): + global CURRENT_LOG_HANDLER_ID # pylint: disable=global-statement + + if CURRENT_LOG_HANDLER_ID is not None: + logger.remove(handler_id=CURRENT_LOG_HANDLER_ID) + + CURRENT_LOG_HANDLER_ID = logger.add(sink=(Path(LOG_DIR_PATH) / + DOC_NAMES_BY_FB_ID[fb_id] / fb_id[16:] / f'{output_name}.log'), + level='DEBUG', + # format=..., + filter=None, + colorize=True, + serialize=False, + backtrace=True, diagnose=True, + enqueue=False, context=None, + catch=True) diff --git a/examples/FinanceBench-AMD/program-store.yml 
b/examples/FinanceBench-AMD/program-store.yml new file mode 100644 index 000000000..36e65732c --- /dev/null +++ b/examples/FinanceBench-AMD/program-store.yml @@ -0,0 +1,36 @@ +quick-ratio: + task: Assess liquidity health of {COMPANY} through its `Quick Ratio` as at {PERIOD} fiscal period end + + sub-htps: + - task: |- + Calculate `Quick Ratio` of {COMPANY} as at {PERIOD} fiscal period end as decimal value according to formula: + + `Quick Ratio` = ( + (`Cash & Cash Equivalents` + + `Short-Term Investments or (Current) Marketable Securities` + + `(Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables`) + / `(Total) Current Liabilities` + ) + + sub-htps: + # 1 single Retrieval task for multiple quantities on same statement, for both efficiency & mutual consistency; + # retrieve individual numerator & denominator balance values only, without taking division + # because RAG LMs may not be good at calculation & mathematical reasoning + - task: |- + What are values in dollars of: + - `Cash & Cash Equivalents`; + - `Short-Term Investments or (Current) Marketable Securities`; + - `(Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables`; and + - `(Total) Current Liabilities` + (or most similar-meaning reported line items to those) + + on one same `(Consolidated) Balance Sheet, a.k.a. Statement of (Consolidated) Financial Position` + (or most similar-meaning statement) of {COMPANY} + (and NOT Balance Sheets of its acquired and/or divested companies) + + as at {PERIOD} fiscal period end? 
+ + - task: |- + Compare calculated `Quick Ratio` decimal value against 1.00 and make assessment: + - `Quick Ratio` >= 1.00: liquidity is healthy; or + - `Quick Ratio` < 1.00: liquidity is not very healthy diff --git a/examples/FinanceBench-AMD/rag-ground-truths.yml b/examples/FinanceBench-AMD/rag-ground-truths.yml new file mode 100644 index 000000000..6ef352009 --- /dev/null +++ b/examples/FinanceBench-AMD/rag-ground-truths.yml @@ -0,0 +1,914 @@ +defs: + + BS: (Consolidated) Balance Sheet, a.k.a. Statement of (Consolidated) Financial Position + + cash-and-equiv: Cash & Cash Equivalents + st-invest: Short-Term Investments or (Current) Marketable Securities + recvables: (Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables + invent: (Total) (Net) Inventory(ies) + curr-assets: (Total) Current Assets + fixed-assets: (Net) Fixed Assets, a.k.a. (Net) Property, Plant & Equipment (PP&E) + total-assets: Total Assets + + payables: Accounts Payable + st-debt: Short-Term Debt, or Current Portion of (Long-Term) Debt + curr-liabs: (Total) Current Liabilities + lt-debt: Long-Term Debt (EXCLUDING any current/short-term portion) + + + CF: (Consolidated) Cash Flow(s) Statement(s), a.k.a. (Consolidated) Statement(s) of Cash Flows + + d&a: Depreciation & Amortization, a.k.a. D&A (of Fixed Assets or Property, Plant & Equipment (PP&E)) + op-cf: (Net) Cash Flows from Operations, a.k.a. (Net) Operating Cash Flows + + capex: Capital Expenditure(s), a.k.a. CapEx, or Capital Spending, or Property, Plant & Equipment (PP&E) Expenditure(s)/Purchase(s) + + div: Cash Dividends + + + P&L: >- + (Consolidated) Income Statement, a.k.a. (Consolidated) Profit-and-Loss (P&L) Statement, + or (Consolidated) Earnings Statement, or (Consolidated) Operations Statement + + rev: (Total) (Net) (Operating) Revenue(s), a.k.a. (Total) (Net) Sales + cogs: (Total) Cost of Goods Sold, a.k.a. (Total) COGS, or (Total) Cost of Sales, or (Total) Cost of Revenue + gross: Gross Income, a.k.a. 
Gross Profit, or Gross Earnings (or Loss(es)) + op: (Unadjusted) Operating Income, a.k.a. Operating Profit, or Operating Earnings (or Loss(es)) + ebitda: (Unadjusted) Earnings before Interest, Tax, Depreciation & Amortization, a.k.a. EBITDA + ebit: Earnings before Interest & Tax, a.k.a. EBIT + int: Interest Expense + ebt: Income or Profit or Earnings (or Loss(es)) before (Income) Tax(es) + inc-tax: (Income) Tax Expense + net: Net Income, a.k.a. Net Profit, or Net Earnings (or Loss(es)) (Attributable to Shareholders) + + +ground-truths: + + 3M_2018_10K: + BS: + fixed-assets: + 2018: 8,738 million or 8.7 billion + 2017: 8,866 million or 8.9 billion # unreliable + + + 3M_2022_10K: + BS: + fixed-assets: + 2022: 9,178 million + 2021: 9,429 million + + total-assets: + 2022: 46,455 million + 2021: 47,072 million + + CF: + capex: + 2022: 1,749 million + 2021: 1,603 million + 2020: 1,501 million # unreliable + + P&L: + rev: + 2022: 34,229 million + 2021: 35,355 million + 2020: 32,184 million + + net: + 2022: 5,777 million + 2021: 5,921 million + 2020: 5,449 million + + + 3M_2023Q2_10Q: + BS: + cash-and-equiv: + 2023Q2: 4,258 million # unreliable + 2022: 3,655 million + + st-invest: + 2023Q2: 56 million + 2022: 238 million + + recvables: + 2023Q2: 4,947 million + 2022: 4,532 million + + invent: + 2023Q2: 5,280 million + 2022: 5,372 million + + curr-assets: + 2023Q2: 15,754 million + 2022: 14,688 million + + curr-liabs: + 2023Q2: 10,936 million + 2022: 9,523 million + + + ACTIVISIONBLIZZARD_2019_10K: + BS: + fixed-assets: + 2019: 253 million + 2018: 282 million + + CF: + capex: + 2019: 116 million + 2018: 131 million + 2017: 155 million # unreliable + + P&L: + rev: + 2019: 6,489 million + 2018: 7,500 million + 2017: 7,017 million + + + ADOBE_2015_10K: + BS: + curr-liabs: + 2015: 2,213.556 million or 2,213.6 million or 2.21 billion or 2.2 billion + 2014: 2,494.435 million or 2,494.4 million or 2.49 billion or 2.5 billion + + CF: + op-cf: + 2015: 1,469.502 million or 1,469.5 
million or 1.47 billion or 1.5 billion + 2014: 1,287.482 million or 1,287.5 million or 1.29 billion or 1.3 billion + 2013: 1,151.686 million or 1,151.7 million or 1.15 billion or 1.2 billion + + + ADOBE_2016_10K: + P&L: + op: + 2016: 1,493.602 million or 1,493.6 million or 1.49 billion or 1.5 billion # unreliable + 2015: 903.095 million or 903.1 million or 0.9 billion # unreliable + 2014: 412.685 million or 412.7 million or 0.41 billion or 0.4 billion # unreliable + + + ADOBE_2017_10K: + BS: + curr-liabs: + 2017: 3,527.457 million or 3,527.5 million or 3.53 billion or 3.5 billion + 2016: 2,811.635 million or 2,811.6 million or 2.81 billion or 2.8 billion + + CF: + op-cf: + 2017: 2,912.853 million or 2,912.9 million or 2.91 billion or 2.9 billion + 2016: 2,199.728 million or 2,199.7 million or 2.2 billion + 2015: 1,469.502 million or 1,469.5 million or 1.47 billion or 1.5 billion # unreliable + + + ADOBE_2022_10K: + CF: + op-cf: + 2022: 7,838 million + 2021: 7,230 million + 2020: 5,727 million + + capex: + 2022: 442 million # unreliable + 2021: 348 million # unreliable + 2020: 419 million # unreliable + + P&L: + rev: + 2022: 17,606 million # unreliable + 2021: 15,785 million + 2020: 12,868 million + + op: + 2022: 6,098 million + 2021: 5,802 million + 2020: 4,237 million + + net: + 2022: 4,756 million + 2021: 4,822 million + 2020: 5,260 million + + + AES_2022_10K: + BS: + invent: + 2022: 1,055 million + 2021: 604 million + + total-assets: + 2022: 38,363 million + 2021: 32,963 million + + P&L: + cogs: + 2022: 10,069 million # unreliable + 2021: 8,430 million # unreliable + 2020: 6,967 million # unreliable + + net: + 2022: negative (loss) 546 million + 2021: negative (loss) 409 million # unreliable + 2020: 46 million + + + AMAZON_2017_10K: + BS: + invent: + 2017: 16,047 million + 2016: 11,461 million # unreliable + + payables: + 2017: 34,616 million + 2016: 25,309 million + + P&L: + rev: + 2017: 177,866 million + 2016: 135,987 million + 2015: 107,006 million + + cogs: 
+ 2017: 111,934 million # unreliable: often mistaken for Total Operating Expenses $173,760 million + 2016: 88,265 million # unreliable: often mistaken for Total Operating Expenses $131,801 million + 2015: 71,651 million + + + AMCOR_2020_10K: + BS: + recvables: + 2020: 1,615.9 million # unreliable + 2019: 1,864.3 million # unreliable + + + AMCOR_2023_10K: + BS: + cash-and-equiv: + 2023: 689 million + 2022: 775 million + + st-invest: + 2023: 0 (or not explicitly reported) + 2022: 0 (or not explicitly reported) + + recvables: + 2023: 1,875 million # unreliable + 2022: 1,935 million + + invent: + 2023: 992 million + 1,221 million, or 2,213 million + 2022: 1,114 million + 1,325 million, or 2,439 million + + curr-assets: + 2023: 5,308 million + 2022: 5,853 million + + curr-liabs: + 2023: 4,476 million + 2022: 5,103 million + + P&L: + rev: + 2023: 14,694 million + 2022: 14,544 million + 2021: 12,861 million + + gross: + 2023: 2,725 million + 2022: 2,820 million + 2021: 2,732 million + + + AMCOR_2023Q4_EARNINGS: + P&L: + rev: + 2023Q4: 3,673 million + 2023FY: 14,694 million + 2022Q4: 3,909 million + 2022FY: 14,544 million + + ebitda: + 2023Q4: 540 million # unreliable: FY & Quarter numbers often mistaken for each other + 2023FY: 2,018 million # unreliable: FY & Quarter numbers often mistaken for each other + + + AMD_2015_10K: + CF: + d&a: + 2015: 167 million + 2014: 203 million + 2013: 236 million + + P&L: + rev: + 2015: 3,991 million + 2014: 5,506 million + 2013: 5,299 million + + + AMD_2022_10K: + BS: + cash-and-equiv: + 2022: 4,835 million # unreliable + 2021: 2,535 million # unreliable + + st-invest: + 2022: 1,020 million + 2021: 1,073 million + + recvables: + 2022: 4,126 million # unreliable + 2021: 2,706 million # unreliable + + invent: + 2022: 3,771 million + 2021: 1,955 million # unreliable + + curr-assets: + 2022: 15,019 million + 2021: 8,583 million + + curr-liabs: + 2022: 6,369 million + 2021: 4,240 million + + + AMERICANWATERWORKS_2021_10K: + CF: + d&a: + 2021: 
636 million # unreliable + 2020: 604 million # unreliable + 2019: 582 million # unreliable + + P&L: + op: + 2021: 1,196 million + 2020: 1,248 million + 2019: 1,214 million + + + AMERICANWATERWORKS_2022_10K: + BS: + curr-assets: + 2022: 1,250 million + 2021: 1,554 million + + curr-liabs: + 2022: 2,811 million + 2021: 2,141 million + + + BESTBUY_2017_10K: + P&L: + rev: + 2017: 39,403 million + 2016: 39,528 million + 2015: 40,339 million + + net: + 2017: 1,228 million # unreliable: often mistaken for Net Earnings (Loss) from Continuing Operations $1,207m + 2016: 897 million # unreliable: often mistaken for Net Earnings (Loss) from Continuing Operations $807m + 2015: 1,233 million # unreliable: often mistaken for Net Earnings (Loss) from Continuing Operations $1,246m + + + BESTBUY_2019_10K: + BS: + invent: + 2019: 5,409 million + 2018: 5,209 million + + + BESTBUY_2023_10K: + P&L: + rev: + 2023: 46,298 million or 46.3 billion + 2022: 51,761 million or 51.8 billion + 2021: 47,262 million or 47.3 billion + + gross: + 2023: 9,912 million or 9.9 billion # unreliable + 2022: 11,640 million or 11.6 billion + 2021: 10,573 million or 10.6 billion + + + BLOCK_2016_10K: + BS: + curr-assets: + 2016: 1,001,425 or 1,001.4 million or 1.0 billion + 2015: 705,563 or 705.6 million or 0.7 billion + + curr-liabs: + 2016: 577,464 or 577.5 million or 0.6 billion # unreliable + 2015: 334,202 or 334.2 million or 0.3 billion # unreliable + + + BOEING_2018_10K: + BS: + fixed-assets: + 2018: 12,645 million # unreliable: 2018 & 2017 numbers often mixed up + 2017: 12,672 million # unreliable: 2018 & 2017 numbers often mixed up + + + BOEING_2022_10K: + P&L: + rev: + 2022: 66,608 million + 2021: 62,286 million + 2020: 58,158 million + + gross: + 2022: 3,502 million # unreliable because of missing line-item label + 2021: 3,017 million # unreliable because of missing line-item label + 2020: negative (loss) 5,685 million # unreliable because of missing line-item label + + ebt: + 2022: negative (loss) 
5,022 million + 2021: negative (loss) 5,033 million + 2020: negative (loss) 14,476 million + + inc-tax: + 2022: tax of 31 million + 2021: tax benefit of 743 million + 2020: tax benefit of 2,535 million + + + COCACOLA_2017_10K: + BS: + total-assets: + 2017: 36,545 million # unreliable + 2016: 34,010 million # unreliable + + P&L: + net: + 2017: 1,248 million + 2016: 6,527 million + 2015: 7,351 million + + + COCACOLA_2021_10K: + P&L: + rev: + 2021: 38,655 million + 2020: 33,014 million + 2019: 37,266 million + + cogs: + 2021: 15,357 million + 2020: 13,433 million # unreliable + 2019: 14,619 million # unreliable + + + COCACOLA_2022_10K: + CF: + div: + 2022: 7,616 million + 2021: 7,252 million + 2020: 7,047 million + + P&L: + net: + 2022: 9,542 million + 2021: 9,771 million + 2020: 7,747 million + + + CORNING_2020_10K: + BS: + invent: + 2020: 2,438 million + 2019: 2,320 million + + payables: + 2020: 1,174 million # unreliable: often mistaken for Other Accrued Liabilities $2,437m + 2019: 1,587 million # unreliable: often mistaken for Other Accrued Liabilities $1,923m + + P&L: + cogs: + 2020: 7,772 million # unreliable: often failing to be retrieved at all + 2019: 7,468 million # unreliable: often failing to be retrieved at all + 2018: 6,829 million # unreliable: often failing to be retrieved at all + + + CORNING_2021_10K: + P&L: + rev: + 2021: 14,082 million # unreliable + 2020: 11,303 million + 2019: 11,503 million + + op: + 2021: 2,112 million + 2020: 509 million + 2019: 1,306 million + + + CORNING_2022_10K: + BS: + curr-assets: + 2022: 7,453 million + 2021: 7,659 million + + curr-liabs: + 2022: 5,175 million + 2021: 4,806 million + + + CVSHEALTH_2018_10K: + BS: + fixed-assets: + 2018: 11,349 million # unreliable: often failing to be retrieved at all + 2017: 10,292 million # unreliable: often failing to be retrieved at all + + P&L: + rev: + 2018: 194,579 million # unreliable: often mistaken for Pharmacy Services 2018 revenue $134,128m or Retail/LTC 2018 revenue 
$83,989m + 2017: 184,786 million # unreliable: often mistaken for Pharmacy Services 2017 revenue $130,601m + 2016: 177,546 million + + + CVSHEALTH_2022_10K: + BS: + fixed-assets: + 2022: 12,873 million # unreliable + 2021: 12,896 million + + total-assets: + 2022: 228,275 million + 2021: 232,999 million + + CF: + capex: + 2022: 2,727 million or 2.7 billion + 2021: 2,520 million or 2.5 billion + 2020: 2,437 million or 2.4 billion + + P&L: + rev: + 2022: 322,467 million + 2021: 292,111 million + 2020: 268,706 million + + net: + 2022: 4,149 million + 2021: 7,910 million # unreliable + 2020: 7,179 million # unreliable + + + GENERALMILLS_2019_10K: + BS: + recvables: + 2019: 1,679.7 million + 2018: 1,684.2 million # unreliable + + invent: + 2019: 1,559.3 million + 2018: 1,642.2 million # unreliable + + payables: + 2019: 2,854.1 million + 2018: 2,746.2 million # unreliable + + P&L: + rev: + 2019: 16,865.2 million + 2018: 15,740.4 million + 2017: 15,619.8 million + + cogs: + 2019: 11,108.4 million + 2018: 10,304.8 million + 2017: 10,052.0 million + + + GENERALMILLS_2020_10K: + BS: + curr-assets: + 2020: 5,121.3 million + 2019: 4,186.5 million + + curr-liabs: + 2020: 7,491.5 million + 2019: 7,087.1 million + + CF: + op-cf: + 2020: 3,676.2 million + 2019: 2,807.0 million + 2018: 2,841.0 million + + capex: + 2020: 460.8 million + 2019: 537.6 million + 2018: 622.7 million + + + GENERALMILLS_2022_10K: + CF: + div: + 2022: 1,244.5 million + 2021: 1,246.4 million + 2020: 1,195.8 million + + P&L: + net: + 2022: 2,707.3 million # unreliable + 2021: 2,339.8 million # unreliable + 2020: 2,181.2 million # unreliable + + + JOHNSON_JOHNSON_2022_10K: + BS: + invent: + 2022: 12,483 million + 2021: 10,387 million + + P&L: + cogs: + 2022: 31,089 million + 2021: 29,855 million + 2020: 28,427 million + + + KRAFTHEINZ_2019_10K: + BS: + invent: + 2019: 2,721 million + 2018: 2,667 million + + P&L: + cogs: + 2019: 16,830 million + 2018: 17,347 million # unreliable + 2017: 17,043 million + + + 
LOCKHEEDMARTIN_2020_10K: + BS: + total-assets: + 2020: 50,710 million + 2019: 47,528 million + + P&L: + rev: + 2020: 65,398 million + 2019: 59,812 million # unreliable + 2018: 53,762 million + + + LOCKHEEDMARTIN_2021_10K: + BS: + curr-assets: + 2021: 19,815 million + 2020: 19,378 million + + curr-liabs: + 2021: 13,997 million + 2020: 13,933 million + + + LOCKHEEDMARTIN_2022_10K: + P&L: + rev: + 2022: 65,984 million + 2021: 67,044 million + 2020: 65,398 million + + + MGMRESORTS_2018_10K: + BS: + payables: + 2018: 302.578 million or 302.6 million or 0.3 billion + 2017: 255.028 million or 255 million or 0.26 billion or 0.3 billion + + + MGMRESORTS_2020_10K: + CF: + capex: + 2020: 270.579 million or 271 million + 2019: 739.006 million or 739 million # unreliable + 2018: 1,486.843 million or 1,487 million # unreliable + + P&L: + rev: + 2020: 5,162.082 million or 5,162 million + 2019: 12,899.672 million or 12,900 million # unreliable + 2018: 11,763.096 million or 11,763 million + + + # MGMRESORTS_2022Q4_EARNINGS: + # P&L: + # ebit: + # int: + + + MICROSOFT_2016_10K: + P&L: + cogs: + 2016: 32,780 million # unreliable + 2015: 33,038 million # unreliable + 2014: 27,078 million # unreliable + + + MICROSOFT_2023_10K: + BS: + st-debt: + 2023: 5,247 million + 2022: 2,749 million + + lt-debt: + 2023: 41,990 million + 2022: 47,032 million + + + NETFLIX_2015_10K: + CF: + d&a: + 2015: 62.283 million or 62 million # unreliable: often failing to be retrieved at all + 2014: 54.028 million or 54 million # unreliable: often failing to be retrieved at all + 2013: 48.374 million or 48 million # unreliable: often failing to be retrieved at all + + P&L: + rev: + 2015: 6,779.511 million or 6,780 million + 2014: 5,504.656 million or 5,505 million + 2013: 4,374.562 million or 4,375 million + + op: + 2015: 305.826 million or 306 million + 2014: 402.648 million or 403 million + 2013: 228.347 million or 228 million + + + NIKE_2018_10K: + P&L: + rev: + 2018: 36,397 million + 2017: 34,350 million + 
2016: 32,376 million + + cogs: + 2018: 20,441 million + 2017: 19,038 million + 2016: 17,405 million + + + NIKE_2021_10K: + BS: + invent: + 2021: 6,854 million + 2020: 7,367 million + + P&L: + cogs: + 2021: 24,576 million + 2020: 21,162 million # unreliable + 2019: 21,643 million + + + PAYPAL_2022_10K: + BS: + curr-assets: + 2022: 57,517 million + 2021: 52,574 million + + curr-liabs: + 2022: 45,101 million + 2021: 43,029 million + + + PEPSICO_2021_10K: + CF: + capex: + 2021: 4,625 million + 2020: 4,240 million + 2019: 4,232 million + + + PEPSICO_2022_10K: + CF: + d&a: + 2022: 2,763 million # unreliable + 2021: 2,710 million # unreliable + 2020: 2,548 million + + capex: + 2022: 5,207 million + 2021: 4,625 million + 2020: 4,240 million + + P&L: + rev: + 2022: 86,392 million # unreliable + 2021: 79,474 million # unreliable + 2020: 70,372 million # unreliable + + op: + 2022: 11,512 million + 2021: 11,162 million + 2020: 10,080 million + + + PFIZER_2021_10K: + BS: + fixed-assets: + 2021: 14,882 million # unreliable + 2020: 13,745 million # unreliable + + + VERIZON_2022_10K: + BS: + cash-and-equiv: + 2022: 2,605 million + 2021: 2,921 million + + st-invest: + 2022: 0 (or not explicitly reported) + 2021: 0 (or not explicitly reported) + + recvables: + 2022: 24,506 million # unreliable + 2021: 23,846 million # unreliable + + invent: + 2022: 2,388 million + 2021: 3,055 million + + curr-assets: + 2022: 37,857 million + 2021: 36,728 million + + fixed-assets: + 2022: 107,434 million + 2021: 99,696 million + + total-assets: + 2022: 379,680 million + 2021: 366,596 million + + curr-liabs: + 2022: 50,171 million + 2021: 47,160 million + + CF: + capex: + 2022: 23,087 million # unreliable + 2021: 20,286 million # unreliable + 2020: 18,192 million # unreliable + + P&L: + rev: + 2022: 136,835 million + 2021: 133,613 million + 2020: 128,292 million + + net: + 2022: 21,256 million # unreliable + 2021: 22,065 million + 2020: 17,801 million + + + WALMART_2018_10K: + BS: + invent: + 2018: 
43,783 million + 2017: 43,046 million + + payables: + 2018: 46,092 million + 2017: 41,433 million + + P&L: + cogs: + 2018: 373,396 million # unreliable + 2017: 361,256 million # unreliable + 2016: 360,984 million # unreliable + + + WALMART_2019_10K: + P&L: + rev: + 2019: 514,405 million # unreliable + 2018: 500,343 million # unreliable + 2017: 485,873 million + + op: + 2019: 21,957 million + 2018: 20,437 million + 2017: 22,764 million # unreliable + + + WALMART_2020_10K: + CF: + d&a: + 2020: 10,987 million + 2019: 10,678 million + 2018: 10,529 million + + P&L: + rev: + 2020: 523,964 million # unreliable + 2019: 514,405 million # unreliable + 2018: 500,343 million + + op: + 2020: 20,568 million + 2019: 21,957 million + 2018: 20,437 million diff --git a/examples/FinanceBench-AMD/util.py b/examples/FinanceBench-AMD/util.py new file mode 100644 index 000000000..a7ab24305 --- /dev/null +++ b/examples/FinanceBench-AMD/util.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass +from functools import wraps +from typing import TYPE_CHECKING + +from loguru import logger +from tqdm import tqdm + +from data_and_knowledge import FbId, Answer, FB_IDS, DOC_NAMES_BY_FB_ID, QS_BY_FB_ID, OUTPUT_FILE_PATH, get_or_create_output_df # noqa: E501 +from log import switch_log_file + +if TYPE_CHECKING: + from pandas import DataFrame + + +type QAFunc = Callable[[FbId], Answer] + + +@dataclass +class log_qa_and_update_output_file: # noqa: N801 + output_name: str + + def __call__(self, qa_func: QAFunc) -> QAFunc: + @wraps(wrapped=qa_func) + def decorated_qa_func(fb_id: FbId) -> Answer: + switch_log_file(fb_id=fb_id, output_name=self.output_name) + + logger.info((question := f'\n{fb_id}\n{DOC_NAMES_BY_FB_ID[fb_id]}:\n{QS_BY_FB_ID[fb_id]}\n') + + '\n... 
solving process starting ...\n', + depth=1) + + logger.info(question + (f'\n{self.output_name.upper()}:\n' + f'{(answer := qa_func(fb_id)).replace('{', '{{').replace('}', '}}')}\n'), + depth=1) + + output_df: DataFrame = get_or_create_output_df() + output_df.loc[fb_id, self.output_name]: str = answer + output_df.to_csv(OUTPUT_FILE_PATH, index=True) + + return answer + + return decorated_qa_func