lambda-feedback
diff --git a/‎app/eval_single_testing.py‎
Lines changed: 0 additions & 2 deletions b/‎app/eval_single_testing.py‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎app/evaluation.py‎
Lines changed: 136 additions & 80 deletions b/‎app/evaluation.py‎
Lines changed: 136 additions & 80 deletions
diff --git a/‎app/evaluation_test_cases.py‎
Lines changed: 30 additions & 2 deletions b/‎app/evaluation_test_cases.py‎
Lines changed: 30 additions & 2 deletions
diff --git a/‎app/evaluation_tests.py‎
Lines changed: 9 additions & 10 deletions b/‎app/evaluation_tests.py‎
Lines changed: 9 additions & 10 deletions
diff --git a/‎app/miscellaneous_testing.py‎
Lines changed: 14 additions & 0 deletions b/‎app/miscellaneous_testing.py‎
Lines changed: 14 additions & 0 deletions
@@ -3,10 +3,10 @@
 from langchain_openai import ChatOpenAI
 
 from typing import Any, TypedDict
-from sympy import solve, Eq, simplify
+from sympy import solve, Eq, simplify, Symbol
 from sympy.parsing.sympy_parser import parse_expr, standard_transformations, implicit_multiplication_application
 import re
-
+from parameter import create_sympy_parsing_params
 
 class Params(TypedDict):
     pass
@@ -21,43 +21,92 @@ class Result(TypedDict):
 
 transformations = standard_transformations + (implicit_multiplication_application,)
 
+def has_unbalanced_parentheses(expr: str) -> bool:
+    """
+    Check if the expression has unbalanced parentheses
+    """
+    return expr.count("(") != expr.count(")")
 
 def contains_special_math(expr: str) -> bool:
     """
-    特殊な記号/演算が含まれているか判定
+    Check if the expression contains special mathematical symbols or operations
     """
+
     patterns = [
-        r"d(\^|\*\*)?\d*(\*\*)?\w*/d\w+(\^|\*\*)?\d*(\*\*)?", # Ordinary diff (dy/dx, d^2y/dx^2)
-        r"∂(\^|\*\*)?\d*(\*\*)?\w*/∂\w+(\^|\*\*)?\d*(\*\*)?",        # Partial diff (∂y/∂x, ∂^2y/∂x^2)
-        r"diff\(\w+, \w+\)",                # diff function (diff(y, x))
-        r"int",                          # integration (int_b^a f(x)dx)
-        r"∫",                            
+        # Differentiation
+        r"d(\*\*)?\d*\w*/d\w+(\*\*)?\d*",                     # dy/dx, d**2y/dx**2
+        r"d/d\w+\(.*\)",                                       # d/dx(y)
+        r"d(\*\*)?\d*/d\w+(\*\*)?\d*\([^\)]+\)",               # d**2/dx**2(y)
+        r"D(\*\*)?\d*\w*/D\w+(\*\*)?\d*",                      # Dy/Dx, D**2y/Dx**2
+        r"D/D\w+\(.*\)",                                       # D/Dx(y)
+        r"∂(\*\*)?\d*\w*/∂\w+(\*\*)?\d*",                      # ∂y/∂x
+        r"∂/∂\w+\(.*\)",                                       # ∂/∂x(y)
+        r"diff\([^\)]+\)",                                     # diff(y, x), diff(y,x,x)
+        # Integration
+        r"int\([^\)]+\)",                                      # int(f(x), x)
+        r"∫", r"∮",                                            # ∫f(x)dx, ∮f(x)dx
+        # Summation, product and delta functions
+        r"Σ", r"∑",                                            # summation symbols
+        r"Π", r"∏",                                            # product symbols
+        r"DiracDelta",                                         #delta functions
+        # Infinity variations
+        r"Infinity", r"infinity", r"∞", r"oo", r"Inf", r"inf", r"Infty", r"infty"
     ]
     return any(re.search(p, expr) for p in patterns)
 
-
-def is_equivalent_sympy(expr1, expr2) -> bool | None:
+def replace_greek_symbols(expr: str) -> str:
+    greek_map = {
+        # Lowercase
+        "alpha": "α", "beta": "β", "gamma": "γ", "delta": "δ",
+        "epsilon": "ε", "zeta": "ζ", "eta": "η", "theta": "θ",
+        "iota": "ι", "kappa": "κ", "lambda": "λ", "mu": "μ",
+        "nu": "ν", "xi": "ξ", "omicron": "ο", "pi": "π",
+        "rho": "ρ", "sigma": "σ", "tau": "τ", "upsilon": "υ",
+        "phi": "φ", "chi": "χ", "psi": "ψ", "omega": "ω",
+        # Uppercase
+        "Alpha": "Α", "Beta": "Β", "Gamma": "Γ", "Delta": "Δ",
+        "Epsilon": "Ε", "Zeta": "Ζ", "Eta": "Η", "Theta": "Θ",
+        "Iota": "Ι", "Kappa": "Κ", "Lambda": "Λ", "Mu": "Μ",
+        "Nu": "Ν", "Xi": "Ξ", "Omicron": "Ο", "Pi": "Π",
+        "Rho": "Ρ", "Sigma": "Σ", "Tau": "Τ", "Upsilon": "Υ",
+        "Phi": "Φ", "Chi": "Χ", "Psi": "Ψ", "Omega": "Ω"
+    }
+
+    for ascii_name, greek_letter in greek_map.items():
+        expr = re.sub(rf"\b{ascii_name}\b", greek_letter, expr)
+    return expr
+
+def is_equivalent_sympy(expr1, expr2, params) -> bool | None:
     """
     Return True/False if comparable with SymPy,
     or None if an error occurs.
     """
+    if not expr1.strip() and not expr2.strip():
+        return True
+    if not expr1.strip() or not expr2.strip():
+        return False
+
     try:
-        expr1, expr2 = expr1.replace("^", "**"), expr2.replace("^", "**")
-        if not expr1.strip() and not expr2.strip():
-            return True
-        elif not expr1.strip() or not expr2.strip():
-            return False
+        # Create parsing parameters (variant that also passes the expressions)
+        parsing_params = create_sympy_parsing_params(params, expr1, expr2)
+        raw_dict = parsing_params["symbol_dict"]
+        transformations = parsing_params.get("extra_transformations", ())
+
+        # If a value is an assumptions0 dict, rebuild the Symbol from it
+        local_dict = {
+            name: Symbol(name, **attrs) if isinstance(attrs, dict) else attrs
+            for name, attrs in raw_dict.items()
+        }
 
         # Compare with Eq() for equations
         if "=" in expr1 and "=" in expr2:
             lhs1, rhs1 = expr1.split("=")
             lhs2, rhs2 = expr2.split("=")
 
-            # implicit multiplication handlable
-            lhs1_parsed = parse_expr(lhs1, transformations=transformations)
-            rhs1_parsed = parse_expr(rhs1, transformations=transformations)
-            lhs2_parsed = parse_expr(lhs2, transformations=transformations)
-            rhs2_parsed = parse_expr(rhs2, transformations=transformations)
+            lhs1_parsed = parse_expr(lhs1, transformations=transformations, local_dict=local_dict)
+            rhs1_parsed = parse_expr(rhs1, transformations=transformations, local_dict=local_dict)
+            lhs2_parsed = parse_expr(lhs2, transformations=transformations, local_dict=local_dict)
+            rhs2_parsed = parse_expr(rhs2, transformations=transformations, local_dict=local_dict)
 
             eq1 = Eq(lhs1_parsed - rhs1_parsed, 0)
             eq2 = Eq(lhs2_parsed - rhs2_parsed, 0)
@@ -69,79 +118,86 @@ def is_equivalent_sympy(expr1, expr2) -> bool | None:
 
             return set(sol1) == set(sol2)
         else:
-            expr1_parsed = parse_expr(expr1, transformations=transformations)
-            expr2_parsed = parse_expr(expr2, transformations=transformations)
+            expr1_parsed = parse_expr(expr1, transformations=transformations, local_dict=local_dict)
+            expr2_parsed = parse_expr(expr2, transformations=transformations, local_dict=local_dict)
             return simplify(expr1_parsed - expr2_parsed) == 0
 
     except Exception as e:
-        print(f" SymPy error: {e}")
+        print(f"SymPy error: {e}")
         return None
 
-
-def evaluation_function(response, answer, params):
+def convert_to_sympy(expr: str, params: Params) -> str:
     load_dotenv()
     llm = ChatOpenAI(
         model=os.environ['OPENAI_MODEL'],
         api_key=os.environ["OPENAI_API_KEY"],
     )
-
-    # Check if LLM priority is needed
-    needs_llm_priority = contains_special_math(response) or contains_special_math(answer)
-
-    # Check with SymPy first if not using LLM priority
-    sympy_result = None
-    if not needs_llm_priority:
-        sympy_result = is_equivalent_sympy(response, answer)
-
     prompt = fr"""
-    Follow these steps carefully:
-    A student response and an answer are provided below. Compare the two if they are mathematically equivalent.
-    Only return True if they are **exactly equivalent** for all possible values of all variables.
-    Do not assume expressions are equivalent based on similarity.
-    There are a few types of symbols for differentiation and the following in the same square brackets are considered equivalent:
-    [dy/dx, d/dx(y), diff(y,x)], [d^2y/dx^2, d**2y/dx**2, diff(y,x,x)], [∂y/∂x, ∂/∂x(y), diff(y,x), partial(y)/partial(x)], [∂^2y/∂x^2, ∂**2y/∂x**2, diff(y,x,x), partial**2(y)/partial(x)**2, partial^2(y)/partial(x)^2]
-    The terms above that are not in the same square brackets are not considered equivalent.
-    Student response: {response}
-    Answer: {answer}
-
-    Return either True or False as a single word and nothing else.
+Follow these steps carefully:
+A student response and an answer are provided below. Convert the student response into a SymPy expression.
+
+When the following notations in (a) and (b) are used, they must be replaced with the equivalent SymPy expressions.
+All the notations in the same square brackets are equivalent, and must be replaced with the notation after the right arrow (->) after the square brackets.
+
+(a) The following notations for derivatives, partial derivatives, and integrals **must be considered strictly equivalent** within the same group:
+- [dy/dx, d/dx(y), diff(y,x)] -> diff(y,x)
+- [d^2y/dx^2, d**2y/dx**2, diff(y,x,x)] -> diff(y,x,x)
+- [d^3y/dx^3, d**3y/dx**3, diff(y,x,x,x)] -> diff(y,x,x,x)
+- [Dy/Dx, D/Dx(y)] -> diff(y,t)+v.dot(gradient(y))
+- [∂y/∂x, ∂/∂x(y), diff(y,x), partial(y)/partial(x)] -> diff(y,x)
+- [∂^2y/∂x^2, ∂**2y/dx**2, diff(y,x,x), partial**2(y)/partial(x)**2, partial^2(y)/partial(x)^2] -> diff(y,x,x)
+- [∫f(x)dx, int(f(x),x), integrate(f(x),x), Integral(f(x),x)] -> integrate(f(x), x)
+- [∮f(x)dx, int(f(x),x,circular=True), integrate(f(x),x,circular=True), Integral(f(x),x,circular=True)] -> integrate(f(x), x)
+- [∫ₐᵇf(x)dx, ∫_a^bf(x)dx, int_a^bf(x)dx, int(f(x),(x,a,b)), integrate(f(x),(x,a,b)), Integral(f(x),(x,a,b))] -> integrate(f(x), (x, a, b))
+- [∫∫f(x,y)dxdy, int(int(f(x,y),x),y), integrate(f(x,y),x,y), Integral(f(x,y),x,y)] -> integrate(integrate(f(x,y),x),y)
+- [∇f, gradient(f), grad(f)] -> gradient(f)
+- [∇·F, div(F), divergence(F)] -> div(f)
+- [∇×F, curl(F), rot(F)] -> curl(f)
+
+(b) Other notations that **must be considered equivalent** within the same group:
+- [Infinity, infinity, ∞, oo, Inf, inf, Infty, infty] -> oo
+- [a·b, a⋅b, a.b, dot(a, b), a.dot(b)] -> a.dot(b)
+  *Note: a.b is only equivalent to these if a and b are variables, not constants like 0, 1, π, etc.*
+- [a×b, cross(a, b), a.cross(b)] -> a.cross(b)
+- [\vec{{a}}, vector(a), a.vector(), Matrix(a)] -> Matrix(a)
+- [â, \hat{{a}}, unit(a), normalize(a), a_hat] -> a/Abs(a)
+- [exp(x), e**x, e**x, exponential(x)] -> exp(x)
+
+When comparing integrals, assume that any derivative or expression between the integral sign and the differential (e.g., ∂y/∂x in ∫_a ∂y/∂x dx) is the complete integrand, even if parentheses around the integrand are missing.
+
+**Notations from different groups or not listed above are NOT equivalent.**
+
+This is the student response: {expr}
+Now convert it to a SymPy expression. Ouput only the SymPy expression, without any additional text or explanation.
     """
     llm_response = llm.invoke(prompt)
-    llm_result_text = llm_response.content.strip().lower()
+    return llm_response
 
-    if llm_result_text == "true":
-        llm_result = True
-    elif llm_result_text == "false":
-        llm_result = False
-    else:
-        # Any weird responses
-        llm_result = False
-
-    if sympy_result is not None:
-        if sympy_result == llm_result:
-            return {
-                "is_correct": sympy_result,
-                "sympy_result": sympy_result,
-                "llm_result": llm_result,
-                "mismatch_info": ""
-            }
-        else:
-            mismatch_info = (
-                f"Mismatch detected:\n"
-                f"- SymPy result: {sympy_result}\n"
-                f"- LLM result: {llm_result}\n"
-                f"Used LLM result due to mismatch"
-            )
-            return {
-                "is_correct": sympy_result, 
-                "sympy_result": sympy_result,
-                "llm_result": llm_result,
-                "mismatch_info": mismatch_info
-            }
-    else:
+def evaluation_function(response, answer, params):
+
+    if has_unbalanced_parentheses(response) or has_unbalanced_parentheses(answer):
         return {
-            "is_correct": llm_result,
+            "is_correct": False,
             "sympy_result": None,
-            "llm_result": llm_result,
-            "mismatch_info": "Used LLM result only"
-        }
+            "llm_result": False,
+            "mismatch_info": "Invalid syntax: unbalanced parentheses"
+        }
+    response = response.replace("^", "**")
+    answer = answer.replace("^", "**")
+    response = response.replace(" ", "")
+    answer = answer.replace(" ", "")
+    response = replace_greek_symbols(response)
+    answer = replace_greek_symbols(answer)
+
+    if response.strip() == "" or answer.strip() == "":
+        needs_conversion = False
+    else:
+        needs_conversion = contains_special_math(response) or contains_special_math(answer)
+
+    if needs_conversion:
+        response = convert_to_sympy(response, params).content.strip()
+        answer = convert_to_sympy(answer, params).content.strip()
+    result = None
+    result = is_equivalent_sympy(response, answer, params)
+
+    return {"is_correct": result}
@@ -1,7 +1,8 @@
-from evaluation import Params
+from evaluation import Params, evaluation_function
+from parameter import create_sympy_parsing_params
 # [response, answer, params, expected]
 test_cases = [
-            ["2+2", "4", Params(), True],
+            ["2+2", "4", Params(), True], #1
             ["sin(x)**2 + cos(x)**2", "1", Params(), True],
             ["x+y", "y+x", Params(), True],
             ["x*y", "x+y", Params(), False],
@@ -15,6 +16,7 @@
             ["x**3 + x**2", "x**2 * (x + 1)", Params(), True],
             ["", "", Params(), True],       
             ["", "x", Params(), False],
+            ["1+", "1", Params(), False],
             ["x+1=0", "-2x-2=0", Params(), True],
             ["dy/dx", "diff(y, x)", Params(), True],
             ["(x+y)/x", "1 + y/x", Params(), True],
@@ -23,4 +25,30 @@
             ["∂^2y/∂x^2", "diff(diff(y, x), x)", Params(), True],
             ["dy/dx + 1", "diff(y, x) + 1", Params(), True],
             ["∂y/∂x + 1", "diff(y, x) + 1", Params(), True],
+            ["dp/dt", "diff(p, t)", Params(), True],
+            ["dg/dm", "diff(y,x)", Params(), False],
+            ]
+test_cases2 = [
+            ["infty", "Infinity", Params(), True], #1
+            ["sqrt(-1)", "I", Params(), True],
+            ["sqrt(x**2)", "x", Params(), False],
+            ["1/(x-1)", "1/(1-x)", Params(), False],
+            ["x^2", "x**2", Params(), True],
+            ["x^^2", "x**2", Params(), False],
+            ["d^3y/dx^3", "diff(y, x, x, x)", Params(), True],
+            ["∫∫f(x)dxdy", "int(int(f(x), x), y)", Params(), True],
+            ["f(x)=x+1", "f(x)-x-1=0", Params(), True],
+            ["f(x) = x**2", "f(y) = y**2", Params(), False],#should this always be false?
+            ["diff(y,x)+", "diff(y,x)+0", Params(), False],
+            ["d/dx(y", "diff(y, x)", Params(), False],
+            ["DiracDelta(x)", "0", Params(), False],
+            ["∫_{V_sys} ∂ρ/∂t dV", "int(partial(ρ)/partial(t), (V, V_sys))", Params(), True],
+            ["∮_{A_sys} ρu·n̂ dA", "Integral(rho * u.dot(n), (A, A_sys), circular=True)", Params(), True],
+            ["rho", "ρ", Params(), True],
+            ["Dx/Dt=-div(u)", "Dx/Dt+div(u)=0", Params(), True],
+            ["(1/rho)*Drho/Dt=-div(u)", "(1/ρ)*Dρ/Dt+div(u)=0", Params(), True],
+            ]
+test_cases3 = [
+            ["abs(x)", "sqrt(x**2)", Params(), False],
+            ["abs(x)", "sqrt(x**2)", Params(symbol_assumptions={"x": {"real": True},}), True],
             ]
@@ -4,7 +4,7 @@
     from .evaluation import Params, evaluation_function
 except ImportError:
     from evaluation import Params, evaluation_function
-from evaluation_test_cases import test_cases
+from evaluation_test_cases import test_cases, test_cases2,test_cases3
 
 
 class TestEvaluationFunction(unittest.TestCase):
@@ -15,8 +15,8 @@ class TestEvaluationFunction(unittest.TestCase):
     def test_multiple_cases(self):
         passed = 0
         failed = 0
-
-        for i, (response, answer, params, expected) in enumerate(test_cases, 1):
+        case = [test_cases, test_cases2, test_cases3]
+        for i, (response, answer, params, expected) in enumerate(case[2], 1): # change the index here to switch between test_cases, test_cases2 and test_cases3
             with self.subTest(test_case=i):
                 result = evaluation_function(response, answer, params)
                 is_correct = result.get("is_correct")
@@ -26,16 +26,15 @@ def test_multiple_cases(self):
                     print(f"Test {i} Passed")
                     passed += 1
                 except AssertionError:
-                    print(f"Test {i} Failed: expected {expected}, got {is_correct}")
+                    print(f"Test {i} Failed:")
+                    print(f"  Response: {response}")
+                    print(f"  Answer  : {answer}")
+                    print(f"  Params  : {params}")
+                    print(f"  Expected: {expected}, Got: {is_correct}")
                     failed += 1
 
-                    # mismatch_info があれば表示
-                    mismatch_info = result.get("mismatch_info")
-                    if mismatch_info:
-                        print(f"Mismatch Info (Test {i}):\n{mismatch_info}")
-
         print(f"\n--- Summary ---\nPassed: {passed}, Failed: {failed}, Total: {passed + failed}")
 
 
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main()
@@ -0,0 +1,14 @@
+from parameter import extract_variable_names, create_sympy_parsing_params
+from evaluation import Params, evaluation_function
+from sympy.parsing.sympy_parser import parse_expr
+from sympy import solve, Eq, simplify, Symbol
+
+# parsing_params = create_sympy_parsing_params(Params(symbol_assumptions={"x": {"real": True}}), "x+2+z", "x*y*z")
+# local_dict = parsing_params["symbol_dict"]
+# print(local_dict)
+lhs, rhs = "2*x+1", "0"
+eq = Eq(parse_expr(lhs), parse_expr(rhs))
+print(solve(eq))
+lhs, rhs = "x", "-1/2"
+eq = Eq(parse_expr(lhs), parse_expr(rhs))
+print(solve(eq))