Bugfix of build command

LeoEATLinden · LeoEATLinden · commit 5120cfee731f · 2023-06-21T19:36:16.000+02:00
diff --git a/Dockerfile.spark b/Dockerfile.spark
@@ -16,5 +16,8 @@ RUN java -version
 # Install pandas
 RUN pip install pandas
 
+# Install build
+RUN pip install build
+
 # Install PySpark
 RUN pip install pyspark
diff --git a/Dockerfile.test b/Dockerfile.test
@@ -1,12 +1,15 @@
-FROM spark_docker_v2
+FROM spark_docker_base
 
 # Build the package
-RUN python setup.py sdist bdist_wheel
+#RUN python setup.py sdist bdist_wheel
 
 # Add the distribution
 COPY src src
 
-RUN python -m build src
+# Add the config files
+ADD pyproject.toml pyproject.toml
+
+RUN python -m build
 
 # Install the package
 RUN pip install dist/*.whl
diff --git a/readme.md b/readme.md
@@ -24,6 +24,13 @@ Run tests by building the Dockerfile.test file using
 ```bash
 docker build -f Dockerfile.test -t test_package .
 ```
+
+If you are running the tests for the first time, you first have to build the base Docker image containing PySpark.
+
+```bash
+docker build -f Dockerfile.spark -t spark_docker_base .
+```
+
 ### Usage
 First import the required function
 
diff --git a/tests/test_text.py b/tests/test_text.py
@@ -12,11 +12,13 @@
 class TestVectorDataFrame(unittest.TestCase):
     def test_vector_dataframe(self):
         spark = SparkSession.builder.master("local").getOrCreate()
-        with open(os.path.join(DATA_SET_PATH, "text/example_annotation.json"), "r") as f:
+        with open(os.path.join(DATA_SET_PATH, "text/example_annotation.json"),
+                  "r") as f:
             data = json.load(f)
 
         actual_df = get_text_dataframe([data], spark)
 
-        expected_df = spark.read.parquet(os.path.join(DATA_SET_PATH, "text/expected_df.parquet"))
+        expected_df = spark.read.parquet(os.path.join(
+            DATA_SET_PATH, "text/expected_df.parquet"))
         self.assertEqual(sorted(actual_df.collect()),
                          sorted(expected_df.collect()))