Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions regression-tests/daaltkregtests/lib/port.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[port]
__main__.arimaxtest.test_arima_scoring = 9100
__main__.arimaxtest.test_arx_scoring = 9101
__main__.arimaxtest.test_arimax_scoring = 9102
__main__.arimaxtest.test_max_scoring = 9103
__main__.kmeansclustering.test_model_scoring = 9104
__main__.gmm.test_model_scoring = 9105
__main__.ldamodeltest.test_model_scoring = 9106
__main__.linearregression.test_model_scoring = 9107
__main__.logisticregression.test_model_scoring = 9108
__main__.naivebayes.test_model_scoring = 9109
__main__.principalcomponent.test_model_scoring = 9110
__main__.randomforest.test_class_scoring = 9111
__main__.randomforest.test_reg_scoring = 9112
__main__.svmscoretest.test_model_scoring = 9113
# The following entries reuse the same port numbers as above, but under
# different keys: when a test belongs to a module, its testcase id changes.
arimax_test.arimaxtest.test_arima_scoring = 9100
arimax_test.arimaxtest.test_arx_scoring = 9101
arimax_test.arimaxtest.test_arimax_scoring = 9102
arimax_test.arimaxtest.test_max_scoring = 9103
kmeans_test.kmeansclustering.test_model_scoring = 9104
gmm_test.gmm.test_model_scoring = 9105
lda.ldamodeltest.test_model_scoring = 9106
linear_regression_test.linearregression.test_model_scoring = 9107
logistic_regression_test.logisticregression.test_model_scoring = 9108
naive_bayes_test.naivebayes.test_model_scoring = 9109
pca_test.principalcomponent.test_model_scoring = 9110
random_forest_test.randomforest.test_class_scoring = 9111
random_forest_test.randomforest.test_reg_scoring = 9112
svm_model_test.svmscoretest.test_model_scoring = 9113

84 changes: 84 additions & 0 deletions regression-tests/daaltkregtests/lib/score_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# vim: set encoding=utf-8

# Copyright (c) 2016 Intel Corporation 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


""" Library to support scoring in TAP for the ATK service """
import subprocess as sp
import requests
import time
import signal
import os
import config
from ConfigParser import SafeConfigParser

class scorer(object):
    """Context manager that launches a local scoring-engine server for a
    published model archive (.mar) and submits JSON records to it.

    Usage:
        with scorer(model_path, test_id) as s:
            response = s.score([{...feature: value...}])
    """

    def __init__(self, model_path, port_id, host=config.scoring_engine_host):
        """Set up the server location, port and model file.

        :param model_path: HDFS path of the exported model archive (.mar)
        :param port_id: key into port.ini's [port] section (normally the
            unittest test id) selecting this test's dedicated port
        :param host: host the scoring engine will be reached on
        """
        self.hdfs_path = model_path
        self.name = host.split('.')[0]
        self.host = host
        # Look up this test's dedicated port so concurrent tests do not
        # collide.  Use a local name distinct from the module-level
        # `config` import so the import is not shadowed.
        parser = SafeConfigParser()
        filepath = os.path.abspath(os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "port.ini"))
        parser.read(filepath)
        self.port = parser.get('port', port_id)
        self.scoring_process = None

    def __enter__(self):
        """Activate the server and wait for it to come up."""
        # change current working directory to point at scoring_engine dir
        run_path = os.path.abspath(os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "..", "..", "..", "scoring", "scoring_engine"))

        # keep track of cwd so it can be restored afterwards
        test_dir = os.getcwd()
        os.chdir(run_path)
        try:
            # os.setsid places the server in its own process group so the
            # whole process tree can be killed in __exit__
            self.scoring_process = sp.Popen(
                ["./bin/model-scoring.sh",
                 "-Dtrustedanalytics.scoring-engine.archive-mar=%s" % self.hdfs_path,
                 "-Dtrustedanalytics.scoring.port=%s" % self.port],
                preexec_fn=os.setsid)
        finally:
            # restore cwd even if launching the server fails
            os.chdir(test_dir)

        # wait for server to start
        # NOTE(review): a fixed sleep is fragile; polling the port until it
        # accepts connections would be more reliable
        time.sleep(20)
        return self

    def __exit__(self, *args):
        """Teardown the server."""
        # Kill the whole process group so all subprocesses die too
        pgrp = os.getpgid(self.scoring_process.pid)
        os.killpg(pgrp, signal.SIGKILL)

    def score(self, data_val):
        """POST the list of records data_val to the scoring endpoint.

        :param data_val: list of dicts mapping feature names to values
        :returns: the requests.Response object from the server
        """
        # Magic headers to make the server respond appropriately
        # Ask the head of scoring why these
        headers = {'Content-type': 'application/json',
                   'Accept': 'application/json,text/plain'}

        scoring_host = self.host + ":" + self.port
        submit_string = 'http://' + scoring_host + '/v2/score'
        response = requests.post(
            submit_string, json={"records": data_val}, headers=headers)
        return response
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# vim: set encoding=utf-8

# Copyright (c) 2016 Intel Corporation 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


""" test cases for the kmeans clustering algorithm """
import unittest
import time
import os
from daaltkregtests.lib import daaltk_test
from daaltkregtests.lib import score_utils
from daaltkregtests.lib import config


class KMeansClustering(daaltk_test.DaalTKTestCase):
    """Exercise kmeans cluster scoring through the scoring engine."""

    # feature columns shared by the train/test CSVs and the model
    _VECTOR_COLUMNS = ["Vec1", "Vec2", "Vec3", "Vec4", "Vec5"]

    def setUp(self):
        """Import the files to test against."""
        super(KMeansClustering, self).setUp()
        schema = [(col, float) for col in self._VECTOR_COLUMNS]
        schema.append(("term", str))

        self.frame_train = self.context.frame.import_csv(
            self.get_file("kmeans_train.csv"), schema=schema)
        self.frame_test = self.context.frame.import_csv(
            self.get_file("kmeans_test.csv"), schema=schema)

    @unittest.skip("daaltk: kmeans scoring engine produces different result than predict")
    def test_model_scoring(self):
        """Tests standard usage of the kmeans cluster algorithm."""
        kmodel = self.context.daaltk.models.clustering.kmeans.train(
            self.frame_train, ["Vec1", "Vec2", "Vec3", "Vec4", "Vec5"], k=5)

        predicted = kmodel.predict(self.frame_test)
        sample_rows = predicted.to_pandas(50)
        mar_path = kmodel.export_to_mar(
            self.get_export_file(self.get_name("daaltk_kmeans")))

        with score_utils.scorer(mar_path, self.id()) as scoring_server:
            for _, row in sample_rows.iterrows():
                record = dict(zip(self._VECTOR_COLUMNS, list(row[0:5])))
                response = scoring_server.score([record])
                self.assertEqual(
                    row.predicted_cluster,
                    response.json()["data"][0]['score'])


# Run this test module directly with the standard unittest runner.
if __name__ == '__main__':
    unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# vim: set encoding=utf-8

# Copyright (c) 2016 Intel Corporation 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


""" Tests Linear Regression scoring engine """
import unittest
import os
from daaltkregtests.lib import daaltk_test
from daaltkregtests.lib import score_utils


class LinearRegression(daaltk_test.DaalTKTestCase):
    """Tests Linear Regression scoring engine."""

    def setUp(self):
        """Build test frame"""
        super(LinearRegression, self).setUp()
        data_file = self.get_file("linear_regression_gen.csv")
        schema = [("c1", float), ("c2", float), ("c3", float),
                  ("c4", float), ("label", float)]

        self.frame = self.context.frame.import_csv(data_file, schema=schema)

    def test_model_scoring(self):
        """Test publishing a linear regression model"""
        features = ['c1', 'c2', 'c3', 'c4']
        model = self.context.daaltk.models.regression.linear_regression.train(
            self.frame, "label", features)

        predictions = model.predict(self.frame, features)
        rows = predictions.to_pandas(predictions.count())

        mar_path = model.export_to_mar(
            self.get_export_file(self.get_name("linear_regression")))
        with score_utils.scorer(mar_path, self.id()) as scoring_server:
            for _, row in rows.iterrows():
                record = dict(zip(["c1", "c2", "c3", "c4"], list(row[0:4])))
                response = scoring_server.score([record])
                self.assertAlmostEqual(
                    row["predict_label"],
                    response.json()["data"][0]['score'])




# Run this test module directly with the standard unittest runner.
if __name__ == '__main__':
    unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# vim: set encoding=utf-8

# Copyright (c) 2016 Intel Corporation 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


""" Tests Naive Bayes Model against known values. """
import unittest
import os
from daaltkregtests.lib import daaltk_test
from daaltkregtests.lib import score_utils


class NaiveBayes(daaltk_test.DaalTKTestCase):
    """Tests Naive Bayes Model against known values."""

    def setUp(self):
        """Build the frames needed for the tests."""
        super(NaiveBayes, self).setUp()

        data_file = self.get_file("naive_bayes.csv")
        schema = [("label", int), ("f1", int), ("f2", int), ("f3", int)]
        self.frame = self.context.frame.import_csv(data_file, schema=schema)

    def test_model_scoring(self):
        """Test training initializes theta, pi and labels"""
        features = ['f1', 'f2', 'f3']
        model = self.context.daaltk.models.classification.naive_bayes.train(
            self.frame, "label", features)

        predictions = model.predict(self.frame, features)
        rows = predictions.to_pandas()

        mar_path = model.export_to_mar(
            self.get_export_file(self.get_name("daal_naive_bayes")))
        with score_utils.scorer(mar_path, self.id()) as scoring_server:
            for _, row in rows.iterrows():
                # columns 1..3 are the integer features
                record = dict(zip(features, [int(v) for v in row[1:4]]))
                response = scoring_server.score([record])
                self.assertEqual(
                    response.json()["data"][0]['score'],
                    row.predicted_class)


# Run this test module directly with the standard unittest runner.
if __name__ == '__main__':
    unittest.main()
69 changes: 69 additions & 0 deletions regression-tests/daaltkregtests/testcases/scoretests/pca_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# vim: set encoding=utf-8

# Copyright (c) 2016 Intel Corporation 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import unittest
from daaltkregtests.lib import daaltk_test
from daaltkregtests.lib import score_utils


class PrincipalComponent(daaltk_test.DaalTKTestCase):
    """Tests PCA scoring against the model's own predict() results."""

    # feature columns shared by the training CSV and the model
    COLUMNS = ["X1", "X2", "X3", "X4", "X5",
               "X6", "X7", "X8", "X9", "X10"]

    def setUp(self):
        """Import the training frame."""
        super(PrincipalComponent, self).setUp()
        schema = [(name, int) for name in self.COLUMNS]
        pca_traindata = self.get_file("pcadata.csv")
        self.frame = self.context.frame.import_csv(pca_traindata, schema=schema)

    def test_model_scoring(self):
        """Test pca scoring: the engine must reproduce predict()'s
        principal components for each row."""
        model = self.context.daaltk.models.dimreduction.pca.train(
            self.frame, self.COLUMNS, mean_centered=False, k=10)

        file_name = self.get_name("pca")
        model_path = model.export_to_mar(self.get_export_file(file_name))

        with score_utils.scorer(model_path, self.id()) as scorer:
            baseline = model.predict(self.frame, mean_centered=False)
            testvals = baseline.to_pandas(50)

            for index, row in testvals.iterrows():
                # row[0:10] holds the input features; row[10:] holds the
                # principal components computed by predict()
                r = scorer.score([dict(zip(self.COLUMNS, list(row[0:10])))])
                scored = r.json()["data"][-1]["principal_components"]
                # Compare element-wise with an explicit loop.  The original
                # used map() purely for its side effects, which is both
                # unidiomatic and broken under Python 3 where map() is lazy
                # and would never run the assertions.
                for got, expected in zip(scored, row[10:]):
                    self.assertAlmostEqual(float(got), float(expected))


# Run this test module directly with the standard unittest runner.
if __name__ == '__main__':
    unittest.main()