Commit 8152325e authored by Danniene Wete

implement codebook method

parent 940ca1f3
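
The notebooks in this commit call a codebook(window_length, n_clusters) helper defined in codebook.ipynb / CodebookApproach.ipynb, which is not shown here. For orientation only, below is a minimal, hypothetical sketch of such a bag-of-codewords feature extractor, assuming non-overlapping sliding-window segmentation of the raw signals and k-means clustering with scikit-learn; the name codebook_sketch, its signature, and the clustering choice are illustrative assumptions, not the author's actual implementation.

import numpy as np
from sklearn.cluster import KMeans

def codebook_sketch(train_signals, test_signals, window_length, n_clusters, random_state=0):
    # Hypothetical sketch: train_signals/test_signals are arrays of shape
    # (n_samples, signal_length); returns per-sample codeword histograms
    # of shape (n_samples, n_clusters).
    def extract_windows(signals):
        # Cut each signal into non-overlapping windows of length window_length.
        n_windows = signals.shape[1] // window_length
        return signals[:, :n_windows * window_length].reshape(
            signals.shape[0], n_windows, window_length)

    train_windows = extract_windows(np.asarray(train_signals))
    test_windows = extract_windows(np.asarray(test_signals))

    # Learn the codewords (cluster centres) on training subsequences only.
    km = KMeans(n_clusters=n_clusters, random_state=random_state)
    km.fit(train_windows.reshape(-1, window_length))

    def to_histograms(windows):
        # Assign each window to its nearest codeword and count assignments per sample.
        codes = km.predict(windows.reshape(-1, window_length))
        codes = codes.reshape(windows.shape[0], windows.shape[1])
        hists = np.stack([np.bincount(row, minlength=n_clusters) for row in codes])
        return hists / hists.sum(axis=1, keepdims=True)  # relative frequencies

    return to_histograms(train_windows), to_histograms(test_windows)

# Example call on synthetic data, just to show the shapes involved:
rng = np.random.default_rng(0)
X_tr, X_te = codebook_sketch(rng.normal(size=(100, 128)), rng.normal(size=(40, 128)),
                             window_length=20, n_clusters=10)

Under these assumptions, codebook(20, 10) in the notebooks would correspond to 20-sample windows quantized against 10 learned codewords; the real helper takes no data arguments, so it presumably loads the raw signal files itself.
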
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import itertools\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
"from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support\n",
"from sklearn.model_selection import GridSearchCV\n",
"\n",
"plt.style.use('bmh')\n",
"%matplotlib inline\n",
"%run codebook.ipynb\n",
"\n",
"def select_rfc_params(X_train, y_train, folds):\n",
" \"\"\"Hyperparameter optimization\n",
"\n",
" Args:\n",
" X_train, y_train: corresponding dataset and labels from load_data\n",
" Returns:\n",
" grid_search.best_params_: Best Parameters for SVC\n",
" \"\"\"\n",
" estimator_array = range(50,200,10)\n",
" depth_array = range(2,15)\n",
" params = {'n_estimators': estimator_array, 'max_depth': depth_array}\n",
" # Create a based model\n",
" rf = RandomForestClassifier()\n",
" # Instantiate the grid search model\n",
" grid_search = GridSearchCV(estimator=rf, param_grid=params, cv=folds)\n",
" grid_search.fit(X_train, y_train)\n",
" return grid_search.best_params_\n",
"\n",
"#RandomForest classifier\n",
"def rfc(X_train, y_train, X_test, y_test, best_params):\n",
" \"\"\"Random Forest Classifier\n",
"\n",
" Args:\n",
" X_train, y_train, X_test, y_test: trained model and testing data\n",
" Returns:\n",
" score, y_pred: Accuracy and prediction labels\n",
" \"\"\"\n",
" rfc = RandomForestClassifier(n_estimators=best_params[\"n_estimators\"], max_depth=best_params[\"max_depth\"])\n",
" rfc.fit(X_train, y_train)\n",
" #y_pred = rfc.predict(X_test)\n",
" score = rfc.score(X_test, y_test)\n",
" return score"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_train = np.loadtxt(\"../data/y_train.txt\")\n",
"y_train = np.array([int(i)-1 for i in y_train])\n",
"y_test = np.loadtxt(\"../data/y_test.txt\")\n",
"y_test = np.array([int(i)-1 for i in y_test])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for window_length in range(5,40,5):\n",
" \n",
" for n_clusters in range(5,30, 3):\n",
" X_train, X_test = codebook(window_length, n_clusters)\n",
" best_params = select_rfc_params(X_train, y_train, 10)\n",
" #RFC classifier\n",
" score = rfc(X_train, y_train, X_test, y_test, best_params)\n",
" p = [window_length, n_clusters, score]\n",
" \n",
" with open('paramTuningRFC.txt', 'a') as file: # save output in file \n",
" s = ['[', ']', ',']\n",
" p = str(list(p))\n",
" for e in s:\n",
" p = p.replace(e, '')\n",
" file.write(p)\n",
" file.write('\\n') \n",
"print('FINISH.')\n",
" \n",
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import itertools\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
"from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support\n",
"from sklearn.model_selection import GridSearchCV\n",
"\n",
"plt.style.use('bmh')\n",
"%matplotlib inline\n",
"%run CodebookApproach.ipynb\n",
"\n",
"def select_rfc_params(X_train, y_train, folds):\n",
" \"\"\"Hyperparameter optimization\n",
"\n",
" Args:\n",
" X_train, y_train: corresponding dataset and labels from load_data\n",
" Returns:\n",
" grid_search.best_params_: Best Parameters for SVC\n",
" \"\"\"\n",
" estimator_array = range(50,200,10)\n",
" depth_array = range(2,15)\n",
" params = {'n_estimators': estimator_array, 'max_depth': depth_array}\n",
" # Create a based model\n",
" rf = RandomForestClassifier()\n",
" # Instantiate the grid search model\n",
" grid_search = GridSearchCV(estimator=rf, param_grid=params, cv=folds)\n",
" grid_search.fit(X_train, y_train)\n",
" return grid_search.best_params_\n",
"\n",
"#RandomForest classifier\n",
"def rfc(X_train, y_train, X_test, y_test, best_params):\n",
" \"\"\"Random Forest Classifier\n",
"\n",
" Args:\n",
" X_train, y_train, X_test, y_test: trained model and testing data\n",
" Returns:\n",
" score, y_pred: Accuracy and prediction labels\n",
" \"\"\"\n",
" rfc = RandomForestClassifier(n_estimators=best_params[\"n_estimators\"], max_depth=best_params[\"max_depth\"])\n",
" rfc.fit(X_train, y_train)\n",
" y_pred = rfc.predict(X_test)\n",
" score = rfc.score(X_test, y_test)\n",
" return score, y_pred"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_train = np.loadtxt(\"../data/y_train.txt\")\n",
"y_train = np.array([int(i)-1 for i in y_train])\n",
"y_test = np.loadtxt(\"../data/y_test.txt\")\n",
"y_test = np.array([int(i)-1 for i in y_test])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#codebook approach\n",
"X_train, X_test = codebook(20, 10)\n",
"#hyperparameter optimization\n",
"best_params = select_rfc_params(X_train, y_train, 10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"score, y_pred = rfc(X_train, y_train, X_test, y_test, best_params)\n",
"print('%d, %d, %.4f' % (window_length, n_clusters, score))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"labels = {0:'WALKING',1:'WALKING_UPSTAIRS',2:'WALKING_DOWNSTAIRS',3:'SITTING',4:'STANDING',5:'LAYING'}\n",
"print(classification_report(y_test, y_pred, target_names=list(labels.values())))\n",
"conf_mat = confusion_matrix(y_test, y_pred)\n",
"\n",
"fig = plt.figure(figsize=(20,20))\n",
"plt.imshow(conf_mat, cmap=plt.cm.hot, interpolation='nearest')\n",
"plt.colorbar()\n",
"plt.title('Confusion Matrix')\n",
"plt.ylabel('Ground Truth Labels')\n",
"plt.xlabel('Predicted labels')\n",
"plt.xticks(range(len(labels.values())), [l for l in labels.values()], rotation = 90)\n",
"plt.yticks(range(len(labels.values())), [l for l in labels.values()])\n",
"\n",
"#activate normalization confusion matrix\n",
"normalize = False\n",
"\n",
"if normalize:\n",
" conf_mat = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]\n",
"\n",
"for i, j in itertools.product(range(conf_mat.shape[0]), range(conf_mat.shape[1])):\n",
" if conf_mat[i, j] > 0:\n",
" if normalize:\n",
" plt.text(j, i, \"{:0.2f}\".format(conf_mat[i, j]), horizontalalignment=\"center\", color=\"black\")\n",
" else:\n",
" plt.text(j, i, \"{:}\".format(conf_mat[i, j]), horizontalalignment=\"center\", color=\"black\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import itertools\n",
"import matplotlib.pyplot as plt\n",
"from sklearn import svm\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import classification_report, confusion_matrix\n",
"from sklearn.model_selection import GridSearchCV\n",
"\n",
"plt.style.use('bmh')\n",
"%matplotlib inline\n",
"%run codebook.ipynb\n",
"\n",
"def select_svc_params(X_train, y_train, folds):\n",
" \"\"\"Hyperparameter optimization\n",
"\n",
" Args:\n",
" X_train, y_train: corresponding dataset and labels from load_data\n",
" Returns:\n",
" grid_search.best_params_: Best Parameters for SVC\n",
" \"\"\"\n",
" gamma = [0.001, 0.01, 0.1, 1]\n",
" C = [0.001, 0.01, 0.1, 1, 10]\n",
" params = {'C': C, 'gamma' : gamma}\n",
" grid_search = GridSearchCV(svm.SVC(kernel='rbf'), params, cv=folds, iid=False)\n",
" grid_search.fit(X_train, y_train)\n",
" return grid_search.best_params_\n",
"\n",
"def svc(X_train, y_train, X_test, y_test):\n",
" \"\"\"Support Vector Machine Classifier\n",
"\n",
" Args:\n",
" X_train, y_train, X_test, y_test: trained model and testing data\n",
" Returns:\n",
" score, y_pred: Accuracy and prediction labels\n",
" \"\"\"\n",
" svc = SVC(kernel='rbf', C=best_params[\"C\"], gamma=best_params[\"gamma\"])\n",
" svc.fit(X_train, y_train)\n",
" y_pred = svc.predict(X_test)\n",
" score = svc.score(X_test, y_test)\n",
" return score, y_pred"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_train = np.loadtxt(\"../data/y_train.txt\")\n",
"y_train = np.array([int(i)-1 for i in y_train])\n",
"y_test = np.loadtxt(\"../data/y_test.txt\")\n",
"y_test = np.array([int(i)-1 for i in y_test])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#codebook approach\n",
"X_train, X_test = codebook(20, 10)\n",
"#hyperparameter optimization\n",
"best_params = select_svc_params(X_train, y_train, 10)\n",
"#SVM classifier\n",
"score, y_pred = svc(X_train, y_train, X_test, y_test)\n",
"print('%.4f' % (score))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"labels = {0:'WALKING',1:'WALKING_UPSTAIRS',2:'WALKING_DOWNSTAIRS',3:'SITTING',4:'STANDING',5:'LAYING'}\n",
"print(classification_report(y_test, y_pred, target_names=list(labels.values())))\n",
"conf_mat = confusion_matrix(y_test, y_pred)\n",
"\n",
"fig = plt.figure(figsize=(20,20))\n",
"plt.imshow(conf_mat, cmap=plt.cm.hot, interpolation='nearest')\n",
"plt.colorbar()\n",
"plt.title('Confusion Matrix')\n",
"plt.ylabel('Ground Truth Labels')\n",
"plt.xlabel('Predicted labels')\n",
"plt.xticks(range(len(labels.values())), [l for l in labels.values()], rotation = 90)\n",
"plt.yticks(range(len(labels.values())), [l for l in labels.values()])\n",
"\n",
"#activate normalization confusion matrix\n",
"normalize = False\n",
"\n",
"if normalize:\n",
" conf_mat = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]\n",
"\n",
"for i, j in itertools.product(range(conf_mat.shape[0]), range(conf_mat.shape[1])):\n",
" if conf_mat[i, j] > 0:\n",
" if normalize:\n",
" plt.text(j, i, \"{:0.2f}\".format(conf_mat[i, j]), horizontalalignment=\"center\", color=\"black\")\n",
" else:\n",
" plt.text(j, i, \"{:}\".format(conf_mat[i, j]), horizontalalignment=\"center\", color=\"black\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import itertools\n",
"import matplotlib.pyplot as plt\n",
"from sklearn import svm\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import precision_recall_fscore_support\n",
"from sklearn.model_selection import GridSearchCV\n",
"\n",
"plt.style.use('bmh')\n",
"%matplotlib inline\n",
"%run codebook.ipynb\n",
"\n",
"def select_svc_params(X_train, y_train, folds):\n",
" \"\"\"Hyperparameter optimization\n",
"\n",
" Args:\n",
" X_train, y_train: corresponding dataset and labels from load_data\n",
" Returns:\n",
" grid_search.best_params_: Best Parameters for SVC\n",
" \"\"\"\n",
" gamma = [0.001, 0.01, 0.1, 1]\n",
" C = [0.001, 0.01, 0.1, 1, 10]\n",
" params = {'C': C, 'gamma' : gamma}\n",
" svm = SVC()\n",
" grid_search = GridSearchCV(svm, params, cv=folds, iid=False)\n",
" grid_search.fit(X_train, y_train)\n",
" return grid_search.best_params_\n",
"\n",
"def run_svc(X_train, y_train, X_test, y_test):\n",
" \"\"\"Support Vector Machine Classifier\n",
"\n",
" Args:\n",
" X_train, y_train, X_test, y_test: trained model and testing data\n",
" Returns:\n",
" score, y_pred: Accuracy and prediction labels\n",
" \"\"\"\n",
" svc = SVC(C=best_params[\"C\"], gamma=best_params[\"gamma\"])\n",
" svc.fit(X_train, y_train)\n",
" #y_pred = svc.predict(X_test)\n",
" score = svc.score(X_test, y_test)\n",
" return score"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_train = np.loadtxt(\"../data/y_train.txt\")\n",
"y_train = np.array([int(i)-1 for i in y_train])\n",
"y_test = np.loadtxt(\"../data/y_test.txt\")\n",
"y_test = np.array([int(i)-1 for i in y_test])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for window_length in range(5,40,5):\n",
" for n_clusters in range(5,30, 3): \n",
" X_train, X_test = codebook(window_length, n_clusters)\n",
" best_params = select_svc_params(X_train, y_train, 10)\n",
" #SVM classifier\n",
" score = run_svc(X_train, y_train, X_test, y_test)\n",
" p = [window_length, n_clusters, score]\n",
" \n",
" with open('paramTuningSVM.txt', 'a') as file: # save output in file \n",
" s = ['[', ']', ',']\n",
" p = str(list(p))\n",
" for e in s:\n",
" p = p.replace(e, '')\n",
" file.write(p)\n",
" file.write('\\n') \n",
"print('FINISH.')\n",
" \n",
" \n",
" \n",
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}