Commit ced3d742 authored by Danniene Wete

parameter tuning for lda+svm and rfc

parent 03ef6fe2
......@@ -205,7 +205,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.4"
}
},
"nbformat": 4,
......
......@@ -2,10 +2,21 @@
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from collections import Counter\n",
"import pandas as pd\n",
"\n",
"# Gensim\n",
"import gensim\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n",
"import gensim.corpora as corpora\n",
"from gensim.models import CoherenceModel\n",
"from sklearn.metrics import precision_recall_fscore_support, accuracy_score\n",
"\n",
"import itertools\n",
"import pickle\n",
"import numpy as np\n",
......@@ -56,7 +67,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
......@@ -69,7 +80,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
......@@ -79,7 +90,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
......@@ -89,7 +100,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
......@@ -99,7 +110,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
......@@ -108,7 +119,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
......@@ -117,7 +128,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 14,
"metadata": {},
"outputs": [
{
......@@ -132,7 +143,7 @@
" ['xwv', 'xja', 'xwi', ..., 'zba', 'zgd', 'ziq']], dtype='<U4')"
]
},
"execution_count": 11,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
......@@ -143,21 +154,9 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 16,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "write() argument must be str, not numpy.float64",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-12-4de6330cca49>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'score_svm.txt'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'a'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# save output to file\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 34\u001b[0;31m \u001b[0mfile\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscore\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 35\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: write() argument must be str, not numpy.float64"
]
}
],
"outputs": [],
"source": [
"mallet_path = '/home/danniene/mallet/bin/mallet'\n",
"alpha = 2\n",
......@@ -189,7 +188,7 @@
" \n",
" best_params = select_rfc_params(X_train, y_train, 10)\n",
" #RFC classifier\n",
" score = rfc(X_train, y_train, X_test, y_test)\n",
" score = rfc(X_train, y_train, X_test, y_test, best_params)\n",
" \n",
" with open('score_rfc.txt', 'a') as file: # save output to file \n",
" file.write(str(score))\n",
......@@ -199,7 +198,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
......
......@@ -6,7 +6,6 @@
"metadata": {},
"outputs": [],
"source": [
"\n",
"from collections import Counter\n",
"import pandas as pd\n",
"import pickle\n",
......@@ -151,7 +150,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
......
......@@ -253,7 +253,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.4"
}
},
"nbformat": 4,
......
10 0.8690193417034272
20 0.8927723108245673
30 0.9019341703427214
40 0.8890397013912453
50 0.8829317950458093
60 0.8795385137427892
70 0.8758059043094673
80 0.8741092636579573
90 0.8547675602307431
100 0.8537495758398371
10 0.8571428571428571
20 0.8903970139124533
30 0.9168646080760094
40 0.9060061079063454
50 0.8995588734306074
60 0.9185612487275195
70 0.9138106549032915
80 0.9134713267729895
90 0.9192399049881235
100 0.9189005768578216
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment