Commit 3d461d76 authored by Danniene Wete's avatar Danniene Wete

plot SW for approach 1 and 5

parent 247e3959
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}
This diff is collapsed.
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
......@@ -28,7 +28,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
......@@ -39,22 +39,26 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(7352, 42)"
"array(['axf', 'axp', 'axcc', 'axw', 'axaa', 'axo', 'axbb', 'ayy', 'aycc',\n",
" 'ays', 'aycc', 'ays', 'ayr', 'ayo', 'azy', 'azd', 'aza', 'azr',\n",
" 'azbb', 'azd', 'azn', 'gxu', 'gxx', 'gxx', 'gxx', 'gxx', 'gxf',\n",
" 'gxbb', 'gyl', 'gyz', 'gycc', 'gyb', 'gyq', 'gym', 'gyz', 'gzu',\n",
" 'gzx', 'gzx', 'gzx', 'gzx', 'gzf', 'gzbb'], dtype='<U4')"
]
},
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_docs.shape"
"train_docs[152]"
]
},
{
......
This diff is collapsed.
......@@ -171,7 +171,7 @@
" words = []\n",
" \n",
" x1x2_words = list(zip(x_acc[i], x_gyr[i]))\n",
" words.extend([(str('x')+(a+b)) for (a,b) in x1x2_words])\n",
" words.extend([(str('xx')+(a+b)) for (a,b) in x1x2_words])\n",
" \n",
" x1y2_words = list(zip(x_acc[i], y_gyr[i]))\n",
" words.extend([(str('xy')+(a+b)) for (a, b) in x1y2_words])\n",
......@@ -180,7 +180,7 @@
" words.extend([(str('xz')+(a+b)) for (a, b) in x1z2_words])\n",
" \n",
" y1x2_words = list(zip(y_acc[i], x_gyr[i]))\n",
" words.extend([(str('yz')+(a+b)) for (a, b) in y1x2_words]) \n",
" words.extend([(str('yx')+(a+b)) for (a, b) in y1x2_words]) \n",
" \n",
" \n",
" y1y2_words = list(zip(y_acc[i], y_gyr[i]))\n",
......@@ -225,8 +225,8 @@
" trainAcc_window = sliding_window_approach(trainAcc,window_length,overlap_length)\n",
" trainGyr_window = sliding_window_approach(trainGyr,window_length,overlap_length)\n",
" \n",
" testAcc_window = sliding_window_approach(testAcc,window_length,overlap_length)\n",
" testGyr_window = sliding_window_approach(testGyr,window_length,overlap_length)\n",
" #testAcc_window = sliding_window_approach(testAcc,window_length,overlap_length)\n",
" #testGyr_window = sliding_window_approach(testGyr,window_length,overlap_length)\n",
" \n",
" \n",
" \n",
......@@ -235,10 +235,10 @@
" trainGyr_centroids = calc_centroids_array(trainGyr_window, n_cluster)\n",
" \n",
" # Save centroids on disk for using later\n",
" with open('trainTestCorpus_fs4/trainAcc.centroids', 'wb') as fp:\n",
" # with open('trainTestCorpus_fs4/trainAcc.centroids', 'wb') as fp:\n",
" pickle.dump(trainAcc_centroids, fp)\n",
" with open('trainTestCorpus_fs4/trainGyr.centroids', 'wb') as fp: \n",
" pickle.dump(trainGyr_centroids, fp)\n",
" # with open('trainTestCorpus_fs4/trainGyr.centroids', 'wb') as fp: \n",
" # pickle.dump(trainGyr_centroids, fp)\n",
" \n",
" \n",
" #3. Map centroids to characters\n",
......@@ -249,22 +249,22 @@
" \n",
" trainAcc_count = trainAcc.shape[0]\n",
" trainGyr_count = trainGyr.shape[0]\n",
" testAcc_count = testAcc.shape[0]\n",
" testGyr_count = testGyr.shape[0]\n",
" #testAcc_count = testAcc.shape[0]\n",
" #testGyr_count = testGyr.shape[0]\n",
" \n",
" #4 Assign subsequences to cluster centre and replace subsequence with alphabet of cluster centre\n",
" trainAcc_charsSeq = mapCodewordsToChars(trainAcc_window, trainAcc_centroids,trainAcc_count)\n",
" trainGyr_charsSeq = mapCodewordsToChars(trainGyr_window, trainGyr_centroids,trainGyr_count) \n",
" testAcc_charsSeq = mapCodewordsToChars(testAcc_window, trainAcc_centroids, testAcc_count)\n",
" testGyr_charsSeq = mapCodewordsToChars(testGyr_window, trainGyr_centroids, testGyr_count)\n",
" #testAcc_charsSeq = mapCodewordsToChars(testAcc_window, trainAcc_centroids, testAcc_count)\n",
" #testGyr_charsSeq = mapCodewordsToChars(testGyr_window, trainGyr_centroids, testGyr_count)\n",
" \n",
" \n",
" #5. Create train and test bag of words\n",
" trainBow = create_words(trainAcc_charsSeq, trainGyr_charsSeq)\n",
" testBow = create_words(testAcc_charsSeq, testGyr_charsSeq) \n",
" #testBow = create_words(testAcc_charsSeq, testGyr_charsSeq) \n",
" \n",
" \n",
" return trainBow, testBow "
" return trainBow #testBow "
]
},
{
......@@ -291,7 +291,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.4"
}
},
"nbformat": 4,
......
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}
......@@ -1112,7 +1112,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.4"
}
},
"nbformat": 4,
......
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
......@@ -33,7 +33,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
......@@ -171,7 +171,7 @@
" words = []\n",
" \n",
" x1x2_words = list(zip(x_acc[i], x_gyr[i]))\n",
" words.extend([(str('x')+(a+b)) for (a,b) in x1x2_words])\n",
" words.extend([(str('xx')+(a+b)) for (a,b) in x1x2_words])\n",
" \n",
" x1y2_words = list(zip(x_acc[i], y_gyr[i]))\n",
" words.extend([(str('xy')+(a+b)) for (a, b) in x1y2_words])\n",
......@@ -180,7 +180,7 @@
" words.extend([(str('xz')+(a+b)) for (a, b) in x1z2_words])\n",
" \n",
" y1x2_words = list(zip(y_acc[i], x_gyr[i]))\n",
" words.extend([(str('yz')+(a+b)) for (a, b) in y1x2_words]) \n",
" words.extend([(str('yx')+(a+b)) for (a, b) in y1x2_words]) \n",
" \n",
" \n",
" y1y2_words = list(zip(y_acc[i], y_gyr[i]))\n",
......@@ -205,7 +205,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
......@@ -225,8 +225,8 @@
" trainAcc_window = sliding_window_approach(trainAcc,window_length,overlap_length)\n",
" trainGyr_window = sliding_window_approach(trainGyr,window_length,overlap_length)\n",
" \n",
" testAcc_window = sliding_window_approach(testAcc,window_length,overlap_length)\n",
" testGyr_window = sliding_window_approach(testGyr,window_length,overlap_length)\n",
" #testAcc_window = sliding_window_approach(testAcc,window_length,overlap_length)\n",
" #testGyr_window = sliding_window_approach(testGyr,window_length,overlap_length)\n",
" \n",
" \n",
" \n",
......@@ -235,10 +235,10 @@
" trainGyr_centroids = calc_centroids_array(trainGyr_window, n_cluster)\n",
" \n",
" # Save centroids on disk for using later\n",
" with open('trainTestCorpus_fs4/trainAcc.centroids', 'wb') as fp:\n",
" pickle.dump(trainAcc_centroids, fp)\n",
" with open('trainTestCorpus_fs4/trainGyr.centroids', 'wb') as fp: \n",
" pickle.dump(trainGyr_centroids, fp)\n",
" # with open('trainTestCorpus_fs4/trainAcc.centroids', 'wb') as fp:\n",
" #pickle.dump(trainAcc_centroids, fp)\n",
" # with open('trainTestCorpus_fs4/trainGyr.centroids', 'wb') as fp: \n",
" # pickle.dump(trainGyr_centroids, fp)\n",
" \n",
" \n",
" #3. Map centroids to characters\n",
......@@ -249,22 +249,22 @@
" \n",
" trainAcc_count = trainAcc.shape[0]\n",
" trainGyr_count = trainGyr.shape[0]\n",
" testAcc_count = testAcc.shape[0]\n",
" testGyr_count = testGyr.shape[0]\n",
" #testAcc_count = testAcc.shape[0]\n",
" #testGyr_count = testGyr.shape[0]\n",
" \n",
" #4 Assign subsequences to cluster centre and replace subsequence with alphabet of cluster centre\n",
" trainAcc_charsSeq = mapCodewordsToChars(trainAcc_window, trainAcc_centroids,trainAcc_count)\n",
" trainGyr_charsSeq = mapCodewordsToChars(trainGyr_window, trainGyr_centroids,trainGyr_count) \n",
" testAcc_charsSeq = mapCodewordsToChars(testAcc_window, trainAcc_centroids, testAcc_count)\n",
" testGyr_charsSeq = mapCodewordsToChars(testGyr_window, trainGyr_centroids, testGyr_count)\n",
" #testAcc_charsSeq = mapCodewordsToChars(testAcc_window, trainAcc_centroids, testAcc_count)\n",
" #testGyr_charsSeq = mapCodewordsToChars(testGyr_window, trainGyr_centroids, testGyr_count)\n",
" \n",
" \n",
" #5. Create train and test bag of words\n",
" trainBow = create_words(trainAcc_charsSeq, trainGyr_charsSeq)\n",
" testBow = create_words(testAcc_charsSeq, testGyr_charsSeq) \n",
" #testBow = create_words(testAcc_charsSeq, testGyr_charsSeq) \n",
" \n",
" \n",
" return trainBow, testBow "
" return trainBow #testBow "
]
},
{
......@@ -291,7 +291,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.4"
}
},
"nbformat": 4,
......
5 5 0 0.5973884657236126
5 5 1 0.5790261153427638
5 5 2 0.5799782372143635
5 5 3 0.5463819368879217
5 5 4 0.5486942328618063
5 5 5 0.5530467899891186
5 5 6 0.5757616974972797
5 5 7 0.594804134929271
5 5 8 0.5485582154515778
5 5 9 0.46354733405875953
5 8 0 0.6526115342763874
5 8 1 0.44518498367791076
5 8 2 0.551278563656148
5 8 3 0.47116430903155604
5 8 4 0.574265505984766
5 8 5 0.544885745375408
5 8 6 0.5238030467899891
5 8 7 0.655739934711643
5 8 8 0.5690968443960827
5 8 9 0.5952121871599565
5 11 0 0.4964635473340588
5 11 1 0.5069368879216539
5 11 2 0.5548150163220892
5 11 3 0.5648803046789989
5 11 4 0.500544069640914
5 11 5 0.4926550598476605
5 11 6 0.6215995647442872
5 11 7 0.5500544069640914
5 11 8 0.42260609357997825
5 11 9 0.45212187159956474
5 14 0 0.5194504896626768
5 14 1 0.5922198041349293
5 14 2 0.5639281828073993
5 14 3 0.5397170837867247
5 14 4 0.4740206746463547
5 14 5 0.5142818280739935
5 14 6 0.5831066376496191
5 14 7 0.514961915125136
5 14 8 0.5544069640914037
5 14 9 0.46259521218715993
5 17 0 0.45606637649619153
5 17 1 0.4804134929270947
5 17 2 0.5012241566920566
5 17 3 0.4476332970620239
5 17 4 0.500816104461371
5 17 5 0.5783460282916213
5 17 6 0.5352285092491839
5 17 7 0.4899347116430903
5 17 8 0.4959194776931447
5 17 9 0.4117247007616975
5 20 0 0.5289717083786725
5 20 1 0.4846300326441785
5 20 2 0.5718171926006529
5 20 3 0.5957562568008705
5 20 4 0.5125136017410229
5 20 5 0.5195865070729053
5 20 6 0.5314200217627857
5 20 7 0.5416213275299239
5 20 8 0.5068008705114254
5 20 9 0.5485582154515778
5 23 0 0.551550598476605
5 23 1 0.4430087051142546
5 23 2 0.6126224156692056
5 23 3 0.4960554951033732
5 23 4 0.5179542981501633
5 23 5 0.46817192600652885
5 23 6 0.565152339499456
5 23 7 0.49034276387377584
5 23 8 0.5126496191512514
5 23 9 0.5316920565832427
5 26 0 0.4434167573449401
5 26 1 0.5175462459194777
5 26 2 0.5425734494015234
5 26 3 0.5676006528835691
5 26 4 0.5103373231773667
5 26 5 0.5140097932535365
5 26 6 0.49455930359085964
5 26 7 0.5104733405875952
5 26 8 0.463139281828074
5 26 9 0.44164853101196955
5 29 0 0.5270674646354734
5 29 1 0.4457290533188248
5 29 2 0.4432807399347116
5 29 3 0.4849020674646355
5 29 4 0.5209466811751904
5 29 5 0.4458650707290533
5 29 6 0.4738846572361262
5 29 7 0.5257072905331882
5 29 8 0.4665397170837867
5 29 9 0.4910228509249184
10 5 0 0.5184983677910773
10 5 1 0.6293525571273123
10 5 2 0.46749183895538626
10 5 3 0.49510337323177367
10 5 4 0.588683351468988
10 5 5 0.610038084874864
10 5 6 0.5417573449401524
10 5 7 0.6113982589771491
10 5 8 0.6425462459194777
10 5 9 0.6568280739934712
10 8 0 0.5548150163220892
10 8 1 0.6177910772578891
10 8 2 0.5189064200217628
10 8 3 0.5967083786724701
10 8 4 0.6230957562568009
10 8 5 0.4657236126224157
10 8 6 0.4961915125136017
10 8 7 0.4985038084874864
10 8 8 0.5535908596300326
10 8 9 0.5402611534276387
10 11 0 0.5142818280739935
10 11 1 0.5051686615886833
10 11 2 0.5617519042437432
10 11 3 0.45266594124047876
10 11 4 0.4325353645266594
10 11 5 0.529379760609358
10 11 6 0.48680631120783463
10 11 7 0.596436343852013
10 11 8 0.580658324265506
10 11 9 0.588411316648531
10 14 0 0.5297878128400435
10 14 1 0.6225516866158868
10 14 2 0.5224428726877041
10 14 3 0.5403971708378672
10 14 4 0.5238030467899891
10 14 5 0.51550598476605
10 14 6 0.4915669205658324
10 14 7 0.4366158868335147
10 14 8 0.5866430903155604
10 14 9 0.4644994559303591
10 17 0 0.44872143634385203
10 17 1 0.4761969532100109
10 17 2 0.5231229597388466
10 17 3 0.5609357997823722
10 17 4 0.4621871599564744
10 17 5 0.5137377584330794
10 17 6 0.5631120783460283
10 17 7 0.48585418933623503
10 17 8 0.5474700761697497
10 17 9 0.5810663764961915
10 20 0 0.48449401523394997
10 20 1 0.5156420021762785
10 20 2 0.48272578890097934
10 20 3 0.4465451577801959
10 20 4 0.6303046789989118
10 20 5 0.4498095756256801
10 20 6 0.5341403699673558
10 20 7 0.4839499455930359
10 20 8 0.5878672470076169
10 20 9 0.5331882480957563
10 23 0 0.5126496191512514
10 23 1 0.5661044613710555
10 23 2 0.47878128400435255
10 23 3 0.515778019586507
10 23 4 0.4238302502720348
10 23 5 0.5053046789989118
10 23 6 0.654107725788901
10 23 7 0.44940152339499456
10 23 8 0.5099292709466812
10 23 9 0.5
10 26 0 0.5632480957562568
10 26 1 0.5133297062023939
10 26 2 0.5233949945593036
10 26 3 0.5548150163220892
10 26 4 0.5161860718171926
10 26 5 0.49496735582154516
10 26 6 0.5382208922742111
10 26 7 0.49034276387377584
10 26 8 0.5225788900979326
10 26 9 0.5208106637649619
10 29 0 0.6743743199129488
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
......@@ -113,7 +113,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 5,
"metadata": {},
"outputs": [
{
......@@ -145,12 +145,12 @@
"metadata": {},
"outputs": [],
"source": [
"for window_length in range(5, 50, 5):\n",
"for window_length in range(5, 40, 5):\n",
" for n_clusters in range(5, 30, 3): \n",
" train_docs, test_docs = codebook_approach(window_length, n_clusters) \n",
" train_docs = codebook_approach(window_length, n_clusters) \n",
" id2word = corpora.Dictionary(train_docs)\n",
" corpus = [id2word.doc2bow(doc) for doc in train_docs] \n",
" filename1 = 'corpus'+str(window_length)+str(n_clusters)+'.train'\n",
" #filename1 = 'corpus'+str(window_length)+str(n_clusters)+'.train'\n",
" \n",
" for rdst in range(0, 10): \n",
" pred_topics = lda(corpus, id2word, rdst)\n",
......@@ -163,7 +163,7 @@
" acc = accuracy_score(true_labels, pred_labels)\n",
" p = [window_length, n_clusters, rdst, acc]\n",
" #print('%d, %d, %.4f, %.4f' % (window_length, n_clusters, idftreshold, acc))\n",
" with open('createWords_scores.txt', 'a') as file: # save output in file \n",
" with open('paramTuningScores_m4.txt', 'a') as file: # save output in file \n",
" s = ['[', ']', ',']\n",
" p = str(list(p))\n",
" for e in s:\n",
......@@ -207,7 +207,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.4"
}
},
"nbformat": 4,
......
This diff is collapsed.
......@@ -35,6 +35,26 @@
" train_docs = pickle.load(fp)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(63,)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_docs[152].shape"
]
},
{
"cell_type": "code",
"execution_count": 3,
......
......@@ -271,11 +271,23 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"0.8131120783460283"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = np.loadtxt('paramTuningScores_fs4.txt')"
"data = np.loadtxt('paramTuningScores_fs4OLD.txt')\n",
"np.max(data[:,3])"
]
},
{
......
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
......@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"metadata": {},
"outputs": [
{
......@@ -28,7 +28,7 @@
"5.0"
]
},
"execution_count": 6,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment