Commit 9ccc444b authored by Danniene Wete

Plot sensory words and increase cluster number for approach A

parent 3d461d76
......@@ -14,7 +14,15 @@
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(7352, 128)\n"
]
}
],
"source": [
"#Load data\n",
"\n",
......@@ -27,7 +35,7 @@
"gyro_z = np.loadtxt('../../data/body_gyro_z_train.txt')\n",
"\n",
"\n",
"# Combine all 3 channels data to form one 3D matrix of data\n",
"# Combine all 3 channels data to form one 3D matrix of data for each sensor\n",
"def combineData(X, Y, Z):\n",
" combinedData = []\n",
" \n",
......@@ -37,7 +45,8 @@
" return np.array(combinedData)\n",
"\n",
"trainAcc = combineData(acc_x, acc_y, acc_z)\n",
"trainGyr = combineData(gyro_x, gyro_y, gyro_z)"
"trainGyr = combineData(gyro_x, gyro_y, gyro_z)\n",
"print(acc_x.shape)"
]
},
{
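The body of `combineData` falls outside this hunk. A minimal sketch of what it presumably does, assuming it stacks the three (7352, 128) axis arrays channel-first (the `(3, 7352, 24, 10)` shape printed for `A` later in this diff implies that layout); the name is hypothetical:

```python
import numpy as np

def combine_data_sketch(X, Y, Z):
    # Hypothetical stand-in for combineData: stack the three
    # (7352, 128) channel arrays into a single channel-first
    # (3, 7352, 128) array.
    return np.stack([X, Y, Z], axis=0)
```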
......@@ -95,7 +104,7 @@
" \n",
"\n",
"\n",
"def createInstanceArray(X, activity_count):\n",
"def createDocsWithWordIndexes(X, activity_count): # create documents with integer indexes of words\n",
" \n",
" \n",
" docs = []\n",
......@@ -116,7 +125,7 @@
" return np.array(docs) \n",
"\n",
"\n",
"def combineWindow(acc, gyr):\n",
"def combineWindow(acc, gyr): # Make pairwise combinaisons of the channels of the acc. and the gyr. sensor.\n",
" \n",
" x_acc = acc[0]\n",
" y_acc = acc[1]\n",
......@@ -188,7 +197,7 @@
"metadata": {},
"outputs": [],
"source": [
"window_length = 15\n",
"window_length = 10\n",
"overlap_length = window_length // 2\n",
"\n",
"trainAcc_window = sliding_window_approach(trainAcc,window_length,overlap_length)\n",
......@@ -241,7 +250,7 @@
"metadata": {},
"outputs": [],
"source": [
"A = createInstanceArray(trainAcc_window, 7352)"
"A = createDocsWithWordIndexes(trainAcc_window, 7352)"
]
},
{
......@@ -252,7 +261,7 @@
{
"data": {
"text/plain": [
"(7352, 24, 10)"
"(3, 7352, 24, 10)"
]
},
"execution_count": 9,
......@@ -261,7 +270,7 @@
}
],
"source": [
"A[0].shape"
"A.shape"
]
},
{
......@@ -270,7 +279,7 @@
"metadata": {},
"outputs": [],
"source": [
"G = createInstanceArray(trainGyr_window, 7352)"
"G = createDocsWithWordIndexes(trainGyr_window, 7352)"
]
},
{
......@@ -281,7 +290,7 @@
{
"data": {
"text/plain": [
"(7352, 24, 10)"
"7352"
]
},
"execution_count": 11,
......@@ -290,7 +299,7 @@
}
],
"source": [
"G[0].shape"
"G[0].shape[0]"
]
},
{
......@@ -326,9 +335,18 @@
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1058688, 20)\n"
]
}
],
"source": [
"vocabulary = createVocaburlary(T)"
"vocabulary = createVocaburlary(T)\n",
"print(vocabulary.shape)"
]
},
{
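`createVocaburlary` (identifier spelled as in the source) is defined outside the visible hunks. The printed shape factors as 1,058,688 = 7352 documents × 24 windows × 6 words, each row a length-20 word vector; a sketch assuming the function simply stacks every word occurrence in the corpus `T`:

```python
import numpy as np

def create_vocabulary_sketch(T):
    # Hypothetical stand-in: flatten all per-document word vectors
    # into one (n_words, 20) matrix, one row per word occurrence.
    return np.vstack([np.asarray(doc).reshape(-1, 20) for doc in T])
```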
......@@ -336,10 +354,20 @@
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1058688, 20)\n"
]
},
{
"data": {
"text/plain": [
"(1058688, 20)"
"array([-1.600949 , -1.534397 , -1.083371 , -0.6458657, -0.390808 ,\n",
" -0.4107456, -0.3745613, -0.3256869, -0.4300842, -0.4692831,\n",
" 0.5019508, -0.2118332, -0.7104929, -0.7797034, -0.4770705,\n",
" -0.1704645, 0.1173261, 0.3861244, 0.5918656, 0.7064326])"
]
},
"execution_count": 15,
......@@ -348,7 +376,9 @@
}
],
"source": [
"vocabulary.shape"
"v = np.unique(vocabulary, axis=0)\n",
"print(v.shape)\n",
"v[0]"
]
},
{
......@@ -387,7 +417,7 @@
"outputs": [],
"source": [
"# Prepare corpus file for Gaussian LDA. Please change this link\n",
"f = open('/home/danniene/environments/glda_UCI/codebook/Gaussian_LDA-master/corpus.train', 'w')\n",
"f = open('/home/danniene/environments/Gaussian_LDA-master/corpus.train', 'w')\n",
"s = ['[', ']', ',']\n",
"for d in wordIndex:\n",
" d = str(list(d))\n",
......@@ -405,7 +435,7 @@
"outputs": [],
"source": [
"#Prepare embedding file for Gaussian LDA. Please change this link\n",
"file = open('/home/danniene/environments/glda_UCI/codebook/Gaussian_LDA-master/embeddings.txt', 'w')\n",
"file = open('/home/danniene/environments/Gaussian_LDA-master/embeddings.txt', 'w')\n",
"s = ['[', ']', ',']\n",
"for w in vocabulary:\n",
" w = str(list(w))\n",
......@@ -441,7 +471,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.4"
}
},
"nbformat": 4,
......
\begin{tabular}{lrrr}
\toprule
Class & Topic0 & Topic1 & Topic3 \\
\midrule
Class0 & 0 & 13 & 1213 \\
Class1 & 0 & 0 & 1073 \\
Class2 & 0 & 82 & 904 \\
Class3 & 1283 & 0 & 3 \\
Class4 & 1368 & 0 & 6 \\
Class5 & 1394 & 0 & 13 \\
\bottomrule
\end{tabular}
\begin{tabular}{lrrr}
\toprule
Activity & Precision & Recall & F1 score \\
\midrule
WALKING & 0.206 & 0.186 & 0.195 \\
WALKING\_UPSTAIRS & 0.115 & 0.062 & 0.081 \\
WALKING\_DOWNSTAIRS & 0.170 & 0.115 & 0.137 \\
SITTING & 0.202 & 0.187 & 0.194 \\
STANDING & 0.210 & 0.283 & 0.241 \\
LAYING & 0.240 & 0.333 & 0.279 \\
\bottomrule
\end{tabular}
......@@ -5,12 +5,12 @@
"metadata": {},
"source": [
"### Features extracted: mean, slope, and standard deviation of each subsequence.\n",
"### Feature vectors of subsequences in the same time window along the three channels of each sensor are concatenated"
"### Feature vectors of subsequences in the same time window along the three channels of each sensor are concatenated. Feature vectors are normalized to get all features on the same scale."
]
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
......@@ -25,7 +25,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
......@@ -109,7 +109,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
......@@ -169,6 +169,18 @@
" stderr = lr[-1] # slope of the line of best fit\n",
" return stderr\n",
"\n",
"def normalize(X):\n",
" \"\"\"Normalize Function for X\n",
"\n",
" Args:\n",
" X: input data\n",
" Returns:\n",
" X_norm: normalized output data\n",
" \"\"\"\n",
" scaler = MinMaxScaler(feature_range=(0, 1))\n",
" X_norm = scaler.fit_transform(X)\n",
" return X_norm\n",
"\n",
"def get_Orientation(vector):\n",
" slope = (vector[-1]-vector[0])/(len(vector)-1) # Compute the slope \n",
" slope = math.degrees(math.atan(slope)) # Convert slope to angle in degree and return \n",
......@@ -189,12 +201,16 @@
" #min_c = minimum(current_subsequence)\n",
" #max_c = maximum(current_subsequence)\n",
" #em_c = energy_measure(current_subsequence)\n",
" #line_c = get_lineBestFit(current_subsequence)\n",
" ##line_c = get_lineBestFit(current_subsequence)\n",
" #stdrr_c = get_stderr(current_subsequence)\n",
" Features_array[c].append([slope_c, mean_c])\n",
" Features_array = np.array(Features_array)\n",
" \n",
" \n",
" return np.array(Features_array)\n",
" Features_array_normalized = [[], [],[]]\n",
" for channel in range(0, Features_array.shape[0]): # normalize feature vectors in each channel\n",
" Features_array_normalized[channel] = normalize(Features_array[channel])\n",
" \n",
" return np.array(Features_array_normalized)\n",
"\n",
"\n",
"\n",
......@@ -283,22 +299,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"def runFeaturesExtraction(window_length):\n",
" overlap_length = window_length // 2\n",
" \n",
" trainAcc_window = sliding_window_approach(trainAcc,window_length,overlap_length)\n",
" trainGyr_window = sliding_window_approach(trainGyr,window_length,overlap_length)\n",
" trainAcc_normalized = normalizeSequenceValues(trainAcc)\n",
" trainGyr_normalized = normalizeSequenceValues(trainGyr)\n",
" \n",
" trainAcc_window = sliding_window_approach(trainAcc_normalized,window_length,overlap_length)\n",
" trainGyr_window = sliding_window_approach(trainGyr_normalized,window_length,overlap_length)\n",
" \n",
" trainAcc_features = extractFeatures(trainAcc_window)\n",
" trainGyr_features = extractFeatures(trainGyr_window)\n",
......@@ -318,62 +330,59 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"train_corpus, train_vocabulary = runFeaturesExtraction(50)"
"train_corpus, train_vocabulary = runFeaturesExtraction(10) # without scaling, window length = 30"
]
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(176448, 4)"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"(264672, 4)\n"
]
}
],
"source": [
"train_vocabulary.shape"
"train_vocabulary[578] # array([ 4.60539523e-03, 1.02079573e+00, 1.29865663e-02, -4.59548657e-04]) without normalization\n",
"print(train_vocabulary.shape)\n"
]
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n",
" 17, 18, 19, 20, 21, 22, 23])"
"(7352, 36)"
]
},
"execution_count": 50,
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_corpus[0]"
"train_corpus.shape"
]
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# Prepare corpus file for Gaussian LDA. Please change this link\n",
"f = open('/home/danniene/environments/glda_UCI/hom/Gaussian_LDA-master/corpus.train', 'w')\n",
"f = open('/home/danniene/environments/Gaussian_LDA-master/corpus.train', 'w')\n",
"s = ['[', ']', ',',]\n",
"for d in train_corpus:\n",
" d = str(list(d))\n",
......@@ -386,12 +395,12 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"#Prepare embedding file for Gaussian LDA. Please change this link\n",
"file = open('/home/danniene/environments/glda_UCI/hom/Gaussian_LDA-master/embeddings.txt', 'w')\n",
"file = open('/home/danniene/environments/Gaussian_LDA-master/embeddings.txt', 'w')\n",
"s = ['[', ']', ',']\n",
"for w in train_vocabulary:\n",
" w = str(list(w))\n",
......@@ -419,7 +428,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.4"
}
},
"nbformat": 4,
......
......@@ -5,7 +5,7 @@
"metadata": {},
"source": [
"#### Features extracted: slope, mean of each subsequence.\n",
"#### Fusion method: Feature vectors of subsequences are combined based on the channel.\n",
"#### Fusion method: Feature vectors of subsequences are concatenated in each sensor along the channels.\n",
"\n"
]
},
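A minimal sketch of the fusion described above, assuming hypothetical per-channel feature matrices of shape (n_windows, n_features) that are joined along the feature axis for each sensor:

```python
import numpy as np

def fuse_channels_sketch(channel_features):
    # channel_features: list of three (n_windows, n_features)
    # arrays, one per channel; the result is
    # (n_windows, 3 * n_features) for the sensor.
    return np.concatenate(list(channel_features), axis=1)
```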
......@@ -455,7 +455,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.4"
}
},
"nbformat": 4,
......
Data file: 87 floating-point values (one per line), ranging from approximately 0.355 to 0.443.