basic usage

Aviezer Lifshitz

2023-08-20

Basic usage of the package.

Basic usage

First, let’s simulate 5 clusters of 100 two-dimensional points each, normally distributed around the centers 1 to 5 with a standard deviation of 0.3:

data <- simulate_data(n = 100, sd = 0.3, nclust = 5, dims = 2)
data
##      id        V1        V2 true_clust
## 1     1 1.2104301 1.3683836          1
## 2     2 1.4491215 0.4625674          1
## 3     3 1.2559885 0.9372232          1
## 4     4 0.8384915 0.7257761          1
## 5     5 1.3445816 0.8939351          1
## 6     6 1.3381513 1.7039413          1
## 7     7 0.6435033 1.3375870          1
## 8     8 0.8303762 1.0858607          1
## 9     9 0.2114928 0.9388908          1
## 10   10 1.1833862 1.1472999          1
## 11   11 1.4171330 1.6443124          1
## 12   12 1.3276149 1.3874632          1
## 13   13 1.5965528 1.3105140          1
## 14   14 0.7908713 0.3863466          1
## 15   15 1.1231271 1.0351274          1
## 16   16 1.0951040 0.8324043          1
## 17   17 1.4269767 0.7685852          1
## 18   18 0.6916302 1.2483960          1
## 19   19 0.9714620 0.6577174          1
## 20   20 1.3784464 1.5836677          1
## 21   21 0.7969110 1.2915563          1
## 22   22 1.2691834 0.7648148          1
## 23   23 1.7154622 0.8518827          1
## 24   24 1.1913115 0.9343243          1
## 25   25 1.1243154 0.9461786          1
## 26   26 0.8732431 0.9824090          1
## 27   27 1.3918277 1.0916876          1
## 28   28 1.5159796 1.2975739          1
## 29   29 1.5274221 1.2271982          1
## 30   30 1.0974767 0.9427814          1
## 31   31 1.2828950 0.7675730          1
## 32   32 1.3499113 0.4990147          1
## 33   33 0.6995950 1.7106876          1
## 34   34 1.1639380 1.4218098          1
## 35   35 0.9888706 0.7425405          1
## 36   36 1.1338190 1.2437757          1
## 37   37 0.8432348 1.1731891          1
## 38   38 0.8849448 1.2731927          1
## 39   39 0.8983317 1.2373607          1
## 40   40 0.7187551 0.9990338          1
## 41   41 0.4915366 1.2359479          1
## 42   42 1.5570788 0.7139513          1
## 43   43 0.6810858 1.7778102          1
## 44   44 0.9350087 1.2944691          1
## 45   45 0.7820555 0.9061553          1
## 46   46 1.3198188 0.6327240          1
## 47   47 1.2693935 1.2556958          1
## 48   48 1.2655209 0.7677790          1
## 49   49 1.0492183 1.0071171          1
## 50   50 1.3436999 0.8914861          1
## 51   51 0.8649826 0.4393002          1
## 52   52 0.8373202 1.2861909          1
## 53   53 1.3380771 0.9031222          1
## 54   54 1.0646184 0.8269146          1
## 55   55 1.0562299 1.1478297          1
## 56   56 0.7874496 0.6551511          1
## 57   57 0.8406047 0.8766421          1
## 58   58 1.3362826 1.0910906          1
## 59   59 0.8672484 0.9945076          1
## 60   60 1.1358433 0.6998124          1
## 61   61 0.9683520 0.6204357          1
## 62   62 1.0711068 0.7565281          1
## 63   63 0.8514140 1.5520788          1
## 64   64 1.2442232 0.6942918          1
## 65   65 1.2103619 1.1214371          1
## 66   66 1.0695594 0.8500226          1
## 67   67 1.1367212 1.0370786          1
## 68   68 0.9939847 1.0128525          1
## 69   69 1.2033169 1.1006504          1
## 70   70 1.0782586 0.6374707          1
## 71   71 0.2288410 0.6416650          1
## 72   72 1.2769338 1.0937538          1
## 73   73 1.1421456 0.9497381          1
## 74   74 0.7868260 1.1260806          1
## 75   75 1.0532944 1.0440163          1
## 76   76 0.5311549 0.7625642          1
## 77   77 0.8498302 1.3852686          1
## 78   78 0.8344997 0.9354545          1
## 79   79 1.2074861 0.2554480          1
## 80   80 0.7307841 1.1591555          1
## 81   81 0.8972436 1.1210308          1
## 82   82 1.2554231 0.5900796          1
## 83   83 1.2608541 0.8421847          1
## 84   84 0.8130890 1.0557354          1
## 85   85 0.8404473 0.8711645          1
## 86   86 0.8349969 1.4003360          1
## 87   87 1.0816576 1.4512211          1
## 88   88 0.9149284 1.5146929          1
## 89   89 0.9911115 0.8629948          1
## 90   90 0.7919034 0.8436254          1
## 91   91 0.4852926 0.9230567          1
## 92   92 1.2506452 1.0277350          1
## 93   93 0.9789287 0.8972017          1
## 94   94 1.4397487 0.7961277          1
## 95   95 1.3050221 0.9499535          1
## 96   96 0.7019074 1.3787199          1
## 97   97 0.6176745 1.1109013          1
## 98   98 0.8390911 0.8890517          1
## 99   99 0.6900021 0.7087577          1
## 100 100 0.6799433 0.8882345          1
## 101 101 2.0391639 2.0181653          2
## 102 102 2.4388016 2.4557976          2
## 103 103 2.0174240 2.5521841          2
## 104 104 1.8587122 1.4773660          2
## 105 105 2.2228069 1.8643797          2
## 106 106 2.2205658 2.5838951          2
## 107 107 2.3288492 2.3399334          2
## 108 108 1.8282585 1.7239360          2
## 109 109 1.9028747 1.7636406          2
## 110 110 2.1315162 1.8946201          2
## 111 111 1.9070960 2.5482367          2
## 112 112 1.3471719 1.9344844          2
## 113 113 2.3481745 2.6074791          2
## 114 114 1.8829352 2.0381267          2
## 115 115 2.0074736 2.1634433          2
## 116 116 1.5513230 2.4508656          2
## 117 117 2.0193820 1.9183353          2
## 118 118 2.0312597 2.3190341          2
## 119 119 1.8542131 2.2321659          2
## 120 120 1.9060893 2.0646348          2
## 121 121 2.4722898 1.9740046          2
## 122 122 1.8770491 1.9627591          2
## 123 123 2.0650205 2.3379996          2
## 124 124 1.8670126 2.4002337          2
## 125 125 1.7917625 2.4462792          2
## 126 126 1.7968311 2.1800546          2
## 127 127 1.8854365 1.7858832          2
## 128 128 2.0310415 1.9465927          2
## 129 129 2.1572609 1.3434804          2
## 130 130 1.7041422 2.0584772          2
## 131 131 1.9672706 2.1634058          2
## 132 132 1.3957523 2.2280470          2
## 133 133 1.9491033 1.8333605          2
## 134 134 2.7426726 1.8499737          2
## 135 135 2.3808409 1.6649506          2
## 136 136 2.1683032 2.1805902          2
## 137 137 1.8711416 2.3592386          2
## 138 138 2.0285836 2.2770725          2
## 139 139 1.8092272 2.4288718          2
## 140 140 1.5577312 2.2269449          2
## 141 141 1.4773338 2.0939169          2
## 142 142 1.5450671 2.0904800          2
## 143 143 1.8244810 2.0558441          2
## 144 144 1.6451175 1.5896367          2
## 145 145 2.3707862 2.3826735          2
## 146 146 2.3383979 2.0764678          2
## 147 147 2.2404646 1.9210600          2
## 148 148 1.9270636 1.7799585          2
## 149 149 1.9005145 2.0439734          2
## 150 150 2.4991068 1.8043598          2
## 151 151 2.6816245 1.8650372          2
## 152 152 1.9831453 2.3908543          2
## 153 153 1.4493428 2.1726293          2
## 154 154 1.9927737 1.9988305          2
## 155 155 1.7215717 2.1145590          2
## 156 156 1.3711692 1.8178593          2
## 157 157 2.0527785 1.8409058          2
## 158 158 1.8687704 1.8579349          2
## 159 159 2.0012288 2.3898236          2
## 160 160 2.3538437 1.9176856          2
## 161 161 2.1268875 1.9789530          2
## 162 162 1.9698392 1.9783816          2
## 163 163 2.0237972 1.7201793          2
## 164 164 1.9978177 1.6915337          2
## 165 165 1.7981725 2.3774763          2
## 166 166 1.5054348 1.7052174          2
## 167 167 1.8698982 2.1030603          2
## 168 168 2.1865770 1.7654210          2
## 169 169 2.2238929 2.2340106          2
## 170 170 2.4256951 1.6375617          2
## 171 171 1.9488542 2.3069497          2
## 172 172 2.2238790 1.9878563          2
## 173 173 1.8948674 2.1295271          2
## 174 174 1.8754898 1.6970517          2
## 175 175 1.7590136 1.9580199          2
## 176 176 1.8026681 1.8304441          2
## 177 177 1.7497553 1.9836410          2
## 178 178 1.6672878 1.8854531          2
## 179 179 2.0736994 1.8903495          2
## 180 180 2.6008386 2.0007398          2
## 181 181 1.7572684 1.8883062          2
## 182 182 2.3615302 2.8693811          2
## 183 183 1.6699534 1.8502708          2
## 184 184 1.7321898 2.0740680          2
## 185 185 2.2498669 2.0181920          2
## 186 186 2.2532141 1.9559326          2
## 187 187 1.8039143 2.0612574          2
## 188 188 2.2792179 2.1759185          2
## 189 189 1.5791727 1.3624400          2
## 190 190 1.7931790 2.0183778          2
## 191 191 1.8332814 1.7607641          2
## 192 192 1.8987932 1.8791867          2
## 193 193 1.6623870 1.8733202          2
## 194 194 1.5725394 1.7035323          2
## 195 195 1.5807183 2.1476385          2
## 196 196 2.0456221 1.9865136          2
## 197 197 1.7354827 1.8701078          2
## 198 198 2.2273729 2.3203348          2
## 199 199 1.7379647 2.2614389          2
## 200 200 1.8285528 1.8891335          2
## 201 201 3.0228406 2.5742012          3
## 202 202 2.8264634 2.5193997          3
## 203 203 2.9795499 2.9430383          3
## 204 204 2.5758978 3.3117884          3
## 205 205 2.9320324 3.2966298          3
## 206 206 2.6298736 3.0509339          3
## 207 207 3.5191178 3.1996077          3
## 208 208 3.3145599 2.8849450          3
## 209 209 3.2691203 2.6418992          3
## 210 210 2.7862355 3.6152586          3
## 211 211 2.6568845 3.7019399          3
## 212 212 2.9425532 3.1743587          3
## 213 213 2.5443905 2.6887054          3
## 214 214 3.0179333 4.0055047          3
## 215 215 3.1292554 2.7458439          3
## 216 216 3.0196192 3.2530946          3
## 217 217 2.9128796 3.0726575          3
## 218 218 3.6563366 3.2661185          3
## 219 219 2.6945793 3.1150197          3
## 220 220 3.4230353 2.9653637          3
## 221 221 3.1687852 2.7597259          3
## 222 222 2.7532207 2.6670718          3
## 223 223 2.4448156 2.9681616          3
## 224 224 2.4300829 2.9822555          3
## 225 225 2.8616584 3.1300119          3
## 226 226 3.2975809 2.6457393          3
## 227 227 3.0014331 3.2918639          3
## 228 228 3.7418102 2.7802559          3
## 229 229 2.6969278 2.5702803          3
## 230 230 3.3951944 3.0383523          3
## 231 231 3.3107304 2.8405802          3
## 232 232 2.8476121 3.0831595          3
## 233 233 3.1908668 2.9201190          3
## 234 234 2.3890699 3.1932735          3
## 235 235 3.1141137 3.0658503          3
## 236 236 2.6316894 2.8234095          3
## 237 237 3.3142624 2.9993528          3
## 238 238 3.1911930 3.1066114          3
## 239 239 2.9683849 3.4586191          3
## 240 240 2.6095891 2.9251905          3
## 241 241 3.1909567 3.2204691          3
## 242 242 3.1804196 3.8110207          3
## 243 243 2.8824807 2.6830037          3
## 244 244 3.0821712 2.8421912          3
## 245 245 2.7967161 3.2160224          3
## 246 246 3.2738716 2.9243666          3
## 247 247 3.0570122 3.2224496          3
## 248 248 2.7290787 2.8826562          3
## 249 249 3.0741243 3.2942872          3
## 250 250 2.7109808 3.0250313          3
## 251 251 2.7592079 2.7649743          3
## 252 252 2.8173936 3.0421431          3
## 253 253 2.0893164 2.4664163          3
## 254 254 3.2908883 3.3662660          3
## 255 255 3.3613008 3.7782209          3
## 256 256 2.9399723 3.0523287          3
## 257 257 3.2103825 2.6404745          3
## 258 258 3.0918192 3.1156866          3
## 259 259 2.9687267 3.4040006          3
## 260 260 2.8655536 2.5524561          3
## 261 261 2.9082046 2.6380456          3
## 262 262 2.5406484 2.9733453          3
## 263 263 3.1332768 3.3851481          3
## 264 264 2.5979987 3.1081696          3
## 265 265 3.2099134 3.2326707          3
## 266 266 2.1269733 2.5703307          3
## 267 267 2.9056134 2.7590293          3
## 268 268 2.9144735 2.8913101          3
## 269 269 2.9409252 3.6665764          3
## 270 270 2.1816313 3.0306729          3
## 271 271 3.0241775 3.3440866          3
## 272 272 2.8901780 2.8408377          3
## 273 273 2.7263616 2.7822060          3
## 274 274 3.3628130 3.3399851          3
## 275 275 2.9206051 2.9606082          3
## 276 276 3.3774914 3.4511808          3
## 277 277 3.0255056 3.2377552          3
## 278 278 2.8056764 3.3026700          3
## 279 279 3.1158295 2.7345826          3
## 280 280 3.0193448 2.6935288          3
## 281 281 2.3713686 2.8172566          3
## 282 282 2.3228749 3.2678787          3
## 283 283 3.1443417 2.6714102          3
## 284 284 2.9217835 2.9460468          3
## 285 285 2.9346401 2.6732815          3
## 286 286 3.1988354 2.8717862          3
## 287 287 2.6853385 2.9613781          3
## 288 288 3.1631581 3.2231121          3
## 289 289 2.4083477 3.1599477          3
## 290 290 3.0519035 2.3891263          3
## 291 291 2.8201274 3.1464579          3
## 292 292 3.1368296 3.0520403          3
## 293 293 3.0997534 2.8530752          3
## 294 294 3.2923619 3.0868825          3
## 295 295 2.7540963 3.2833736          3
## 296 296 2.3546711 2.8627440          3
## 297 297 2.7946858 3.0152221          3
## 298 298 3.1415943 3.1597159          3
## 299 299 2.7195751 2.8931792          3
## 300 300 3.0756822 3.3554485          3
## 301 301 3.6884516 3.7221189          4
## 302 302 3.8288771 3.9053497          4
## 303 303 3.9793156 3.5152524          4
## 304 304 4.2314435 3.7308093          4
## 305 305 3.9485961 3.3284119          4
## 306 306 3.6326828 4.0357080          4
## 307 307 4.0643474 3.9678727          4
## 308 308 3.7726889 4.1642610          4
## 309 309 4.2834901 4.1102333          4
## 310 310 3.9514273 4.2101962          4
## 311 311 4.1346328 3.9975055          4
## 312 312 3.7704318 3.9629276          4
## 313 313 3.6205448 4.0448399          4
## 314 314 4.0025090 3.9998914          4
## 315 315 3.6621169 3.9393469          4
## 316 316 4.7560493 4.2881091          4
## 317 317 4.1279586 4.2358091          4
## 318 318 3.8563616 3.9070165          4
## 319 319 3.9356267 4.0960128          4
## 320 320 4.1732335 4.2190993          4
## 321 321 4.2129027 4.2557813          4
## 322 322 4.5449944 4.1437936          4
## 323 323 3.6240873 4.5588799          4
## 324 324 4.2809168 3.8955731          4
## 325 325 3.5159098 4.1164808          4
## 326 326 4.0959180 3.7298479          4
## 327 327 3.8211777 4.2507414          4
## 328 328 3.8113820 4.4978229          4
## 329 329 4.5300123 3.9777361          4
## 330 330 4.1847565 3.9363343          4
## 331 331 3.9374313 3.4318420          4
## 332 332 4.6762341 4.0253805          4
## 333 333 3.3632409 4.2158958          4
## 334 334 3.8811024 3.3110981          4
## 335 335 3.5423012 3.8583800          4
## 336 336 4.3320430 3.6152731          4
## 337 337 4.3603633 4.2065882          4
## 338 338 4.2168854 4.1724254          4
## 339 339 4.2827776 4.0712652          4
## 340 340 3.4197209 3.9426643          4
## 341 341 3.5634185 3.4434580          4
## 342 342 4.2604882 4.1764107          4
## 343 343 4.6134920 4.0624458          4
## 344 344 4.2002316 4.0790578          4
## 345 345 3.7772521 4.3783260          4
## 346 346 3.6464681 3.6843470          4
## 347 347 4.5281357 4.1168485          4
## 348 348 4.2963845 3.6576726          4
## 349 349 3.4156272 4.1575434          4
## 350 350 3.6951108 4.2392037          4
## 351 351 3.4398336 4.2966140          4
## 352 352 3.6976667 4.4101329          4
## 353 353 4.1619451 4.1456427          4
## 354 354 3.9077289 3.9741202          4
## 355 355 3.6214215 4.1272374          4
## 356 356 4.4873152 4.5416221          4
## 357 357 4.3606232 4.2723192          4
## 358 358 3.5460653 4.2232890          4
## 359 359 3.5559428 3.8824824          4
## 360 360 4.1074274 4.6042786          4
## 361 361 3.7278963 4.1619784          4
## 362 362 4.1190905 3.8522400          4
## 363 363 3.7968741 3.4320511          4
## 364 364 3.7079632 4.0477676          4
## 365 365 3.9656941 4.0240688          4
## 366 366 4.4880741 3.8327801          4
## 367 367 4.1267916 4.0539403          4
## 368 368 3.7907997 3.9493458          4
## 369 369 3.7213180 3.6928604          4
## 370 370 4.6216621 4.6851062          4
## 371 371 4.3557618 3.6822297          4
## 372 372 4.4629404 4.1302436          4
## 373 373 3.9019839 4.3488407          4
## 374 374 3.8233173 3.9189693          4
## 375 375 4.3239639 3.4663861          4
## 376 376 3.8246406 3.8119997          4
## 377 377 3.6997411 4.1857216          4
## 378 378 3.8532408 4.0260158          4
## 379 379 3.6122314 4.0729654          4
## 380 380 3.5028944 3.8320522          4
## 381 381 3.7593790 3.9948957          4
## 382 382 4.3323486 4.1757377          4
## 383 383 3.5453701 4.1819934          4
## 384 384 3.8341699 4.1337212          4
## 385 385 3.5454753 3.9049875          4
## 386 386 3.9949851 4.3941884          4
## 387 387 3.6725118 4.2897380          4
## 388 388 3.9756914 4.0404751          4
## 389 389 4.1410160 3.9063258          4
## 390 390 4.3205476 3.5994082          4
## 391 391 4.3804207 3.7896964          4
## 392 392 3.9929013 3.9956640          4
## 393 393 3.8124848 4.1960163          4
## 394 394 3.9584686 3.7715580          4
## 395 395 4.2983172 4.8471691          4
## 396 396 4.3698917 3.9677632          4
## 397 397 4.0991057 4.1926265          4
## 398 398 3.9928729 4.1775785          4
## 399 399 3.9432750 4.1233490          4
## 400 400 4.4524295 3.7825198          4
## 401 401 5.5133063 5.0450294          5
## 402 402 5.1019782 4.8001901          5
## 403 403 5.1172691 4.9525798          5
## 404 404 5.1910605 4.4270148          5
## 405 405 5.2762189 4.7392097          5
## 406 406 5.7538329 5.1287255          5
## 407 407 5.3225482 4.4610521          5
## 408 408 4.5381460 4.6968687          5
## 409 409 4.5936500 4.9350762          5
## 410 410 5.1226113 4.7935962          5
## 411 411 4.8943111 4.6335769          5
## 412 412 5.4567585 4.5135088          5
## 413 413 5.7292191 5.0221079          5
## 414 414 4.9176475 4.8675722          5
## 415 415 5.1472754 5.2588447          5
## 416 416 4.6608851 4.8356509          5
## 417 417 5.4573166 5.1031367          5
## 418 418 4.9670786 5.0988470          5
## 419 419 5.1181242 5.1269276          5
## 420 420 4.6896242 5.0606529          5
## 421 421 4.4276857 4.7738091          5
## 422 422 4.6688578 5.2349823          5
## 423 423 4.6604069 5.2767647          5
## 424 424 5.2292148 4.8734313          5
## 425 425 5.2921353 5.1933868          5
## 426 426 4.8161732 5.1981492          5
## 427 427 4.8976412 5.0642983          5
## 428 428 5.2836220 4.6629048          5
## 429 429 4.8955325 5.6683789          5
## 430 430 5.0337536 4.6593043          5
## 431 431 5.0759803 4.9800395          5
## 432 432 4.9067557 5.0370765          5
## 433 433 4.7416282 4.8916794          5
## 434 434 4.9160477 5.0655779          5
## 435 435 4.7819251 5.0270777          5
## 436 436 5.0567739 5.0870725          5
## 437 437 4.8634289 5.2379301          5
## 438 438 5.0721565 5.0963812          5
## 439 439 5.2079232 5.1998098          5
## 440 440 4.7704942 5.1582392          5
## 441 441 4.6746363 4.7272163          5
## 442 442 4.4621512 4.6310220          5
## 443 443 4.9204140 5.4607818          5
## 444 444 4.5866643 4.9764021          5
## 445 445 5.1278497 5.3497267          5
## 446 446 4.9780524 4.9725658          5
## 447 447 4.5841538 5.3399144          5
## 448 448 4.8168370 5.0250592          5
## 449 449 5.1074989 4.7561170          5
## 450 450 4.4419290 4.8718176          5
## 451 451 5.2725587 5.0118051          5
## 452 452 4.7283077 5.1266616          5
## 453 453 5.4874816 4.9463756          5
## 454 454 5.1043362 5.0618757          5
## 455 455 4.6466943 5.0227658          5
## 456 456 5.1278688 4.9726940          5
## 457 457 5.3199372 4.8624794          5
## 458 458 5.5166335 5.0561581          5
## 459 459 5.0498237 5.1415792          5
## 460 460 5.1769154 5.3066221          5
## 461 461 4.6530496 4.8915139          5
## 462 462 5.0045274 5.0384046          5
## 463 463 4.8735601 4.5343010          5
## 464 464 4.4360070 4.9546956          5
## 465 465 5.1534273 4.9487389          5
## 466 466 4.5508130 4.7018845          5
## 467 467 4.5984322 4.8850922          5
## 468 468 4.7681095 5.2515668          5
## 469 469 5.2606449 4.5093633          5
## 470 470 4.9324109 4.7596517          5
## 471 471 5.3014319 4.9238760          5
## 472 472 4.3784635 5.3527621          5
## 473 473 4.5349217 5.0800590          5
## 474 474 4.5606143 4.4334509          5
## 475 475 4.5418760 5.3025480          5
## 476 476 5.0434911 5.1425728          5
## 477 477 4.9617557 5.5410689          5
## 478 478 5.1177725 4.7492209          5
## 479 479 4.6515451 5.2139247          5
## 480 480 4.9646100 4.9970420          5
## 481 481 5.4265762 4.6943047          5
## 482 482 5.2314653 5.0259304          5
## 483 483 5.5465440 5.3743223          5
## 484 484 5.1888457 4.6135011          5
## 485 485 4.6169163 5.2088242          5
## 486 486 4.9429869 4.8267085          5
## 487 487 4.8556434 4.4610117          5
## 488 488 4.8040523 4.3878327          5
## 489 489 5.3077146 4.9592097          5
## 490 490 5.0213129 4.9233404          5
## 491 491 5.4606565 4.9351379          5
## 492 492 4.9922979 5.5963421          5
## 493 493 4.8246139 5.4308834          5
## 494 494 4.6460268 5.5015898          5
## 495 495 4.9556460 5.4008026          5
## 496 496 4.9442921 5.0929200          5
## 497 497 5.1365860 5.6318518          5
## 498 498 4.9740604 4.9784884          5
## 499 499 4.8770270 4.9628846          5
## 500 500 5.0317630 4.9630071          5

This is what our data looks like:

data %>% ggplot(aes(x = V1, y = V2, color = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "true cluster")

Now we can cluster it using kmeans++:

data_for_clust <- data %>% select(id, starts_with("V"))
km <- TGL_kmeans_tidy(data_for_clust,
    k = 5,
    metric = "euclid",
    verbose = TRUE
)
## id column: id
## KMEans: will generate seeds
## KMeans into generate seeds
## at seed 0
## add new core from 82 to 0
## at seed 1
## done update min distance
## seed range 350 450
## picked up 440 dist was 2.59777
## add new core from 440 to 1
## at seed 2
## done update min distance
## seed range 300 400
## picked up 274 dist was 1.3275
## add new core from 274 to 2
## at seed 3
## done update min distance
## seed range 250 350
## picked up 374 dist was 0.729166
## add new core from 374 to 3
## at seed 4
## done update min distance
## seed range 200 300
## picked up 191 dist was 0.613397
## add new core from 191 to 4
## KMEans: reassign after init
## KMEans: iter 0
## KMEans: iter 1 changed 3
## KMEans: iter 1
## KMEans: iter 2 changed 4
## KMEans: iter 2
## KMEans: iter 3 changed 0

The returned list contains 3 fields:

names(km)
## [1] "centers" "cluster" "size"

km$centers contains a tibble with clust column and the cluster centers:

km$centers
## # A tibble: 5 × 3
##   clust    V1    V2
##   <int> <dbl> <dbl>
## 1     1  1.03  1.00
## 2     2  1.95  2.04
## 3     3  3.94  4.00
## 4     4  4.96  4.98
## 5     5  2.94  3.02

Clusters are numbered according to reorder_func (see the ‘Custom cluster ordering’ section).

km$cluster contains a tibble with an id column holding the observation id (1:n if no id column was supplied), and a clust column holding the cluster assigned to each observation:

km$cluster
## # A tibble: 500 × 2
##    id    clust
##    <chr> <int>
##  1 1         1
##  2 2         1
##  3 3         1
##  4 4         1
##  5 5         1
##  6 6         2
##  7 7         1
##  8 8         1
##  9 9         1
## 10 10        1
## # ℹ 490 more rows

km$size contains a tibble with a clust column and an n column with the number of points in each cluster:

km$size
## # A tibble: 5 × 2
##   clust     n
##   <int> <int>
## 1     1    99
## 2     2   102
## 3     3    99
## 4     4   104
## 5     5    96

We can now check our clustering performance - fraction of observations that were classified correctly (Note that match_clusters function is internal to the package and is used only in this vignette):

d <- tglkmeans:::match_clusters(data, km, 5)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.974

And plot the results:

d %>% ggplot(aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "cluster") +
    scale_shape_discrete(name = "true cluster") +
    geom_point(data = km$centers, size = 7, color = "black", shape = "X")

Custom cluster ordering

By default, the clusters are ordered using the following function: hclust(dist(cor(t(centers)))) - hierarchical clustering (hclust) of the Euclidean distance of the correlation matrix of the centers.

We can supply our own function to order the clusters using the reorder_func argument. The function is applied to each center and the clusters are ordered by the result.

km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
    k = 5,
    metric = "euclid",
    verbose = FALSE,
    reorder_func = median
)
km$centers
## # A tibble: 5 × 3
##   clust    V1    V2
##   <int> <dbl> <dbl>
## 1     1  1.05 0.963
## 2     2  1.95 2.06 
## 3     3  2.93 3.04 
## 4     4  3.88 4.04 
## 5     5  4.98 5.00

Missing data

tglkmeans can deal with missing data, as long as at least one dimension is not missing. For example:

data$V1[sample(1:nrow(data), round(nrow(data) * 0.2))] <- NA
data
##      id        V1        V2 true_clust
## 1     1 1.2104301 1.3683836          1
## 2     2 1.4491215 0.4625674          1
## 3     3        NA 0.9372232          1
## 4     4 0.8384915 0.7257761          1
## 5     5        NA 0.8939351          1
## 6     6 1.3381513 1.7039413          1
## 7     7        NA 1.3375870          1
## 8     8        NA 1.0858607          1
## 9     9 0.2114928 0.9388908          1
## 10   10 1.1833862 1.1472999          1
## 11   11 1.4171330 1.6443124          1
## 12   12 1.3276149 1.3874632          1
## 13   13 1.5965528 1.3105140          1
## 14   14 0.7908713 0.3863466          1
## 15   15 1.1231271 1.0351274          1
## 16   16 1.0951040 0.8324043          1
## 17   17 1.4269767 0.7685852          1
## 18   18        NA 1.2483960          1
## 19   19 0.9714620 0.6577174          1
## 20   20 1.3784464 1.5836677          1
## 21   21        NA 1.2915563          1
## 22   22 1.2691834 0.7648148          1
## 23   23 1.7154622 0.8518827          1
## 24   24 1.1913115 0.9343243          1
## 25   25 1.1243154 0.9461786          1
## 26   26 0.8732431 0.9824090          1
## 27   27 1.3918277 1.0916876          1
## 28   28 1.5159796 1.2975739          1
## 29   29 1.5274221 1.2271982          1
## 30   30 1.0974767 0.9427814          1
## 31   31 1.2828950 0.7675730          1
## 32   32 1.3499113 0.4990147          1
## 33   33        NA 1.7106876          1
## 34   34 1.1639380 1.4218098          1
## 35   35 0.9888706 0.7425405          1
## 36   36        NA 1.2437757          1
## 37   37        NA 1.1731891          1
## 38   38 0.8849448 1.2731927          1
## 39   39 0.8983317 1.2373607          1
## 40   40 0.7187551 0.9990338          1
## 41   41 0.4915366 1.2359479          1
## 42   42 1.5570788 0.7139513          1
## 43   43        NA 1.7778102          1
## 44   44 0.9350087 1.2944691          1
## 45   45        NA 0.9061553          1
## 46   46 1.3198188 0.6327240          1
## 47   47 1.2693935 1.2556958          1
## 48   48 1.2655209 0.7677790          1
## 49   49 1.0492183 1.0071171          1
## 50   50 1.3436999 0.8914861          1
## 51   51 0.8649826 0.4393002          1
## 52   52        NA 1.2861909          1
## 53   53 1.3380771 0.9031222          1
## 54   54 1.0646184 0.8269146          1
## 55   55 1.0562299 1.1478297          1
## 56   56 0.7874496 0.6551511          1
## 57   57 0.8406047 0.8766421          1
## 58   58 1.3362826 1.0910906          1
## 59   59 0.8672484 0.9945076          1
## 60   60 1.1358433 0.6998124          1
## 61   61 0.9683520 0.6204357          1
## 62   62 1.0711068 0.7565281          1
## 63   63 0.8514140 1.5520788          1
## 64   64 1.2442232 0.6942918          1
## 65   65        NA 1.1214371          1
## 66   66 1.0695594 0.8500226          1
## 67   67 1.1367212 1.0370786          1
## 68   68 0.9939847 1.0128525          1
## 69   69 1.2033169 1.1006504          1
## 70   70 1.0782586 0.6374707          1
## 71   71 0.2288410 0.6416650          1
## 72   72 1.2769338 1.0937538          1
## 73   73 1.1421456 0.9497381          1
## 74   74 0.7868260 1.1260806          1
## 75   75 1.0532944 1.0440163          1
## 76   76 0.5311549 0.7625642          1
## 77   77 0.8498302 1.3852686          1
## 78   78        NA 0.9354545          1
## 79   79 1.2074861 0.2554480          1
## 80   80 0.7307841 1.1591555          1
## 81   81 0.8972436 1.1210308          1
## 82   82 1.2554231 0.5900796          1
## 83   83 1.2608541 0.8421847          1
## 84   84 0.8130890 1.0557354          1
## 85   85 0.8404473 0.8711645          1
## 86   86 0.8349969 1.4003360          1
## 87   87 1.0816576 1.4512211          1
## 88   88        NA 1.5146929          1
## 89   89 0.9911115 0.8629948          1
## 90   90 0.7919034 0.8436254          1
## 91   91 0.4852926 0.9230567          1
## 92   92 1.2506452 1.0277350          1
## 93   93 0.9789287 0.8972017          1
## 94   94 1.4397487 0.7961277          1
## 95   95 1.3050221 0.9499535          1
## 96   96 0.7019074 1.3787199          1
## 97   97 0.6176745 1.1109013          1
## 98   98 0.8390911 0.8890517          1
## 99   99 0.6900021 0.7087577          1
## 100 100 0.6799433 0.8882345          1
## 101 101 2.0391639 2.0181653          2
## 102 102 2.4388016 2.4557976          2
## 103 103 2.0174240 2.5521841          2
## 104 104 1.8587122 1.4773660          2
## 105 105 2.2228069 1.8643797          2
## 106 106        NA 2.5838951          2
## 107 107 2.3288492 2.3399334          2
## 108 108 1.8282585 1.7239360          2
## 109 109 1.9028747 1.7636406          2
## 110 110 2.1315162 1.8946201          2
## 111 111 1.9070960 2.5482367          2
## 112 112 1.3471719 1.9344844          2
## 113 113 2.3481745 2.6074791          2
## 114 114 1.8829352 2.0381267          2
## 115 115 2.0074736 2.1634433          2
## 116 116        NA 2.4508656          2
## 117 117 2.0193820 1.9183353          2
## 118 118        NA 2.3190341          2
## 119 119 1.8542131 2.2321659          2
## 120 120 1.9060893 2.0646348          2
## 121 121 2.4722898 1.9740046          2
## 122 122 1.8770491 1.9627591          2
## 123 123 2.0650205 2.3379996          2
## 124 124        NA 2.4002337          2
## 125 125 1.7917625 2.4462792          2
## 126 126        NA 2.1800546          2
## 127 127 1.8854365 1.7858832          2
## 128 128 2.0310415 1.9465927          2
## 129 129 2.1572609 1.3434804          2
## 130 130        NA 2.0584772          2
## 131 131 1.9672706 2.1634058          2
## 132 132        NA 2.2280470          2
## 133 133 1.9491033 1.8333605          2
## 134 134 2.7426726 1.8499737          2
## 135 135        NA 1.6649506          2
## 136 136 2.1683032 2.1805902          2
## 137 137        NA 2.3592386          2
## 138 138        NA 2.2770725          2
## 139 139 1.8092272 2.4288718          2
## 140 140 1.5577312 2.2269449          2
## 141 141 1.4773338 2.0939169          2
## 142 142 1.5450671 2.0904800          2
## 143 143        NA 2.0558441          2
## 144 144        NA 1.5896367          2
## 145 145        NA 2.3826735          2
## 146 146 2.3383979 2.0764678          2
## 147 147 2.2404646 1.9210600          2
## 148 148 1.9270636 1.7799585          2
## 149 149 1.9005145 2.0439734          2
## 150 150 2.4991068 1.8043598          2
## 151 151 2.6816245 1.8650372          2
## 152 152 1.9831453 2.3908543          2
## 153 153 1.4493428 2.1726293          2
## 154 154 1.9927737 1.9988305          2
## 155 155        NA 2.1145590          2
## 156 156 1.3711692 1.8178593          2
## 157 157 2.0527785 1.8409058          2
## 158 158 1.8687704 1.8579349          2
## 159 159 2.0012288 2.3898236          2
## 160 160 2.3538437 1.9176856          2
## 161 161 2.1268875 1.9789530          2
## 162 162 1.9698392 1.9783816          2
## 163 163 2.0237972 1.7201793          2
## 164 164 1.9978177 1.6915337          2
## 165 165 1.7981725 2.3774763          2
## 166 166 1.5054348 1.7052174          2
## 167 167 1.8698982 2.1030603          2
## 168 168 2.1865770 1.7654210          2
## 169 169        NA 2.2340106          2
## 170 170 2.4256951 1.6375617          2
## 171 171 1.9488542 2.3069497          2
## 172 172 2.2238790 1.9878563          2
## 173 173        NA 2.1295271          2
## 174 174 1.8754898 1.6970517          2
## 175 175 1.7590136 1.9580199          2
## 176 176 1.8026681 1.8304441          2
## 177 177        NA 1.9836410          2
## 178 178 1.6672878 1.8854531          2
## 179 179 2.0736994 1.8903495          2
## 180 180 2.6008386 2.0007398          2
## 181 181 1.7572684 1.8883062          2
## 182 182 2.3615302 2.8693811          2
## 183 183 1.6699534 1.8502708          2
## 184 184 1.7321898 2.0740680          2
## 185 185 2.2498669 2.0181920          2
## 186 186 2.2532141 1.9559326          2
## 187 187 1.8039143 2.0612574          2
## 188 188 2.2792179 2.1759185          2
## 189 189        NA 1.3624400          2
## 190 190 1.7931790 2.0183778          2
## 191 191 1.8332814 1.7607641          2
## 192 192 1.8987932 1.8791867          2
## 193 193        NA 1.8733202          2
## 194 194 1.5725394 1.7035323          2
## 195 195 1.5807183 2.1476385          2
## 196 196 2.0456221 1.9865136          2
## 197 197 1.7354827 1.8701078          2
## 198 198 2.2273729 2.3203348          2
## 199 199 1.7379647 2.2614389          2
## 200 200 1.8285528 1.8891335          2
## 201 201 3.0228406 2.5742012          3
## 202 202 2.8264634 2.5193997          3
## 203 203 2.9795499 2.9430383          3
## 204 204 2.5758978 3.3117884          3
## 205 205 2.9320324 3.2966298          3
## 206 206 2.6298736 3.0509339          3
## 207 207 3.5191178 3.1996077          3
## 208 208        NA 2.8849450          3
## 209 209        NA 2.6418992          3
## 210 210        NA 3.6152586          3
## 211 211 2.6568845 3.7019399          3
## 212 212 2.9425532 3.1743587          3
## 213 213 2.5443905 2.6887054          3
## 214 214 3.0179333 4.0055047          3
## 215 215 3.1292554 2.7458439          3
## 216 216        NA 3.2530946          3
## 217 217 2.9128796 3.0726575          3
## 218 218        NA 3.2661185          3
## 219 219 2.6945793 3.1150197          3
## 220 220 3.4230353 2.9653637          3
## 221 221 3.1687852 2.7597259          3
## 222 222 2.7532207 2.6670718          3
## 223 223 2.4448156 2.9681616          3
## 224 224 2.4300829 2.9822555          3
## 225 225 2.8616584 3.1300119          3
## 226 226 3.2975809 2.6457393          3
## 227 227 3.0014331 3.2918639          3
## 228 228 3.7418102 2.7802559          3
## 229 229 2.6969278 2.5702803          3
## 230 230        NA 3.0383523          3
## 231 231 3.3107304 2.8405802          3
## 232 232 2.8476121 3.0831595          3
## 233 233        NA 2.9201190          3
## 234 234 2.3890699 3.1932735          3
## 235 235        NA 3.0658503          3
## 236 236        NA 2.8234095          3
## 237 237 3.3142624 2.9993528          3
## 238 238        NA 3.1066114          3
## 239 239 2.9683849 3.4586191          3
## 240 240        NA 2.9251905          3
## 241 241 3.1909567 3.2204691          3
## 242 242 3.1804196 3.8110207          3
## 243 243 2.8824807 2.6830037          3
## 244 244        NA 2.8421912          3
## 245 245 2.7967161 3.2160224          3
## 246 246        NA 2.9243666          3
## 247 247 3.0570122 3.2224496          3
## 248 248        NA 2.8826562          3
## 249 249 3.0741243 3.2942872          3
## 250 250 2.7109808 3.0250313          3
## 251 251 2.7592079 2.7649743          3
## 252 252 2.8173936 3.0421431          3
## 253 253 2.0893164 2.4664163          3
## 254 254 3.2908883 3.3662660          3
## 255 255 3.3613008 3.7782209          3
## 256 256 2.9399723 3.0523287          3
## 257 257 3.2103825 2.6404745          3
## 258 258 3.0918192 3.1156866          3
## 259 259 2.9687267 3.4040006          3
## 260 260 2.8655536 2.5524561          3
## 261 261 2.9082046 2.6380456          3
## 262 262        NA 2.9733453          3
## 263 263 3.1332768 3.3851481          3
## 264 264        NA 3.1081696          3
## 265 265 3.2099134 3.2326707          3
## 266 266 2.1269733 2.5703307          3
## 267 267 2.9056134 2.7590293          3
## 268 268 2.9144735 2.8913101          3
## 269 269 2.9409252 3.6665764          3
## 270 270 2.1816313 3.0306729          3
## 271 271 3.0241775 3.3440866          3
## 272 272        NA 2.8408377          3
## 273 273 2.7263616 2.7822060          3
## 274 274        NA 3.3399851          3
## 275 275 2.9206051 2.9606082          3
## 276 276 3.3774914 3.4511808          3
## 277 277 3.0255056 3.2377552          3
## 278 278 2.8056764 3.3026700          3
## 279 279 3.1158295 2.7345826          3
## 280 280 3.0193448 2.6935288          3
## 281 281 2.3713686 2.8172566          3
## 282 282 2.3228749 3.2678787          3
## 283 283 3.1443417 2.6714102          3
## 284 284 2.9217835 2.9460468          3
## 285 285 2.9346401 2.6732815          3
## 286 286 3.1988354 2.8717862          3
## 287 287 2.6853385 2.9613781          3
## 288 288 3.1631581 3.2231121          3
## 289 289        NA 3.1599477          3
## 290 290        NA 2.3891263          3
## 291 291 2.8201274 3.1464579          3
## 292 292 3.1368296 3.0520403          3
## 293 293 3.0997534 2.8530752          3
## 294 294 3.2923619 3.0868825          3
## 295 295 2.7540963 3.2833736          3
## 296 296 2.3546711 2.8627440          3
## 297 297 2.7946858 3.0152221          3
## 298 298 3.1415943 3.1597159          3
## 299 299 2.7195751 2.8931792          3
## 300 300 3.0756822 3.3554485          3
## 301 301        NA 3.7221189          4
## 302 302 3.8288771 3.9053497          4
## 303 303        NA 3.5152524          4
## 304 304        NA 3.7308093          4
## 305 305 3.9485961 3.3284119          4
## 306 306 3.6326828 4.0357080          4
## 307 307 4.0643474 3.9678727          4
## 308 308        NA 4.1642610          4
## 309 309 4.2834901 4.1102333          4
## 310 310 3.9514273 4.2101962          4
## 311 311        NA 3.9975055          4
## 312 312 3.7704318 3.9629276          4
## 313 313 3.6205448 4.0448399          4
## 314 314 4.0025090 3.9998914          4
## 315 315 3.6621169 3.9393469          4
## 316 316        NA 4.2881091          4
## 317 317 4.1279586 4.2358091          4
## 318 318 3.8563616 3.9070165          4
## 319 319        NA 4.0960128          4
## 320 320 4.1732335 4.2190993          4
## 321 321 4.2129027 4.2557813          4
## 322 322 4.5449944 4.1437936          4
## 323 323        NA 4.5588799          4
## 324 324 4.2809168 3.8955731          4
## 325 325 3.5159098 4.1164808          4
## 326 326 4.0959180 3.7298479          4
## 327 327 3.8211777 4.2507414          4
## 328 328 3.8113820 4.4978229          4
## 329 329 4.5300123 3.9777361          4
## 330 330 4.1847565 3.9363343          4
## 331 331 3.9374313 3.4318420          4
## 332 332 4.6762341 4.0253805          4
## 333 333 3.3632409 4.2158958          4
## 334 334 3.8811024 3.3110981          4
## 335 335        NA 3.8583800          4
## 336 336 4.3320430 3.6152731          4
## 337 337 4.3603633 4.2065882          4
## 338 338 4.2168854 4.1724254          4
## 339 339 4.2827776 4.0712652          4
## 340 340 3.4197209 3.9426643          4
## 341 341 3.5634185 3.4434580          4
## 342 342 4.2604882 4.1764107          4
## 343 343 4.6134920 4.0624458          4
## 344 344        NA 4.0790578          4
## 345 345 3.7772521 4.3783260          4
## 346 346 3.6464681 3.6843470          4
## 347 347 4.5281357 4.1168485          4
## 348 348 4.2963845 3.6576726          4
## 349 349 3.4156272 4.1575434          4
## 350 350 3.6951108 4.2392037          4
## 351 351 3.4398336 4.2966140          4
## 352 352 3.6976667 4.4101329          4
## 353 353 4.1619451 4.1456427          4
## 354 354 3.9077289 3.9741202          4
## 355 355        NA 4.1272374          4
## 356 356 4.4873152 4.5416221          4
## 357 357 4.3606232 4.2723192          4
## 358 358 3.5460653 4.2232890          4
## 359 359 3.5559428 3.8824824          4
## 360 360 4.1074274 4.6042786          4
## 361 361 3.7278963 4.1619784          4
## 362 362        NA 3.8522400          4
## 363 363 3.7968741 3.4320511          4
## 364 364 3.7079632 4.0477676          4
## 365 365 3.9656941 4.0240688          4
## 366 366 4.4880741 3.8327801          4
## 367 367 4.1267916 4.0539403          4
## 368 368        NA 3.9493458          4
## 369 369 3.7213180 3.6928604          4
## 370 370        NA 4.6851062          4
## 371 371 4.3557618 3.6822297          4
## 372 372 4.4629404 4.1302436          4
## 373 373        NA 4.3488407          4
## 374 374        NA 3.9189693          4
## 375 375 4.3239639 3.4663861          4
## 376 376        NA 3.8119997          4
## 377 377        NA 4.1857216          4
## 378 378 3.8532408 4.0260158          4
## 379 379        NA 4.0729654          4
## 380 380 3.5028944 3.8320522          4
## 381 381 3.7593790 3.9948957          4
## 382 382        NA 4.1757377          4
## 383 383 3.5453701 4.1819934          4
## 384 384 3.8341699 4.1337212          4
## 385 385 3.5454753 3.9049875          4
## 386 386 3.9949851 4.3941884          4
## 387 387 3.6725118 4.2897380          4
## 388 388 3.9756914 4.0404751          4
## 389 389 4.1410160 3.9063258          4
## 390 390 4.3205476 3.5994082          4
## 391 391 4.3804207 3.7896964          4
## 392 392 3.9929013 3.9956640          4
## 393 393        NA 4.1960163          4
## 394 394 3.9584686 3.7715580          4
## 395 395        NA 4.8471691          4
## 396 396 4.3698917 3.9677632          4
## 397 397        NA 4.1926265          4
## 398 398 3.9928729 4.1775785          4
## 399 399 3.9432750 4.1233490          4
## 400 400 4.4524295 3.7825198          4
## 401 401        NA 5.0450294          5
## 402 402 5.1019782 4.8001901          5
## 403 403 5.1172691 4.9525798          5
## 404 404 5.1910605 4.4270148          5
## 405 405 5.2762189 4.7392097          5
## 406 406 5.7538329 5.1287255          5
## 407 407 5.3225482 4.4610521          5
## 408 408 4.5381460 4.6968687          5
## 409 409 4.5936500 4.9350762          5
## 410 410        NA 4.7935962          5
## 411 411 4.8943111 4.6335769          5
## 412 412        NA 4.5135088          5
## 413 413 5.7292191 5.0221079          5
## 414 414 4.9176475 4.8675722          5
## 415 415 5.1472754 5.2588447          5
## 416 416 4.6608851 4.8356509          5
## 417 417 5.4573166 5.1031367          5
## 418 418        NA 5.0988470          5
## 419 419        NA 5.1269276          5
## 420 420 4.6896242 5.0606529          5
## 421 421 4.4276857 4.7738091          5
## 422 422 4.6688578 5.2349823          5
## 423 423 4.6604069 5.2767647          5
## 424 424        NA 4.8734313          5
## 425 425 5.2921353 5.1933868          5
## 426 426 4.8161732 5.1981492          5
## 427 427        NA 5.0642983          5
## 428 428 5.2836220 4.6629048          5
## 429 429 4.8955325 5.6683789          5
## 430 430        NA 4.6593043          5
## 431 431 5.0759803 4.9800395          5
## 432 432 4.9067557 5.0370765          5
## 433 433 4.7416282 4.8916794          5
## 434 434 4.9160477 5.0655779          5
## 435 435 4.7819251 5.0270777          5
## 436 436 5.0567739 5.0870725          5
## 437 437        NA 5.2379301          5
## 438 438        NA 5.0963812          5
## 439 439 5.2079232 5.1998098          5
## 440 440 4.7704942 5.1582392          5
## 441 441 4.6746363 4.7272163          5
## 442 442 4.4621512 4.6310220          5
## 443 443 4.9204140 5.4607818          5
## 444 444 4.5866643 4.9764021          5
## 445 445        NA 5.3497267          5
## 446 446        NA 4.9725658          5
## 447 447 4.5841538 5.3399144          5
## 448 448 4.8168370 5.0250592          5
## 449 449 5.1074989 4.7561170          5
## 450 450 4.4419290 4.8718176          5
## 451 451 5.2725587 5.0118051          5
## 452 452 4.7283077 5.1266616          5
## 453 453 5.4874816 4.9463756          5
## 454 454 5.1043362 5.0618757          5
## 455 455 4.6466943 5.0227658          5
## 456 456 5.1278688 4.9726940          5
## 457 457        NA 4.8624794          5
## 458 458 5.5166335 5.0561581          5
## 459 459        NA 5.1415792          5
## 460 460 5.1769154 5.3066221          5
## 461 461 4.6530496 4.8915139          5
## 462 462        NA 5.0384046          5
## 463 463 4.8735601 4.5343010          5
## 464 464 4.4360070 4.9546956          5
## 465 465 5.1534273 4.9487389          5
## 466 466 4.5508130 4.7018845          5
## 467 467 4.5984322 4.8850922          5
## 468 468 4.7681095 5.2515668          5
## 469 469        NA 4.5093633          5
## 470 470        NA 4.7596517          5
## 471 471 5.3014319 4.9238760          5
## 472 472 4.3784635 5.3527621          5
## 473 473 4.5349217 5.0800590          5
## 474 474 4.5606143 4.4334509          5
## 475 475        NA 5.3025480          5
## 476 476        NA 5.1425728          5
## 477 477 4.9617557 5.5410689          5
## 478 478        NA 4.7492209          5
## 479 479 4.6515451 5.2139247          5
## 480 480 4.9646100 4.9970420          5
## 481 481 5.4265762 4.6943047          5
## 482 482 5.2314653 5.0259304          5
## 483 483 5.5465440 5.3743223          5
## 484 484        NA 4.6135011          5
## 485 485 4.6169163 5.2088242          5
## 486 486 4.9429869 4.8267085          5
## 487 487        NA 4.4610117          5
## 488 488 4.8040523 4.3878327          5
## 489 489 5.3077146 4.9592097          5
## 490 490 5.0213129 4.9233404          5
## 491 491 5.4606565 4.9351379          5
## 492 492 4.9922979 5.5963421          5
## 493 493 4.8246139 5.4308834          5
## 494 494 4.6460268 5.5015898          5
## 495 495 4.9556460 5.4008026          5
## 496 496        NA 5.0929200          5
## 497 497 5.1365860 5.6318518          5
## 498 498 4.9740604 4.9784884          5
## 499 499 4.8770270 4.9628846          5
## 500 500 5.0317630 4.9630071          5
# Cluster on the id and coordinate (`V*`) columns only.
km <- TGL_kmeans_tidy(
    select(data, id, starts_with("V")),
    k = 5, metric = "euclid", verbose = FALSE
)

# match_clusters() is an internal helper; its result carries both
# `true_clust` and `new_clust` for every row.
d <- tglkmeans:::match_clusters(data, km, 5)

# Accuracy: matching labels divided by the number of rows that actually
# received a cluster (rows with an NA `new_clust` are left out of both counts).
is_match <- d$true_clust == d$new_clust
n_assigned <- sum(!is.na(d$new_clust))
sum(is_match, na.rm = TRUE) / n_assigned
## [1] 0.96

Plotting the results (without the NAs), we get:

# Colour encodes the assigned cluster, shape encodes the true cluster.
point_aes <- aes(
    x = V1, y = V2,
    color = factor(new_clust),
    shape = factor(true_clust)
)

# The second point layer draws the cluster centers on top of the data points
# as large black "X" marks.
ggplot(d, point_aes) +
    geom_point() +
    geom_point(data = km$centers, size = 7, color = "black", shape = "X") +
    scale_color_discrete(name = "cluster") +
    scale_shape_discrete(name = "true cluster")
## Warning: Removed 100 rows containing missing values (`geom_point()`).

High dimensions

Let’s move to higher dimensions (and more clusters):

# Simulate 30 clusters in 300 dimensions (same `n` and `sd` as before).
data <- simulate_data(n = 100, sd = 0.3, nclust = 30, dims = 300)

# Cluster on the id and coordinate (`V*`) columns only.
km <- TGL_kmeans_tidy(
    select(data, id, starts_with("V")),
    k = 30, metric = "euclid", verbose = FALSE
)

# Accuracy: matching labels divided by the number of rows that received a
# cluster assignment.
d <- tglkmeans:::match_clusters(data, km, 30)
is_match <- d$true_clust == d$new_clust
n_assigned <- sum(!is.na(d$new_clust))
sum(is_match, na.rm = TRUE) / n_assigned
## [1] 1

Comparison with R vanilla kmeans

Let’s compare it to R vanilla kmeans:

# Base R kmeans() on the coordinate (`V*`) columns only, asking for 30 centers.
km_standard <- kmeans(data %>% select(starts_with("V")), centers = 30)

# Add a `clust` table (id + assigned cluster) to the kmeans result so it can be
# passed to match_clusters() below. Ids are row positions; seq_len() is used
# instead of 1:nrow() so that an empty table would yield an empty id vector.
# NOTE(review): this presumably lines up with `data$id` -- confirm.
km_standard$clust <- tibble(
    id = seq_len(nrow(data)),
    clust = km_standard$cluster
)

# Accuracy: matching labels divided by the number of rows that received a
# cluster assignment.
d <- tglkmeans:::match_clusters(data, km_standard, 30)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.6

We can see that, in this example, kmeans++ clusters significantly better than R’s vanilla kmeans.

Random seed

We can set the seed for the C++ random number generator to get reproducible results:

# Run the identical clustering twice with the same seed.
km1 <- TGL_kmeans_tidy(
    select(data, id, starts_with("V")),
    k = 30, metric = "euclid", verbose = FALSE, seed = 60427
)
km2 <- TGL_kmeans_tidy(
    select(data, id, starts_with("V")),
    k = 30, metric = "euclid", verbose = FALSE, seed = 60427
)

# Compare the centers element-wise, skipping the first column of each table
# (presumably the cluster id -- confirm). Exact equality is intended here:
# the same seed should reproduce the centers bit for bit.
centers1 <- km1$centers[, -1]
centers2 <- km2$centers[, -1]
all(centers1 == centers2)
## [1] TRUE